[med-svn] [r-hms-dbmi-spp] 01/01: New upstream version 1.14
Michael Crusoe
misterc-guest at moszumanska.debian.org
Mon Oct 17 08:33:56 UTC 2016
This is an automated email from the git hooks/post-receive script.
misterc-guest pushed a commit to annotated tag upstream/1.14
in repository r-hms-dbmi-spp.
commit d6cef67b78711a975fa7a8eff0ca17f18173f4a6
Author: Michael R. Crusoe <michael.crusoe at gmail.com>
Date: Sat Oct 15 04:55:09 2016 -0700
New upstream version 1.14
---
DESCRIPTION | 13 +
NAMESPACE | 3 +
R/zroutines.R | 2627 +++++++++++++
README.md | 22 +
aclocal.m4 | 1560 ++++++++
configure | 4568 +++++++++++++++++++++++
configure.ac | 9 +
man/add.broad.peak.regions.Rd | 27 +
man/find.binding.positions.Rd | 128 +
man/get.binding.characteristics.Rd | 55 +
man/get.broad.enrichment.clusters.Rd | 27 +
man/get.conservative.fold.enrichment.profile.Rd | 59 +
man/get.mser.Rd | 46 +
man/get.mser.interpolation.Rd | 56 +
man/get.smoothed.enrichment.mle.Rd | 35 +
man/get.smoothed.tag.density.Rd | 45 +
man/output.binding.results.Rd | 24 +
man/read.bam.tags.Rd | 24 +
man/read.bin.maqmap.tags.Rd | 23 +
man/read.bowtie.tags.Rd | 23 +
man/read.eland.tags.Rd | 30 +
man/read.maqmap.tags.Rd | 23 +
man/read.meland.tags.Rd | 29 +
man/remove.local.tag.anomalies.Rd | 50 +
man/select.informative.tags.Rd | 29 +
man/spp-package.Rd | 144 +
man/write.broadpeak.info.Rd | 16 +
man/write.narrowpeak.binding.Rd | 25 +
man/writewig.Rd | 31 +
src/Makevars.in | 4 +
src/api_global.h | 22 +
src/bed2vector.cpp | 2705 ++++++++++++++
src/cdensum.c | 185 +
src/const.h | 18 +
src/maqmap.c | 164 +
src/maqmap.h | 70 +
src/maqread.cpp | 207 +
src/pc.h | 17 +
src/peaks.cpp | 806 ++++
src/wdl.cpp | 659 ++++
40 files changed, 14608 insertions(+)
diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..46e9a94
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,13 @@
+Package: spp
+Type: Package
+Title: some description
+Version: 1.14
+Date: 2016-06-02
+Author: Peter K
+Depends: caTools
+Imports: Rsamtools
+Maintainer: peterK<peter.kharchenko at post.harvard.edu>
+Description: Describe the package
+License: GPL-2
+LazyLoad: yes
+Packaged: Wed Nov 12 10:42:54 2008; vidhuch
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..caf30e6
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,3 @@
+useDynLib(spp)
+
+exportPattern("^[^\\.]")
diff --git a/R/zroutines.R b/R/zroutines.R
new file mode 100644
index 0000000..108ab79
--- /dev/null
+++ b/R/zroutines.R
@@ -0,0 +1,2627 @@
+#library(caTools)
+#dyn.load("src/bed2vector.so");
+#dyn.load("src/wdl.so");
+#dyn.load("src/peaks.so");
+#dyn.load("src/cdensum.so");
+
+
+# -------- ROUTINES FOR READING IN THE DATA FILES ------------
+# fix.chromosome.names : remove ".fa" suffix from match sequence names
+read.eland.tags <- function(filename,read.tag.names=F,fix.chromosome.names=T,max.eland.tag.length=-1,extended=F,multi=F) {
+ if(read.tag.names) { rtn <- as.integer(1); } else { rtn <- as.integer(0); };
+ storage.mode(max.eland.tag.length) <- "integer";
+ callfunction <- "read_eland";
+ if(extended) { callfunction <- "read_eland_extended"; };
+ if(multi) { callfunction <- "read_eland_multi"; };
+ tl <- lapply(.Call(callfunction,path.expand(filename),rtn,max.eland.tag.length),function(d) {
+ xo <- order(abs(d$t));
+ d$t <- d$t[xo];
+ d$n <- d$n[xo];
+ if(read.tag.names) {
+ d$s <- d$s[xo];
+ }
+ return(d);
+ });
+ if(fix.chromosome.names) {
+ # remove ".fa"
+ names(tl) <- gsub("\\.fa","",names(tl))
+ }
+ # separate tags and quality
+ if(read.tag.names) {
+ return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n),names=lapply(tl,function(d) d$s)));
+ } else {
+ return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n)));
+ }
+}
+
+read.tagalign.tags <- function(filename,fix.chromosome.names=T,fix.quality=T) {
+ tl <- lapply(.Call("read_tagalign",path.expand(filename)),function(d) {
+ xo <- order(abs(d$t));
+ d$t <- d$t[xo];
+ d$n <- d$n[xo];
+ if(fix.quality) {
+ if (min(d$n)<0.5){
+ d$n = ceiling(1000/4^d$n);
+ }
+ break.vals <- unique(sort(c(0,unique(d$n))));
+ d$n <- length(break.vals)-1-cut(d$n,breaks=break.vals,labels=F);
+ }
+ return(d);
+ });
+ if(fix.chromosome.names) {
+ # remove ".fa"
+ names(tl) <- gsub("\\.fa","",names(tl))
+ }
+ # separate tags and quality
+ return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n)));
+}
+
+
+read.short.arachne.tags <- function(filename,fix.chromosome.names=F) {
+ tl <- lapply(.Call("read_arachne",path.expand(filename)),function(d) {
+ xo <- order(abs(d$t));
+ d$t <- d$t[xo];
+ d$n <- d$n[xo];
+ return(d);
+ });
+ if(fix.chromosome.names) {
+ # remove ".fa"
+ names(tl) <- gsub("\\.fa","",names(tl))
+ }
+ # separate tags and quality
+ return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n)));
+}
+
+
+read.arachne.tags <- function(filename,fix.chromosome.names=F) {
+ tl <- lapply(.Call("read_arachne_long",path.expand(filename)),function(d) {
+ xo <- order(abs(d$t));
+ d$t <- d$t[xo];
+ d$n <- d$n[xo];
+ d$l <- d$l[xo];
+ return(d);
+ });
+ if(fix.chromosome.names) {
+ # remove ".fa"
+ names(tl) <- gsub("\\.fa","",names(tl))
+ }
+ # separate tags and quality
+ return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n),length=lapply(tl,function(d) d$l)));
+}
+
+read.bowtie.tags <- function(filename,read.tag.names=F,fix.chromosome.names=F) {
+ if(read.tag.names) { rtn <- as.integer(1); } else { rtn <- as.integer(0); };
+ tl <- lapply(.Call("read_bowtie",path.expand(filename),rtn),function(d) {
+ xo <- order(abs(d$t));
+ d$t <- d$t[xo];
+ d$n <- d$n[xo];
+ if(read.tag.names) {
+ d$s <- d$s[xo];
+ }
+ return(d);
+ });
+ if(fix.chromosome.names) {
+ # remove ".fa"
+ names(tl) <- gsub("\\.fa","",names(tl))
+ }
+ # separate tags and quality
+ if(read.tag.names) {
+ return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n),names=lapply(tl,function(d) d$s)));
+ } else {
+ return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n)));
+ }
+}
+
+read.bam.tags <- function(filename,read.tag.names=F,fix.chromosome.names=F) {
+ #require(Rsamtools)
+ if(!is.element("fastcluster", installed.packages()[, 1])) {
+ stop("Rsamtools Bioconductor package is now required for BAM file support. Please install")
+ }
+
+ ww <- c("flag","rname","pos","isize","strand","mapq","qwidth"); if(read.tag.names) { ww <- c(ww,"qname") };
+ bam <- Rsamtools::scanBam(filename,param=Rsamtools::ScanBamParam(what=ww,flag=Rsamtools::scanBamFlag(isUnmappedQuery=FALSE)))[[1]];
+ strm <- as.integer(bam$strand=="+")
+ if(any(bitwAnd(bam$flag,0x1))) { # paired-end data
+ # use only positive strand mappings
+ posvi <- which(strm==1);
+ rl <- list(tags=tapply(posvi,bam$rname[posvi],function(ii) as.numeric(bam$pos[ii])),
+ flen=tapply(posvi,bam$rname[posvi],function(ii) as.numeric(abs(bam$isize[ii]))))
+ # alternatively, handle reads with NA isize (unpaired?) just like single-ended reads
+ #pos <- tapply(1:length(bam$pos),bam$rname,function(ii) ifelse(is.na(bam$isize[ii]), bam$pos[ii]*strm[ii] - (1-strm[ii])*(bam$pos[ii]+bam$qwidth[ii]), strm[ii]*bam$pos[ii] - (1-strm[ii])*(bam$pos[ii]+bam$isize[ii])))
+ } else {
+ rl <- list(tags=tapply(1:length(bam$pos),bam$rname,function(ii) bam$pos[ii]*strm[ii] - (1-strm[ii])*(bam$pos[ii]+bam$qwidth[ii])))
+ }
+
+ rl <- c(rl,list(quality=tapply(1:length(bam$pos),bam$rname,function(ii) bam$mapq[ii])))
+ if(read.tag.names) {
+ rl <- c(rl,list(names=tapply(1:length(bam$pos),bam$rname,function(ii) bam$qname[ii])))
+ }
+ if(fix.chromosome.names) {
+ # remove ".fa"
+ names(rl) <- gsub("\\.fa","",names(rl))
+ }
+ return(rl)
+}
+
+
+read.helicos.tags <- function(filename,read.tag.names=F,fix.chromosome.names=F,include.length.info=T) {
+ if(read.tag.names) { rtn <- as.integer(1); } else { rtn <- as.integer(0); };
+ tl <- lapply(.Call("read_helicostabf",path.expand(filename),rtn),function(d) {
+ xo <- order(abs(d$t));
+ d$t <- d$t[xo];
+ d$n <- d$n[xo];
+ d$l <- d$l[xo];
+ if(read.tag.names) {
+ d$s <- d$s[xo];
+ }
+ return(d);
+ });
+ if(fix.chromosome.names) {
+ # remove ".fa"
+ names(tl) <- gsub("\\.fa","",names(tl))
+ }
+ # separate tags and quality
+ if(read.tag.names) {
+ return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n),length=lapply(tl,function(d) d$l),names=lapply(tl,function(d) d$s)));
+ } else {
+ return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n),length=lapply(tl,function(d) d$l)));
+ }
+}
+
+read.maqmap.tags <- function(filename,read.tag.names=F,fix.chromosome.names=T) {
+ if(read.tag.names) { rtn <- as.integer(1); } else { rtn <- as.integer(0); };
+ tl <- lapply(.Call("read_maqmap",path.expand(filename),rtn),function(d) {
+ xo <- order(abs(d$t));
+ d$t <- d$t[xo];
+ d$n <- d$n[xo];
+ if(read.tag.names) {
+ d$s <- d$s[xo];
+ }
+ return(d);
+ });
+ if(fix.chromosome.names) {
+ # remove ".fa"
+ names(tl) <- gsub("\\.fa","",names(tl))
+ }
+ # separate tags and quality
+ if(read.tag.names) {
+ return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n),names=lapply(tl,function(d) d$s)));
+ } else {
+ return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n)));
+ }
+}
+
+
+read.bin.maqmap.tags <- function(filename,read.tag.names=F,fix.chromosome.names=T) {
+ if(read.tag.names) { rtn <- as.integer(1); } else { rtn <- as.integer(0); };
+ tl <- lapply(.Call("read_binmaqmap",path.expand(filename),rtn),function(d) {
+ xo <- order(abs(d$t));
+ d$t <- d$t[xo];
+ d$n <- d$n[xo];
+ if(read.tag.names) {
+ d$s <- d$s[xo];
+ }
+ return(d);
+ });
+ if(fix.chromosome.names) {
+ # remove ".fa"
+ names(tl) <- gsub("\\.fa","",names(tl))
+ }
+ # separate tags and quality
+ if(read.tag.names) {
+ return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n),names=lapply(tl,function(d) d$s)));
+ } else {
+ return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n)));
+ }
+}
+
+
+# read in tags from an extended eland format with match length information
+read.meland.tags <- function(filename,read.tag.names=F,fix.chromosome.names=T) {
+ if(read.tag.names) { rtn <- as.integer(1); } else { rtn <- as.integer(0); };
+ tl <- lapply(.Call("read_meland",path.expand(filename),rtn),function(d) {
+ xo <- order(abs(d$t));
+ d$t <- d$t[xo];
+ d$n <- d$n[xo];
+ d$l <- d$l[xo];
+ if(read.tag.names) {
+ d$s <- d$s[xo];
+ }
+ return(d);
+ });
+
+ if(fix.chromosome.names) {
+ # remove ".fa"
+ names(tl) <- gsub("\\.fa","",names(tl))
+ }
+ # separate tags and quality
+ chrl <- names(tl); names(chrl) <- chrl;
+ # reformulate quality scores into monotonic integers
+ ml <- max(unlist(lapply(tl,function(d) max(d$l))));
+ qual <- lapply(chrl,function(chr) (ml-tl[[chr]]$l)+tl[[chr]]$n/10);
+ if(read.tag.names) {
+ return(list(tags=lapply(tl,function(d) d$t),quality=qual,names=lapply(tl,function(d) d$s)));
+ } else {
+ return(list(tags=lapply(tl,function(d) d$t),quality=qual));
+ }
+}
+
+# -------- ROUTINES FOR ASSESSING BINDING PATTERN AND SELECTING INFORMATIVE TAGS ------------
+
+# removes tag positions that have anomalously high counts on both strands
+# z - z-score used to determine anomalous bins
+# zo - z used to filter out one-strand matches
+# trim.fraction - fraction of top bins to discard when calculating overall background density
+# var.base - minimal base variability of tag counts (for processing of flattened datasets with close to 0 variance)
+remove.tag.anomalies <- function(data, bin=1,trim.fraction=1e-3,z=5,zo=3*z,var.base=0.1) {
+
+ t.remove.tag.anomalies <- function(tv,bin=1,trim.fraction=1e-3,z=5,zo=3*z,return.indecies=F) {
+ tt <- table(floor(tv/bin));
+
+ # trim value
+ stt <- sort(as.numeric(tt));
+ stt <- stt[1:(length(stt)*(1-trim.fraction))];
+ mtc <- mean(stt); tcd <- sqrt(var(stt)+var.base);
+
+ thr <- max(1,ceiling(mtc+z*tcd));
+ thr.o <- max(1,ceiling(mtc+zo*tcd));
+ # filter tt
+ tt <- tt[tt>thr]
+ # get + and - tags
+ tp <- as.numeric(names(tt));
+ pti <- tp>0;
+ it <- intersect(tp[pti],(-1)*tp[!pti]);
+ # add one-strand matches
+ it <- unique(c(it,tp[tt>thr.o]));
+ sit <- c(it,(-1)*it);
+
+ if(bin>1) {
+ sit <- sit*bin;
+ sit <- c(sit,unlist(lapply(1:bin,function(i) sit+i)))
+ }
+ if(return.indecies) {
+ return(!tv %in% sit);
+ } else {
+ return(tv[!tv %in% sit]);
+ }
+ }
+
+ vil <- lapply(data$tags,t.remove.tag.anomalies,return.indecies=T,bin=bin,trim.fraction=trim.fraction,z=z,zo=zo);
+ chrl <- names(data$tags); names(chrl) <- chrl;
+ data$tags <- lapply(chrl,function(chr) data$tags[[chr]][vil[[chr]]]);
+ # count tags to remove empty chromosomes
+ nt <- unlist(lapply(data$tags,length));
+ if(any(nt==0)) {
+ data$tags <- data$tags[nt!=0]
+ }
+
+ if(!is.null(data$quality)) {
+ data$quality <- lapply(chrl,function(chr) data$quality[[chr]][vil[[chr]]]);
+ data$quality <- data$quality[nt!=0];
+ }
+ if(!is.null(data$names)) {
+ data$names <- lapply(chrl,function(chr) data$names[[chr]][vil[[chr]]]);
+ data$names <- data$names[nt!=0];
+ }
+
+ return(data);
+}
+
+# caps or removes tag positions that are significantly higher than local background
+remove.local.tag.anomalies <- function(tags,window.size=200,eliminate.fold=10,cap.fold=4,z.threshold=3) {
+ lapply(tags,filter.singular.positions.by.local.density,window.size=2e2,eliminate.fold=10,cap.fold=4,z.threshold=3);
+}
+
+
+
+# assess strand cross-correlation, determine peak position, determine appropriate window size
+# for binding detection.
+get.binding.characteristics <- function(data,srange=c(50,500),bin=5,cluster=NULL,debug=F,min.tag.count=1e3,acceptance.z.score=3,remove.tag.anomalies=T,anomalies.z=5,accept.all.tags=F) {
+ if(remove.tag.anomalies) {
+ data <- remove.tag.anomalies(data,z=anomalies.z);
+ }
+
+ # take highest quality tag bin
+ if(!is.null(data$quality) && !accept.all.tags) {
+ min.bin <- min(unlist(lapply(data$quality,min)))
+ chrl <- names(data$tags); names(chrl) <- chrl;
+ otl <- lapply(chrl,function(chr) data$tags[[chr]][data$quality[[chr]]==min.bin]);
+ } else {
+ otl <- data$tags;
+ }
+ # remove empty chromosomes
+ otl <- otl[unlist(lapply(otl,length))!=0];
+
+
+ # calculate strand scc
+ if(!is.null(cluster)) {
+ cc <- clusterApplyLB(cluster,otl,tag.scc,srange=srange,bin=bin);
+ names(cc) <- names(otl);
+ } else {
+ cc <- lapply(otl,tag.scc,srange=srange,bin=bin);
+ }
+ ccl<-list(sample=cc);
+ ccl.av <- lapply(names(ccl),t.plotavcc,type='l',ccl=ccl,return.ac=T,ttl=list(sample=otl),plot=F)[[1]]
+ ccl.av <- data.frame(x=as.numeric(names(ccl.av)),y=as.numeric(ccl.av));
+
+ # find peak
+ pi <- which.max(ccl.av$y);
+
+ # determine width at third-height
+ th <- (ccl.av$y[pi]-ccl.av$y[length(ccl.av$y)])/3+ccl.av$y[length(ccl.av$y)]
+ whs <- max(ccl.av$x[ccl.av$y>=th]);
+
+ if (! is.integer(whs)) { # Anshul: added this to avoid situations where whs ends up being -Inf
+ whs <- ccl.av$x[ min(c(2*pi,length(ccl.av$y))) ]
+ }
+
+ # determine acceptance of different quality bins
+
+ # calculates tag scc for the best tags, and combinations of best tag category with every other category
+ # for subsequent selection of acceptable categories
+ scc.acceptance.calc <- function() {
+
+ qr <- range(unlist(lapply(data$quality,range)))
+
+ # start with best tags
+
+ # determine half-width for scc calculations
+ pi <- which.max(ccl.av$y);
+
+ # determine width at half-height
+ th <- (ccl.av$y[pi]-ccl.av$y[length(ccl.av$y)])/2+ccl.av$y[length(ccl.av$y)]
+ lwhs <- max(ccl.av$x[ccl.av$y>=th])-ccl.av$x[pi];
+ lwhs <- max(c(20,bin*10,lwhs));
+ srange <- ccl.av$x[pi]+c(-lwhs,lwhs)
+
+ # calculate chromosome-average scc
+ t.scc <- function(tags) {
+ if(is.null(cluster)) {
+ cc <- lapply(tags,tag.scc,srange=srange,bin=bin);
+ } else {
+ cc <- clusterApplyLB(cluster,tags,tag.scc,srange=srange,bin=bin); names(cc) <- names(tags);
+ }
+ return(t.plotavcc(1,type='l',ccl=list(cc),ttl=list(tags),plot=F,return.ac=T))
+ }
+
+
+ # returns info list for a given tag length (lv), mismatch count (nv)
+ t.cat <- function(qual) {
+ # construct tag set
+ if(qual==qr[1]) {
+ ts <- otl;
+ } else {
+ nts <- names(otl); names(nts) <- nts;
+ # select tags
+ at <- lapply(nts,function(chr) data$tags[[chr]][data$quality[[chr]]==qual]);
+ ntags <- sum(unlist(lapply(at,length)));
+ if(ntags<min.tag.count) { return(NULL); }
+
+ # append to otl
+ ts <- lapply(nts,function(nam) c(otl[[nam]],at[[nam]]));
+ }
+
+ return(t.scc(ts));
+ }
+
+
+ # calculate cross-correlation values for each quality bin
+ ql <- sort(unique(unlist(lapply(data$quality,unique)))); names(ql) <- ql;
+
+ qccl <- lapply(ql,t.cat);
+
+ # acceptance tests
+ ac <- c(T,unlist(lapply(qccl[-1],function(d) if(is.null(d)) { return(F) } else { t.test(d-qccl[[as.character(min.bin)]],alternative="greater")$p.value<pnorm(acceptance.z.score,lower.tail=F) }))); names(ac) <- names(qccl);
+ return(list(informative.bins=ac,quality.cc=qccl))
+ }
+
+ if(accept.all.tags | is.null(data$quality)) {
+ return(list(cross.correlation=ccl.av,peak=list(x=ccl.av$x[pi],y=ccl.av$y[pi]),whs=whs))
+ } else {
+ acc <- scc.acceptance.calc();
+ return(list(cross.correlation=ccl.av,peak=list(x=ccl.av$x[pi],y=ccl.av$y[pi]),whs=whs,quality.bin.acceptance=acc));
+ }
+
+}
+
+
+# select a set of informative tags based on the pre-calculated binding characteristics
+select.informative.tags <- function(data,binding.characteristics=NULL) {
+ if(is.null(binding.characteristics)) {
+ return(data$tags);
+ }
+ if(is.null(binding.characteristics$quality.bin.acceptance)) {
+ cat("binding characteristics doesn't contain quality selection info, accepting all tags\n");
+ return(data$tags);
+ }
+
+ ib <- binding.characteristics$quality.bin.acceptance$informative.bins;
+ abn <- names(ib)[ib]
+
+ chrl <- names(data$tags); names(chrl) <- chrl;
+ lapply(chrl,function(chr) {
+ data$tags[[chr]][as.character(data$quality[[chr]]) %in% abn]
+ })
+}
+
+# -------- ROUTINES FOR CALLING BINDING POSITIONS ------------
+
+# determine binding positions
+# signal.data - IP tag lists
+# control.data - input tag lists
+# e.value - desired E-value threshold (either E-value or FDR threshold must be provided)
+# fdr - desired FDR threshold
+# min.dist - minimal distance between detected positions
+# tag.count.whs - size of the window to be used to estimate confidence interval of the peak fold enrichment ratios
+# enrichmnent.z - Z-score defining the desired confidence level for enrichment interval estimates
+# enrichment.background.scales - define how many tiems larger should be the window for estimating background
+# tag density when evaluating peak enrichment confidence intervals.
+# If multiple values are given, multiple independent interval estimates will be
+# calculated.
+# tec.filter - whether to mask out the regions that exhibit significant background enrichment
+# tec.window.size, tec.z - window size and Z-score for maksing out significant background enrichment regions
+#
+# If the control.data is not provided, the method will assess significance of the determined binding positions
+# based on the randomizations of the original data. The following paramters control such randomizations:
+# n.randomizations - number of randomizations to be performed
+# shuffle.window - size of the bin that defines the tags that are kept together during randomization.
+# value of 0 means that all tags are shuffled independently
+#
+# Binding detection methods:
+# tag.wtd - default method.
+# must specify parameter "whs", which is the half-size of the window used to calculate binding scores
+# tag.lwcc - LWCC method;
+# must specify whs - a size of the window used to calculate binding scores
+# can specify isize (default=15bp) - size of the internal window that is masked out
+find.binding.positions <- function(signal.data,f=1,e.value=NULL,fdr=NULL, masked.data=NULL,control.data=NULL,whs=200,min.dist=200,window.size=4e7,cluster=NULL,debug=T,n.randomizations=3,shuffle.window=1,min.thr=2,topN=NULL, tag.count.whs=100, enrichment.z=2, method=tag.wtd, tec.filter=T,tec.window.size=1e4,tec.z=5,tec.masking.window.size=tec.window.size, tec.poisson.z=5,tec.poisson.ratio=5, tec=NULL, n.control.samples=1, enrichment.scale.down.control=F, enrichment.background.scales=c(1,5 [...]
+
+ if(f<1) {
+ if(debug) { cat("subsampling signal ... "); }
+ signal.data <- lapply(signal.data,function(x) sample(x,length(x)*f))
+ if(debug) { cat("done\n"); }
+ }
+
+
+ if(!is.null(control.data) && !use.randomized.controls) {
+ # limit both control and signal data to a common set of chromosomes
+ chrl <- intersect(names(signal.data),names(control.data));
+ signal.data <- signal.data[chrl];
+ control.data <- control.data[chrl];
+ control <- list(control.data);
+ } else {
+ control <- NULL;
+ }
+
+ prd <- lwcc.prediction(signal.data,min.dist=min.dist,whs=whs,window.size=window.size,e.value=e.value,fdr=fdr,debug=debug,n.randomizations=n.randomizations,shuffle.window=shuffle.window,min.thr=min.thr,cluster=cluster,method=method,bg.tl=control.data,mask.tl=masked.data, topN=topN, control=control,tec.filter=tec.filter,tec.z=tec.z,tec.window.size=tec.window.size, tec.masking.window.size=tec.masking.window.size, tec.poisson.z=tec.poisson.z,tec.poisson.ratio=tec.poisson.ratio, background. [...]
+
+ # add tag counts
+ chrl <- names(prd$npl); names(chrl) <- chrl;
+ prd$npl <- lapply(chrl,function(chr) {
+ pd <- prd$npl[[chr]];
+ pd$nt <- points.within(abs(signal.data[[chr]]),pd$x-tag.count.whs,pd$x+tag.count.whs,return.point.counts=T);
+ return(pd);
+ });
+ prd$f <- f;
+ prd$n <- sum(unlist(lapply(signal.data,length)));
+ if(!is.null(control.data)) {
+ prd$n.bg <- sum(unlist(lapply(control.data,length)));
+ }
+
+ # calculate enrichment ratios
+ prd <- calculate.enrichment.estimates(prd,signal.data,control.data=control.data,fraction=1,tag.count.whs=tag.count.whs,z=enrichment.z,scale.down.control=enrichment.scale.down.control,background.scales=enrichment.background.scales);
+
+ if(mle.filter) {
+ if(!is.null(prd$npl)) {
+ if(length(prd$npl)>1) {
+ mle.columns <- grep("enr.mle",colnames(prd$npl[[1]]));
+ if(length(mle.columns)>1) {
+ prd$npl <- lapply(prd$npl,function(d) d[apply(d[,mle.columns],1,function(x) all(x>min.mle.threshold)),])
+ }
+ }
+ }
+ }
+
+ prd$whs <- whs;
+
+ return(prd);
+}
+
+
+
+# -------- ROUTINES FOR WRITING OUT TAG DENSITY AND ENRICHMENT PROFILES ------------
+# calculate smoothed tag density, optionally subtracting the background
+get.smoothed.tag.density <- function(signal.tags,control.tags=NULL,bandwidth=150,bg.weight=NULL,tag.shift=146/2,step=round(bandwidth/3),background.density.scaling=T,rngl=NULL,scale.by.dataset.size=F) {
+ chrl <- names(signal.tags);
+ if(!is.null(rngl)) { chrl <- names(rngl); }
+ names(chrl) <- chrl;
+
+
+ if(!is.null(control.tags) && is.null(bg.weight)) {
+ bg.weight <- dataset.density.ratio(signal.tags,control.tags,background.density.scaling=background.density.scaling);
+ }
+
+ if(scale.by.dataset.size) {
+ den.scaling <- 1/(dataset.density.size(signal.tags,background.density.scaling=background.density.scaling)/1e6);
+ } else {
+ den.scaling <- 1;
+ }
+
+ lapply(chrl,function(chr) {
+ ad <- abs(signal.tags[[chr]]+tag.shift);
+ rng <- NULL;
+ if(!is.null(rngl)) {
+ rng <- rngl[[chr]];
+ }
+ if(is.null(rng)) {
+ rng <- range(ad);
+ }
+
+ ds <- densum(ad,bw=bandwidth,from=rng[1],to=rng[2],return.x=T,step=step);
+ if(!is.null(control.tags)) {
+ if(!is.null(control.tags[[chr]])) {
+ bsd <- densum(abs(control.tags[[chr]]+tag.shift),bw=bandwidth,from=rng[1],to=rng[2],return.x=F,step=step);
+ ds$y <- ds$y-bsd*bg.weight;
+ }
+ }
+ return(data.frame(x=seq(ds$x[1],ds$x[2],by=step),y=den.scaling*ds$y))
+ })
+}
+
+# get smoothed maximum likelihood estimate of the log2 signal to control enrichment ratio
+get.smoothed.enrichment.mle <- function(signal.tags, control.tags, tag.shift=146/2, background.density.scaling=F, pseudocount=1,bg.weight=NULL, rngl=NULL, chrl=NULL, ... ) {
+ # determine common range
+ if(is.null(chrl)) {
+ chrl <- intersect(names(signal.tags),names(control.tags)); names(chrl) <- chrl;
+ }
+ if(is.null(rngl)) {
+ rngl <- lapply(chrl,function(chr) range(c(range(abs(signal.tags[[chr]]+tag.shift)),range(abs(control.tags[[chr]]+tag.shift)))))
+ } else {
+ chrl <- names(rngl); names(chrl) <- chrl;
+ }
+ ssd <- get.smoothed.tag.density(signal.tags, rngl=rngl, ..., scale.by.dataset.size=F)
+ csd <- get.smoothed.tag.density(control.tags, rngl=rngl, ..., scale.by.dataset.size=F)
+ if(is.null(bg.weight)) {
+ bg.weight <- dataset.density.ratio(signal.tags,control.tags,background.density.scaling=background.density.scaling);
+ }
+ cmle <- lapply(chrl,function(chr) { d <- ssd[[chr]]; d$y <- log2(d$y+pseudocount*bg.weight) - log2(csd[[chr]]$y+pseudocount) - log2(bg.weight); return(d); })
+}
+
+# same as get.smoothed.enrichment.mle, but correcting for the backgroudnd (input) for each of the experiment
+# returns the ratio of enrichment1 to enrichment2
+# params:
+# signal.tags1,control.tags1 - per-chromosome tag lists for IP and input in experiment 1
+# signal.tags2,control.tags2 - per-chromosome tag lists for IP and input in experiment 2
+# bg.weight1,bg.weight2 - optional explicit signal/control (IP/input) normalization factors for experiments 1 and 2
+get.smoothed.enrichment.mle2 <- function(signal.tags1, control.tags1, signal.tags2,control.tags2, tag.shift=146/2, background.density.scaling=F, pseudocount=1,bg.weight1=NULL,bg.weight2=NULL, rngl=NULL, chrl=NULL, ... ) {
+ # determine common range
+ if(is.null(chrl)) {
+ chrl <- intersect(names(signal.tags1),names(signal.tags2)); names(chrl) <- chrl;
+ }
+ if(is.null(rngl)) {
+ rngl <- lapply(chrl,function(chr) range(c(range(abs(signal.tags1[[chr]]+tag.shift)),range(abs(signal.tags2[[chr]]+tag.shift)))))
+ } else {
+ chrl <- names(rngl); names(chrl) <- chrl;
+ }
+ ssd1 <- get.smoothed.tag.density(signal.tags1, rngl=rngl, ..., scale.by.dataset.size=F)
+ ssd2 <- get.smoothed.tag.density(signal.tags2, rngl=rngl, ..., scale.by.dataset.size=F)
+ csd1 <- get.smoothed.tag.density(control.tags1, rngl=rngl, ..., scale.by.dataset.size=F)
+ csd2 <- get.smoothed.tag.density(control.tags2, rngl=rngl, ..., scale.by.dataset.size=F)
+ if(is.null(bg.weight1)) {
+ bg.weight1 <- dataset.density.ratio(signal.tags1,control.tags1,background.density.scaling=background.density.scaling);
+ }
+ if(is.null(bg.weight2)) {
+ bg.weight2 <- dataset.density.ratio(signal.tags2,control.tags2,background.density.scaling=background.density.scaling);
+ }
+ cmle <- lapply(chrl,function(chr) { d <- ssd1[[chr]]; d$y <- log2(ssd1[[chr]]$y+pseudocount*bg.weight1) - log2(csd1[[chr]]$y+pseudocount) - log2(bg.weight1) - log2(ssd2[[chr]]$y+pseudocount*bg.weight2) + log2(csd2[[chr]]$y+pseudocount) + log2(bg.weight2); return(d); })
+}
+
+
+# returns a conservative upper/lower bound profile (log2) given signal tag list, background tag list and window scales
+get.conservative.fold.enrichment.profile <- function(ftl,btl,fws,bwsl=c(1,5,25,50)*fws,step=50,tag.shift=146/2,alpha=0.05,use.most.informative.scale=F,quick.calculation=T,background.density.scaling=T,bg.weight=NULL,posl=NULL,return.mle=F) {
+ # include only chromosomes with more than 2 reads
+ ftl <- ftl[unlist(lapply(ftl,length))>2]
+ chrl <- names(ftl); names(chrl) <- chrl;
+ if(!is.null(posl)) {
+ chrl <- chrl[chrl %in% names(posl)];
+ }
+ # calculate background tag ratio
+ if(is.null(bg.weight)) {
+ bg.weight <- dataset.density.ratio(ftl,btl,background.density.scaling=background.density.scaling);
+ }
+ lapply(chrl,function(chr) {
+ if(is.null(btl[[chr]])) { bt <- c(); } else { bt <- abs(btl[[chr]]+tag.shift); }
+ if(is.null(posl)) {
+ x <- mbs.enrichment.bounds(abs(ftl[[chr]]+tag.shift),bt,fws=fws,bwsl=bwsl,step=step,calculate.upper.bound=T,bg.weight=bg.weight,use.most.informative.scale=use.most.informative.scale,quick.calculation=quick.calculation,alpha=alpha);
+ } else {
+ x <- mbs.enrichment.bounds(abs(ftl[[chr]]+tag.shift),bt,fws=fws,bwsl=bwsl,step=step,calculate.upper.bound=T,bg.weight=bg.weight,use.most.informative.scale=use.most.informative.scale,quick.calculation=quick.calculation,alpha=alpha,pos=posl[[chr]]);
+ }
+ # compose profile showing lower bound for enriched, upper bound for depleted regions
+ ps <- rep(1,length(x$mle));
+ vi <- which(!is.na(x$lb) & x$lb>1);
+ ps[vi] <- x$lb[vi];
+ vi <- which(!is.na(x$ub) & x$ub<1);
+ ps[vi] <- x$ub[vi];
+ ps <- log2(ps);
+ if(is.null(posl)) {
+ if(return.mle) {
+ return(data.frame(x=seq(x$x$s,x$x$e,by=x$x$step),y=ps,mle=log2(x$mle),lb=log2(x$lb),ub=log2(x$ub)));
+ } else {
+ return(data.frame(x=seq(x$x$s,x$x$e,by=x$x$step),y=ps));
+ }
+ } else {
+ if(return.mle) {
+ return(data.frame(x=posl[[chr]],y=ps,mle=log2(x$mle),lb=log2(x$lb),ub=log2(x$ub)));
+ } else {
+ return(data.frame(x=posl[[chr]],y=ps));
+ }
+ }
+ })
+}
+
+
+# same as above, controlling for input, and supporting only a single background scale
+get.conservative.fold.enrichment.profile2 <- function(ftl1,ftl2,btl1,btl2,fws,bws=1*fws,step=50,tag.shift=146/2,alpha=0.05,background.density.scaling=T,bg.weight1=NULL,bg.weight2=NULL,posl=NULL,return.mle=F) {
+ # include only chromosomes with more than 2 reads
+ ftl1 <- ftl1[unlist(lapply(ftl1,length))>2]
+ chrl <- names(ftl1); names(chrl) <- chrl;
+ if(!is.null(posl)) {
+ chrl <- chrl[chrl %in% names(posl)];
+ }
+ # calculate background tag ratio
+ if(is.null(bg.weight1)) {
+ bg.weight1 <- dataset.density.ratio(ftl1,btl1,background.density.scaling=background.density.scaling);
+ }
+ if(is.null(bg.weight2)) {
+ bg.weight2 <- dataset.density.ratio(ftl2,btl2,background.density.scaling=background.density.scaling);
+ }
+ lapply(chrl,function(chr) {
+ x <- binomial.proportion.ratio.bounds(abs(ftl1[[chr]]+tag.shift),abs(btl1[[chr]]+tag.shift),abs(ftl2[[chr]]+tag.shift),abs(btl2[[chr]]+tag.shift),fws=fws,bws=bws,step=step,bg.weight1=bg.weight1,bg.weight2=bg.weight2,alpha=alpha,pos=if(is.null(posl)) { NULL; } else { posl[[chr]] });
+
+ # compose profile showing lower bound for enriched, upper bound for depleted regions
+ ps <- rep(0,length(x$mle));
+ vi <- which(!is.na(x$lb) & x$lb>0);
+ ps[vi] <- x$lb[vi];
+ vi <- which(!is.na(x$ub) & x$ub<0);
+ ps[vi] <- x$ub[vi];
+
+ if(is.null(posl)) {
+ if(return.mle) {
+ return(data.frame(x=x$x,y=ps,mle=x$mle,lb=x$lb,ub=x$ub));
+ } else {
+ return(data.frame(x=x$x,y=ps));
+ }
+ } else {
+ if(return.mle) {
+ return(data.frame(x=posl[[chr]],y=ps,mle=x$mle,lb=x$lb,ub=x$ub));
+ } else {
+ return(data.frame(x=posl[[chr]],y=ps));
+ }
+ }
+ })
+}
+
+
+# write a per-chromosome $x/$y data structure into a wig file
+writewig <- function(dat,fname,feature,threshold=5,zip=F) {
+ chrl <- names(dat); names(chrl) <- chrl;
+ invisible(lapply(chrl,function(chr) {
+ bdiff <- dat[[chr]];
+ ind <- seq(1,length(bdiff$x));
+ ind <- ind[!is.na(bdiff$y[ind])];
+ header <- chr==chrl[1];
+ write.probe.wig(chr,bdiff$x[ind],bdiff$y[ind],fname,append=!header,feature=feature,header=header);
+ }))
+ if(zip) {
+ zf <- paste(fname,"zip",sep=".");
+ system(paste("zip \"",zf,"\" \"",fname,"\"",sep=""));
+ system(paste("rm \"",fname,"\"",sep=""));
+ return(zf);
+ } else {
+ return(fname);
+ }
+}
+
+
+
+# -------- ROUTINES FOR ANALYZING SATURATION PROPERTIES ------------
+
+# PUBLIC
+# calculate minimal saturation enrichment ratios (MSER)
+get.mser <- function(signal.data,control.data,n.chains=5,step.size=1e5, chains=NULL, cluster=NULL, test.agreement=0.99, return.chains=F, enrichment.background.scales=c(1), n.steps=1, ...) {
+ if(is.null(chains)) {
+ ci <- c(1:n.chains); names(ci) <- ci;
+ if(is.null(cluster)) {
+ chains <- lapply(ci,get.subsample.chain.calls,signal.data=signal.data,control.data=control.data,n.steps=n.steps,step.size=step.size,subsample.control=F, enrichment.background.scales=enrichment.background.scales, ...);
+ } else {
+ chains <- clusterApplyLB(cluster,ci,get.subsample.chain.calls,signal.data=signal.data,control.data=control.data,n.steps=n.steps,step.size=step.size,subsample.control=F, enrichment.background.scales=enrichment.background.scales, ...);
+ names(chains) <- ci;
+ }
+ }
+ cvl <- mser.chain.interpolation(chains=chains,enrichment.background.scales=enrichment.background.scales,test.agreement=test.agreement,return.lists=F);
+ if(n.steps>1) {
+ msers <- cvl;
+ } else {
+ msers <- unlist(lapply(cvl,function(d) d$me))
+ }
+ if(return.chains) {
+ return(list(mser=msers,chains=chains));
+ } else {
+ return(msers);
+ }
+}
+
+# PUBLIC
+# interpolate MSER dependency on tag counts
+get.mser.interpolation <- function(signal.data,control.data,target.fold.enrichment=5,n.chains=10,n.steps=6,step.size=1e5, chains=NULL, test.agreement=0.99, return.chains=F, enrichment.background.scales=c(1), excluded.steps=c(seq(2,n.steps-2)), ...) {
+ msers <- get.mser(signal.data,control.data,n.chains=n.chains,n.steps=n.steps,step.size=step.size,chains=chains,test.agrement=test.agreement,return.chains=T,enrichment.background.scales=enrichment.background.scales,excluded.steps=excluded.steps, ...);
+
+ # adjust sizes in case a subset of chromosomes was used
+ mser <- mser.chain.interpolation(chains=msers$chains,enrichment.background.scales=enrichment.background.scales,test.agreement=test.agreement,return.lists=T);
+ sr <- sum(unlist(lapply(signal.data,length)))/mser[[1]][[1]]$n[1];
+
+ # Subsampling each chain requires removing a fraction of each chromosome's
+ # tag list. To get the exact step.size, this often leaves chromosomes with
+ # a non-integer number of tags. The non-integer values are floored, so each
+ # chr can contribute at most 0.999.. <= 1 error to the step.size.
+ floor.error <- length(msers$chains[[1]][[1]]$npl)
+ intpn <- lapply(mser,function(ms) {
+ lmvo <- do.call(rbind,ms)
+ lmvo$n <- lmvo$n*sr;
+ # Don't select rows corresponding to excluded.steps
+ # Keep in mind that nd values are negative.
+ lmvo <- lmvo[lmvo$nd <= (lmvo$nd[1] + floor.error) & lmvo$nd >= (lmvo$nd[1] - floor.error),];
+ lmvo <- na.omit(lmvo);
+ if(any(lmvo$me==1)) {
+ return(list(prediction=NA));
+ }
+ lmvo$n <- log10(lmvo$n); lmvo$me <- log10(lmvo$me-1)
+ # remove non-standard steps
+ emvf <- lm(me ~ n,data=lmvo);
+ tfe <- (log10(target.fold.enrichment-1)-coef(emvf)[[1]])/coef(emvf)[[2]];
+ tfen <- 10^tfe;
+ return(list(prediction=tfen,log10.fit=emvf));
+ })
+
+ if(return.chains) {
+ return(list(interpolation=intpn,chains=msers$chains))
+ } else {
+ return(intpn);
+ }
+
+ return(msers);
+
+}
+
+
+# output binding detection results to a text file
+# the file will contain a table with each row corresponding
+# to a detected position, with the following columns:
+# chr - chromosome or target sequence
+# pos - position of detected binding site on the chromosome/sequence
+# score - a score reflecting magnitude of the binding
+# Evalue - E-value corresponding to the peak magnitude
+# FDR - FDR corresponding to the peak magnitude
+# enrichment.lb - lower bound of the fold-enrichment ratio
+# enrichment.mle - maximum likelihood estimate of the fold-enrichment ratio
+output.binding.results <- function(results,filename) {
+ write(file=filename,"chr\tpos\tscore\tEvalue\tFDR\tenrichment.lb\tenrichment.mle",append=F);
+ chrl <- names(results$npl); names(chrl) <- chrl;
+ x <- lapply(chrl,function(chr) {
+ d <- results$npl[[chr]];
+ if(dim(d)[1]>0) {
+ if(results$thr$type=="topN") {
+ od <- cbind(rep(chr,dim(d)[1]),subset(d,select=c(x,y,enr,enr.mle)))
+ } else {
+ od <- cbind(rep(chr,dim(d)[1]),subset(d,select=c(x,y,evalue,fdr,enr,enr.mle)))
+ }
+ write.table(od,file=filename,col.names=F,row.names=F,sep="\t",append=T,quote=F)
+ }
+ })
+}
+
+
+# -------- LOW-LEVEL ROUTINES ------------
+
+# calculates tag strand cross-correlation for a range of shifts (on positive strand)
+tag.scc <- function(tags,srange=c(50,250),bin=1,tt=NULL,llim=10) {
+ if(is.null(tt)) {
+ tt <- table(sign(tags)*as.integer(floor(abs(tags)/bin+0.5)));
+ }
+ if(!is.null(llim)) { l <- mean(tt); tt <- tt[tt<llim*l] }
+ tc <- as.integer(names(tt));
+ tt <- as.numeric(tt);
+
+ pv <- tt; pv[tc<0]<-0;
+ nv <- tt; nv[tc>0]<-0;
+
+ pti <- which(tc>0)
+ nti <- which(tc<0);
+
+ ptc <- tc[pti];
+ ntc <- (-1)*tc[nti];
+
+ ptv <- tt[pti];
+ ntv <- tt[nti];
+
+ trng <- range(c(range(ptc),range(ntc)))
+ l <- diff(trng)+1;
+ rm(tc,tt);
+
+ mp <- sum(ptv)*bin/l; mn <- sum(ntv)*bin/l;
+ ptv <- ptv-mp; ntv <- ntv-mn;
+ ss <- sqrt((sum(ptv*ptv)+(l-length(ptv))*mp^2) * (sum(ntv*ntv)+(l-length(ntv))*mn^2));
+
+ t.cor <- function(s) {
+ smi <- match(ptc+s,ntc);
+ return((sum(ptv[!is.na(smi)]*ntv[na.omit(smi)]) -
+ mn*sum(ptv[is.na(smi)]) -
+ mp*sum(ntv[-na.omit(smi)]) +
+ mp*mn*(l-length(ptv)-length(ntv)+length(which(!is.na(smi)))))/ss);
+ }
+ shifts <- floor(seq(srange[1],srange[2],by=bin)/bin+0.5);
+ scc <- unlist(lapply(shifts,t.cor)); names(scc) <- shifts*bin;
+ return(scc);
+}
+
+
+# plot tag cross-correlation
+t.plotcc <- function(ac, lab=c(10,5,7), ylab="correlation", xlab="lag", pch=19, grid.i=c(-5:5), grid.s=10, type='b', plot.grid=F, cols=c(1,2,4,"orange",8,"pink"), min.peak.x=NULL, xlim=NULL, plot.147=F, plot.max=T, rmw=1, rescale=F, legendx="right", ltys=rep(1,length(ac)), ...) {
+ if(is.list(ac)) {
+ cols <- cols[1:length(ac)];
+
+ if(!is.null(xlim)) {
+ vx <- as.numeric(names(ac[[1]])); vx <- which(vx>=xlim[1] & vx<=xlim[2]);
+ ac[[1]] <- (ac[[1]])[vx];
+ } else {
+ xlim <- range(as.numeric(names(ac[[1]])));
+ }
+
+
+ plot(as.numeric(names(ac[[1]])),runmean(ac[[1]],rmw),type=type,pch=pch,xlab=xlab,ylab=ylab,lab=lab, col=cols[1], xlim=xlim, lty=ltys[1], ...);
+ if(length(ac)>1) {
+ for(i in seq(2,length(ac))) {
+ irng <- range(ac[[i]]);
+ vx <- as.numeric(names(ac[[i]])); vx <- which(vx>=xlim[1] & vx<=xlim[2]);
+ if(rescale) {
+ lines(as.numeric(names(ac[[i]])[vx]),runmean((ac[[i]][vx]-irng[1])/diff(irng)*diff(range(ac[[1]]))+min(ac[[1]]),rmw),col=cols[i],lty=ltys[i]);
+ } else {
+ lines(as.numeric(names(ac[[i]]))[vx],runmean(ac[[i]][vx],rmw),col=cols[i],lty=ltys[i]);
+ }
+ }
+ }
+ if(is.null(min.peak.x)) {
+ m <- as.numeric(names(ac[[1]])[which.max(ac[[1]])]);
+ } else {
+ sac <- (ac[[1]])[which(as.numeric(names(ac[[1]]))>min.peak.x)]
+ m <- as.numeric(names(sac)[which.max(sac)]);
+ }
+ legend(x="topright",bty="n",legend=c(names(ac)),col=cols,lty=ltys)
+ } else {
+ if(!is.null(xlim)) {
+ vx <- as.numeric(names(ac));
+ vx <- which(vx>=xlim[1] & vx<=xlim[2]);
+ ac <- ac[vx];
+ } else {
+ xlim <- range(as.numeric(names(ac)));
+ }
+
+ plot(names(ac),runmean(ac,rmw),type=type,pch=pch,xlab=xlab,ylab=ylab,lab=lab, xlim=xlim, ...);
+ if(is.null(min.peak.x)) {
+ m <- as.numeric(names(ac)[which.max(ac)]);
+ } else {
+ sac <- ac[which(names(ac)>min.peak.x)]
+ m <- as.numeric(names(sac)[which.max(sac)]);
+ }
+ }
+ if(plot.147) {
+ abline(v=147,lty=2,col=8);
+ }
+ if(plot.grid) {
+ abline(v=m+grid.i*grid.s,lty=3,col="pink");
+ }
+ if(plot.max) {
+ abline(v=m,lty=2,col=2);
+ legend(x=legendx,bty="n",legend=c(paste("max at ",m,"bp",sep="")));
+ return(m);
+ }
+ }
+
+ # plot chromosome-acerage cross-correlation
+ t.plotavcc <- function(ci, main=paste(ci,"chromosome average"), ccl=tl.cc, return.ac=F, ttl=tl, plot=T, ... ) {
+ cc <- ccl[[ci]];
+ if(length(cc)==1) { return(cc[[1]]) };
+ if(length(cc)==0) { return(c()) };
+ ac <- do.call(rbind,cc);
+ # omit NA chromosomes
+ ina <- apply(ac,1,function(d) any(is.na(d)));
+
+ tags <- ttl[[ci]];
+ avw <- unlist(lapply(tags,length)); avw <- avw/sum(avw);
+ ac <- ac[!ina,]; avw <- avw[!ina];
+ ac <- apply(ac,2,function(x) sum(x*avw));
+ if(plot) {
+ m <- t.plotcc(ac, main=main, ...);
+ if(!return.ac) { return(m) }
+ }
+ if(return.ac) { return(ac) }
+ }
+
+ t.plotchrcc <- function(ci,ncol=4, ccl=tl.cc, ... ) {
+ cc <- ccl[[ci]];
+ ac <- do.call(rbind,cc);
+ par(mfrow = c(length(cc)/ncol,ncol), mar = c(3.5,3.5,2.0,0.5), mgp = c(2,0.65,0), cex = 0.8)
+ lapply(names(cc),function(ch) { t.plotcc(cc[[ch]],main=paste(ci,": chr",ch,sep=""), ...) })
+ }
+
+ t.plotavccl <- function(ci, ccl=tl.ccl, main=paste(ci,"chromosome average"), rtl=tl, ... ) {
+ #cc <- lapply(ccl[[ci]],function(x) { if(!is.null(x$M)) { x$M <- NULL;}; return(x); });
+ cc <- ccl[[ci]];
+ chrs <- names(cc[[1]]); names(chrs) <- chrs;
+ acl <- lapply(cc,function(x) do.call(rbind,x));
+ tags <- rtl[[ci]][chrs];
+ avw <- unlist(lapply(tags,length)); avw <- avw/sum(avw);
+ acl <- lapply(acl,function(ac) apply(ac,2,function(x) sum(x*avw)))
+ t.plotcc(acl, main=main, ...);
+ }
+
+ t.plotchrccl <- function(ci,ccl=tl.ccl,ncol=4, ... ) {
+ par(mfrow = c(length(cc[[1]])/ncol,ncol), mar = c(3.5,3.5,2.0,0.5), mgp = c(2,0.65,0), cex = 0.8)
+ lapply(names(cc[[1]]),function(ch) { t.plotcc(lapply(cc,function(x) x[[ch]]),main=paste(ci,": chr",ch,sep=""), ...) })
+ }
+
+
+
+show.scc <- function(tl,srange,cluster=NULL) {
+ if(!is.null(cluster)) {
+ cc <- clusterApplyLB(cluster,tl,tag.scc,srange=srange);
+ names(cc) <- names(tl);
+ } else {
+ cc <- lapply(tl,tag.scc,srange=srange);
+ }
+ par(mfrow = c(1,1), mar = c(3.5,3.5,2.0,0.5), mgp = c(2,0.65,0), cex = 0.8);
+ ccl<-list(sample=cc);
+ ccl.av <- lapply(names(ccl),t.plotavcc,type='l',ccl=ccl,xlim=srange,return.ac=F,ttl=list(sample=tl),main="")[[1]]
+}
+
+# find regions of significant tag enrichment
+find.significantly.enriched.regions <- function(signal.data,control.data,window.size=500,multiplier=1,z.thr=3,mcs=0,debug=F,background.density.scaling=T,masking.window.size=window.size,poisson.z=0,poisson.ratio=4,either=F,tag.shift=146/2,bg.weight=NULL) {
+ if(is.null(bg.weight)) {
+ bg.weight <- dataset.density.ratio(signal.data,control.data,background.density.scaling=background.density.scaling);
+ }
+
+ if(debug) {
+ cat("bg.weight=",bg.weight,"\n");
+ }
+ chrl <- names(signal.data); names(chrl) <- chrl;
+ tec <- lapply(chrl,function(chr) {
+ d <- tag.enrichment.clusters(signal.data[[chr]],control.data[[chr]],bg.weight=bg.weight*multiplier,thr=z.thr,wsize=window.size,mcs=mcs,min.tag.count.z=poisson.z,min.tag.count.ratio=poisson.ratio,either=either,tag.shift=tag.shift);
+ d$s <- d$s-masking.window.size/2; d$e <- d$e+masking.window.size/2;
+ return(d);
+ })
+}
+
+
+# given tag position vectors, find contigs of significant enrichment of signal over background
+# thr - z score threshold
+# mcs - minimal cluster size
+# bg.weight - fraction by which background counts should be multipled
+# min.tag.count.z will impose a poisson constraint based on randomized signal in parallel of background constaint (0 - no constraint)
+tag.enrichment.clusters <- function(signal,background,wsize=200,thr=3,mcs=1,bg.weight=1,min.tag.count.z=0,tag.av.den=NULL,min.tag.count.thr=0,min.tag.count.ratio=4,either=F,tag.shift=146/2) {
+ if(is.null(tag.av.den)) {
+ tag.av.den <- length(signal)/diff(range(abs(signal)));
+ }
+ if(min.tag.count.z>0) {
+ min.tag.count.thr <- qpois(pnorm(min.tag.count.z,lower.tail=F),min.tag.count.ratio*tag.av.den*wsize,lower.tail=F)
+ } else {
+ min.tag.count.thr <- 0;
+ }
+
+ #if(bg.weight!=1) {
+ # background <- sample(background,length(background)*(bg.weight),replace=T);
+ #}
+ # make up combined position, flag vectors
+ pv <- abs(c(signal,background)+tag.shift);
+ fv <- c(rep(1,length(signal)),rep(0,length(background)));
+ po <- order(pv);
+ pv <- pv[po];
+ fv <- fv[po];
+
+ #thr <- pnorm(thr,lower.tail=F);
+
+ storage.mode(wsize) <- storage.mode(mcs) <- storage.mode(fv) <- "integer";
+ storage.mode(thr) <- storage.mode(pv) <- "double";
+ storage.mode(bg.weight) <- "double";
+ storage.mode(min.tag.count.thr) <- "double";
+ either <- as.integer(either);
+ storage.mode(either) <- "integer";
+
+ z <- .Call("find_poisson_enrichment_clusters",pv,fv,wsize,thr,mcs,bg.weight,min.tag.count.thr,either)
+ return(z);
+}
+
+
+
+
+
+# estimates threshold, calculates predictions on complete data and randomized data
+# input: tvl
+# control - a list of control tag datasets
+# no randomization is done if control is supplied
+# return.rtp - return randomized tag peaks - do not fit thresholds or do actual predictions
+# topN - use min threshold to do a run, return topN peaks from entire genome
+# threshold - specify a user-defined threshold
+lwcc.prediction <- function(tvl,e.value=NULL, fdr=0.01, chrl=names(tvl), min.thr=0, n.randomizations=1, shuffle.window=1, debug=T, predict.on.random=F, shuffle.both.strands=T,strand.shuffle.only=F, return.rtp=F, control=NULL, print.level=0, threshold=NULL, topN=NULL, bg.tl=NULL, tec.filter=T, tec.window.size=1e3,tec.z=3, tec.masking.window.size=tec.window.size, tec.poisson.z=3,tec.poisson.ratio=4, bg.reverse=T, return.control.predictions=F, return.core.data=F, background.density.scaling= [...]
+
+ control.predictions <- NULL;
+ core.data <- list();
+
+ if(!is.null(bg.tl) && tec.filter) {
+ if(debug) { cat("finding background exclusion regions ... "); }
+ tec <- find.significantly.enriched.regions(bg.tl,tvl,window.size=tec.window.size,z.thr=tec.z,masking.window.size=tec.masking.window.size,poisson.z=tec.poisson.z,poisson.ratio=tec.poisson.ratio,background.density.scaling=background.density.scaling,either=T);
+ if(return.core.data) {
+ core.data <- c(core.data,list(tec=tec));
+ }
+ if(debug) { cat("done\n"); }
+ }
+
+
+ if(is.null(threshold) && is.null(topN)) { # threshold determination is needed
+ # generate control predictions
+ if(!is.null(control)) {
+ if(debug) { cat("determining peaks on provided",length(control),"control datasets:\n"); }
+ if(!is.null(bg.tl)) {
+ if(bg.reverse) {
+ if(debug) { cat("using reversed signal for FDR calculations\n"); }
+ rbg.tl <- tvl;
+ } else {
+ if(debug) { cat("generating randomized (within chromosome) background ... "); }
+ rbg.tl <- lapply(bg.tl,function(d) {
+ if(length(d)<2) { return(d); }
+ rng <- range(abs(d));
+ rd <- round(runif(length(d),rng[1],rng[2]));
+ nrd <- sample(1:length(rd),length(which(d<0)));
+ rd[nrd] <- rd[nrd]*(-1);
+ return(rd);
+ })
+ if(debug) { cat("done\n"); }
+ }
+ } else {
+ rbg.tl <- NULL;
+ }
+ n.randomizations <- length(control);
+ #signal.size <- sum(unlist(lapply(tvl,length)));
+ rtp <- lapply(control,function(d) {
+ # calculate tag.weight
+ #tag.weight <- sum(unlist(lapply(tvl,length)))/sum(unlist(lapply(d,length)));
+ tag.weight <- dataset.density.ratio(tvl,d,background.density.scaling=background.density.scaling);
+ #cat("tag.weight=",tag.weight," ");
+ return(window.call.mirror.binding(d,min.thr=min.thr, tag.weight=tag.weight,bg.tl=rbg.tl, debug=debug, round.up=T,background.density.scaling=background.density.scaling, ...));
+ #return(window.call.mirror.binding(d,min.thr=min.thr, method=tag.wtd,wsize=200,bg.tl=control.data,window.size=window.size,debug=T,min.dist=min.dist,cluster=cluster))
+ });
+ if(return.core.data) {
+ core.data <- c(core.data,list(rtp.unfiltered=rtp));
+ }
+ if(tec.filter) {
+ if(debug) { cat("excluding systematic background anomalies ... "); }
+ rtp <- lapply(rtp,filter.binding.sites,tec,exclude=T);
+ if(debug) { cat("done\n"); }
+ }
+ } else {
+ if(debug) { cat("determining peaks on ",n.randomizations,"randomized datasets:\n"); }
+ rtp <- lapply(1:n.randomizations,function(i) {
+ rd <- generate.randomized.data(tvl,shuffle.window=shuffle.window,shuffle.both.strands=shuffle.both.strands,strand.shuffle.only=strand.shuffle.only);
+ return(window.call.mirror.binding(rd,min.thr=min.thr,bg.tl=bg.tl, debug=debug, ...));
+ #return(window.call.mirror.binding(rd,min.thr=min.thr, method=tag.wtd,wsize=200,bg.tl=control.data,window.size=window.size,debug=T,min.dist=min.dist))
+ });
+ }
+ if(return.control.predictions) {
+ control.predictions <- rtp;
+ }
+ rtp <- do.call(rbind,lapply(rtp,function(d) do.call(rbind,d))); # merge tables
+
+ # generate real data predictions
+ if(debug) { cat("determining peaks on real data:\n"); }
+ npl <- window.call.mirror.binding(tvl,min.thr=min.thr,bg.tl=bg.tl, debug=debug, background.density.scaling=background.density.scaling, ...);
+ #npl <- window.call.mirror.binding(tvl,min.thr=min.thr, method=tag.wtd,wsize=200,bg.tl=control.data,window.size=window.size,debug=T,min.dist=min.dist,cluster=cluster);
+ if(return.core.data) {
+ core.data <- c(core.data,list(npl.unfiltered=npl));
+ }
+
+ if(!is.null(bg.tl) && tec.filter) {
+ if(debug) { cat("excluding systematic background anomalies ... "); }
+ npl <- filter.binding.sites(npl,tec,exclude=T);
+ if(debug) { cat("done\n"); }
+ }
+
+ # calculate E-value and FDRs for all of the peaks
+ if(debug) { cat("calculating statistical thresholds\n"); }
+ chrl <- names(npl); names(chrl) <- chrl;
+ npld <- do.call(rbind,lapply(names(npl),function(chr) {
+ k <- npl[[chr]];
+ if(!is.null(k) && dim(k)[1]>0) {
+ k$chr <- rep(chr,dim(k)[1])
+ };
+ return(k)
+ }))
+ npld <- cbind(npld,get.eval.fdr.vectors(npld$y,rtp$y));
+ # correct for n.randomizations
+ npld$fdr <- npld$fdr/n.randomizations;
+ npld$evalue <- npld$evalue/n.randomizations;
+
+ if(return.core.data) {
+ core.data <- c(core.data,list(npld=npld));
+ }
+
+ # determine actual thresholds
+ if(is.null(e.value)) {
+ if(is.null(fdr)) { fdr <- 0.01; }
+ thr <- list(root=min(npld$y[npld$fdr<=fdr]),type="FDR",fdr=fdr)
+ if(debug) { cat("FDR",fdr,"threshold=",thr$root,"\n"); }
+ } else {
+ # determine threshold based on e-value
+ thr <- list(root=min(npld$y[npld$evalue<=e.value]),type="Evalue",e.value=e.value)
+ if(debug) { cat("E-value",e.value,"threshold=",thr$root,"\n"); }
+ }
+
+
+ npld <- npld[npld$y>=thr$root,];
+ if(dim(npld)[1]>0) {
+ npl <- tapply(c(1:dim(npld)[1]),as.factor(npld$chr),function(ii) {df <- npld[ii,]; df$chr <- NULL; return(df) });
+ } else {
+ npl <- list();
+ }
+ } else {
+ if(is.null(threshold)) {
+ thr <- list(root=min.thr,type="minimal");
+ } else {
+ thr <- list(root=threshold,type="user specified");
+ }
+
+ cat("calling binding positions using",thr$type,"threshold (",thr$root,") :\n");
+ npl <- window.call.mirror.binding(tvl=tvl,min.thr=thr$root,bg.tl=bg.tl, debug=debug, ...);
+ if(!is.null(bg.tl) && tec.filter) {
+ if(debug) { cat("excluding systematic background anomalies ... "); }
+ npl <- filter.binding.sites(npl,tec,exclude=T);
+ if(debug) { cat("done\n"); }
+ }
+
+ if(!is.null(topN)) {
+ # determine threshold based on topN peaks
+ ay <- unlist(lapply(npl,function(d) d$y));
+ if(length(ay)>topN) {
+ thr <- list(root=sort(ay,decreasing=T)[topN],type="topN",topN=topN);
+ cat(paste("determined topN threshold :",thr$root,"\n"));
+ npl <- lapply(npl,function(d) d[d$y>thr$root,]);
+ }
+ }
+ }
+
+ if(return.core.data) {
+ return(c(list(npl=npl,thr=thr),core.data));
+ }
+ if(return.control.predictions && !is.null(control.predictions)) {
+ return(list(npl=npl,thr=thr,control.predictions=control.predictions));
+ }
+ return(list(npl=npl,thr=thr));
+}
+
+# window tag difference method
+wtd <- function(x,y,s,e,whs=200,return.peaks=T,min.thr=5,min.dist=200,step=1,direct.count=F,tag.weight=1,bg.x=NULL,bg.y=NULL,bg.weight=1,mask.x=NULL,mask.y=NULL,ignore.masking=F, bg.whs=whs, round.up=F, ...) {
+ ignore.masking <- ignore.masking | (is.null(mask.x) & is.null(mask.y));
+ if(step>1) {
+ x <- floor(x/step+0.5); y <- floor(y/step+0.5)
+
+ if(!is.null(bg.x)) {
+ bg.x <- floor(bg.x/step+0.5); bg.y <- floor(bg.y/step+0.5)
+ }
+
+ if(!is.null(mask.x)) {
+ mask.x <- floor(mask.x/step+0.5); mask.y <- floor(mask.y/step+0.5)
+ }
+
+
+ whs <- floor(whs/step+0.5);
+ bg.whs <- floor(bg.whs/step+0.5);
+ min.dist <- floor(min.dist/step +0.5);
+ s <- floor(s/step+0.5)
+ e <- floor(e/step+0.5)
+ }
+
+ # scale bg.weight, since within calculation they are considered independent
+ bg.weight <- bg.weight*tag.weight;
+
+ rx <- c(s-whs,e+whs);
+
+ # compile tag vectors
+ xt <- table(x);
+ xh <- integer(diff(rx)+1);
+ xh[as.integer(names(xt))-rx[1]+1] <- as.integer(xt);
+
+ yt <- table(y);
+ yh <- integer(diff(rx)+1);
+ yh[as.integer(names(yt))-rx[1]+1] <- as.integer(yt);
+
+ # compile background vectors
+ if(!is.null(bg.x) && length(bg.x)>0) {
+ bg.subtract <- 1;
+
+ bg.xt <- table(bg.x);
+ bg.xh <- integer(diff(rx)+1);
+ bg.xh[as.integer(names(bg.xt))-rx[1]+1] <- as.integer(bg.xt);
+ rm(bg.xt);
+
+ bg.yt <- table(bg.y);
+ bg.yh <- integer(diff(rx)+1);
+ bg.yh[as.integer(names(bg.yt))-rx[1]+1] <- as.integer(bg.yt);
+ rm(bg.yt);
+
+ # adjust bg.weight according to bg.whs
+ if(bg.whs!=whs) {
+ bg.weight <- bg.weight*whs/bg.whs;
+ }
+ } else {
+ bg.subtract <- 0;
+ bg.xh <- bg.yh <- c();
+ }
+
+ # record masked positions
+ if(!ignore.masking) {
+ if(!is.null(mask.x) && length(mask.x)>0) {
+ mvx <- unique(mask.x); mvx <- setdiff(mvx,as.numeric(names(xt)));
+ mvx <- mvx[mvx>=rx[1] & mvx<=rx[2]];
+ xh[mvx-rx[1]+1] <- -1;
+ }
+
+ if(!is.null(mask.y) && length(mask.y)>0) {
+ mvy <- unique(mask.y); mvy <- setdiff(mvy,as.numeric(names(yt)));
+ mvy <- mvy[mvy>=rx[1] & mvy<=rx[2]];
+ yh[mvy-rx[1]+1] <- -1;
+ }
+ }
+
+ rm(xt,yt);
+
+ if(round.up) { round.up <- 1; } else { round.up <- 0; }
+
+ storage.mode(xh) <- storage.mode(yh) <- "integer";
+ storage.mode(bg.xh) <- storage.mode(bg.yh) <- "integer";
+ nx <- length(xh); storage.mode(nx) <- storage.mode(whs) <- storage.mode(bg.whs) <- "integer";
+ rp <- as.integer(return.peaks);
+ dcon <- as.integer(direct.count);
+ storage.mode(rp) <- storage.mode(min.dist) <- "integer";
+ storage.mode(min.thr) <- "double";
+ storage.mode(dcon) <- "integer";
+ storage.mode(tag.weight) <- "double";
+ storage.mode(bg.weight) <- "double";
+ storage.mode(bg.subtract) <- "integer";
+ storage.mode(round.up) <- "integer";
+ im <- as.integer(ignore.masking);
+ storage.mode(im) <- "integer";
+ z <- .Call("wtd",xh,yh,whs,rp,min.dist,min.thr,dcon,tag.weight,im,bg.subtract,bg.xh,bg.yh,bg.whs,bg.weight,round.up);
+ if(return.peaks) {
+ return(data.frame(x=(z$x+rx[1])*step,y=z$v));
+ } else {
+ return(list(x=rx*step,y=z));
+ }
+}
+
+
+tag.wtd <- function(ctv,s,e,return.peaks=T, bg.ctv=NULL, mask.ctv=NULL, ...) {
+ x <- ctv[ctv>=s & ctv<=e];
+ y <- (-1)*ctv[ctv<=-s & ctv>=-e];
+
+ if(!is.null(bg.ctv)) {
+ bg.x <- bg.ctv[bg.ctv>=s & bg.ctv<=e];
+ bg.y <- (-1)*bg.ctv[bg.ctv<=-s & bg.ctv>=-e];
+ } else {
+ bg.x <- bg.y <- NULL;
+ }
+
+ if(!is.null(mask.ctv)) {
+ mask.x <- mask.ctv[mask.ctv>=s & mask.ctv<=e];
+ mask.y <- (-1)*mask.ctv[mask.ctv<=-s & mask.ctv>=-e];
+ } else {
+ mask.x <- mask.y <- NULL;
+ }
+
+ if(length(x)==0 | length(y) ==0) {
+ if(return.peaks) {
+ return(data.frame(x=c(),y=c()));
+ } else {
+ rx <- range(c(x,y));
+ return(list(x=rx,y=numeric(diff(rx)+1)));
+ }
+ } else {
+ return(wtd(x,y,s,e,return.peaks=return.peaks, bg.x=bg.x,bg.y=bg.y, mask.x=mask.x,mask.y=mask.y, ...))
+ }
+}
+
+# shuffles tags in chromosome blocks of a specified size
+# note: all coordinates should be positive
+tag.block.shuffle <- function(tags,window.size=100) {
+ if(length(tags)<3) {
+ warning("too few tags for shuffling");
+ return(tags);
+ }
+ rng <- range(tags);
+ #if(rng[1]<0) { stop("negative tag coordinates found") }
+ if(diff(rng)<=window.size) {
+ warning(paste("tag range (",diff(rng),") is smaller than shuffle window size"));
+ return(tags);
+ }
+
+ if(window.size==0) {
+ return(as.integer(runif(length(tags),min=rng[1],max=rng[2])))
+ } else if(window.size==1) {
+ tt <- table(tags);
+ return(rep(runif(length(tt),min=rng[1],max=rng[2]),as.integer(tt)))
+ } else {
+ # block positions
+ bp <- tags %/% window.size;
+ # block-relative tag positions
+ rp <- tags %% window.size;
+
+ # shuffle block positions
+ bpu <- unique(bp);
+ rbp <- range(bpu);
+ bps <- as.integer(runif(length(bpu),min=rbp[1],max=rbp[2]));
+ bpi <- match(bp,bpu);
+ sbp <- bps[bpi];
+ #sbp <- rbp[1]+match(bp,sample(rbp[1]:rbp[2]))
+ return(sbp*window.size+rp);
+ }
+}
+
+
+# calculate window cross-correlation
+lwcc <- function(x,y,s,e,whs=100,isize=20,return.peaks=T,min.thr=1,min.dist=100,step=1,tag.weight=1,bg.x=NULL,bg.y=NULL,bg.weight=NULL,mask.x=NULL,mask.y=NULL,bg.whs=whs,round.up=F) {
+ if(step>1) {
+ x <- floor(x/step+0.5); y <- floor(y/step+0.5)
+
+ if(!is.null(bg.x)) {
+ bg.x <- floor(bg.x/step+0.5); bg.y <- floor(bg.y/step+0.5)
+ }
+
+ if(!is.null(mask.x)) {
+ mask.x <- floor(mask.x/step+0.5); mask.y <- floor(mask.y/step+0.5)
+ }
+
+ whs <- floor(whs/step+0.5);
+ bg.whs <- floor(bg.whs/step+0.5);
+ isize <- floor(isize/step+0.5);
+ min.dist <- floor(min.dist/step +0.5);
+ s <- floor(s/step+0.5)
+ e <- floor(e/step+0.5)
+ }
+
+ # scale bg.weight, since within calculation they are considered independent
+ bg.weight <- bg.weight*tag.weight;
+
+
+ rx <- c(s-whs,e+whs);
+ xt <- table(x);
+ xh <- integer(diff(rx)+1);
+ xh[as.integer(names(xt))-rx[1]+1] <- as.integer(xt);
+
+ yt <- table(y);
+
+ yh <- integer(diff(rx)+1);
+ yh[as.integer(names(yt))-rx[1]+1] <- as.integer(yt);
+
+ # compile background vectors
+ if(!is.null(bg.x) && length(bg.x)>0) {
+ bg.subtract <- 1;
+
+ bg.xt <- table(bg.x);
+ bg.xh <- integer(diff(rx)+1);
+ bg.xh[as.integer(names(bg.xt))-rx[1]+1] <- as.integer(bg.xt);
+ rm(bg.xt);
+
+ bg.yt <- table(bg.y);
+ bg.yh <- integer(diff(rx)+1);
+ bg.yh[as.integer(names(bg.yt))-rx[1]+1] <- as.integer(bg.yt);
+ rm(bg.yt);
+
+ # adjust bg.weight according to bg.whs
+ bg.weight <- bg.weight*(whs-isize)/bg.whs;
+ } else {
+ bg.subtract <- 0;
+ bg.xh <- bg.yh <- c();
+ }
+
+ # record masked positions
+ if(!is.null(mask.x) && length(mask.x)>0) {
+ mvx <- unique(mask.x); mvx <- setdiff(mvx,as.numeric(names(xt)));
+ mvx <- mvx[mvx>=rx[1] & mvx<=rx[2]];
+
+ xh[mvx-rx[1]+1] <- -1;
+ }
+
+ if(!is.null(mask.y) && length(mask.y)>0) {
+ mvy <- unique(mask.y); mvy <- setdiff(mvy,as.numeric(names(yt)));
+ mvy <- mvy[mvy>=rx[1] & mvy<=rx[2]];
+ yh[mvy-rx[1]+1] <- -1;
+ }
+
+ rm(xt,yt);
+ if(round.up) { round.up <- 1; } else { round.up <- 0; }
+
+ storage.mode(xh) <- storage.mode(yh) <- "integer";
+ storage.mode(bg.xh) <- storage.mode(bg.yh) <- "integer";
+ nx <- length(xh); storage.mode(nx) <- storage.mode(whs) <- storage.mode(isize) <- storage.mode(bg.whs) <- "integer";
+ rp <- as.integer(return.peaks);
+ storage.mode(rp) <- storage.mode(min.dist) <- "integer";
+ storage.mode(min.thr) <- "double";
+ storage.mode(tag.weight) <- "double";
+ storage.mode(bg.weight) <- "double";
+ storage.mode(bg.subtract) <- "integer";
+ storage.mode(round.up) <- "integer";
+
+ # allocate return arrays
+ #cc <- numeric(nx); storage.mode(cc) <- "double";
+ z <- .Call("lwcc",xh,yh,whs,isize,rp,min.dist,min.thr,tag.weight,bg.subtract,bg.xh,bg.yh,bg.whs,bg.weight,round.up);
+ if(return.peaks) {
+ return(data.frame(x=(z$x+rx[1])*step,y=z$v));
+ } else {
+ return(list(x=rx*step,y=z));
+ }
+}
+
+
+tag.lwcc <- function(ctv,s,e,return.peaks=T, bg.ctv=NULL, mask.ctv=NULL, ...) {
+ x <- ctv[ctv>=s & ctv<=e];
+ y <- (-1)*ctv[ctv<=-s & ctv>=-e];
+
+ if(!is.null(bg.ctv)) {
+ bg.x <- bg.ctv[bg.ctv>=s & bg.ctv<=e];
+ bg.y <- (-1)*bg.ctv[bg.ctv<=-s & bg.ctv>=-e];
+ } else {
+ bg.x <- bg.y <- NULL;
+ }
+
+ if(!is.null(mask.ctv)) {
+ mask.x <- mask.ctv[mask.ctv>=s & mask.ctv<=e];
+ mask.y <- (-1)*mask.ctv[mask.ctv<=-s & mask.ctv>=-e];
+ } else {
+ mask.x <- mask.y <- NULL;
+ }
+
+ if(length(x)==0 | length(y) ==0) {
+ if(return.peaks) {
+ return(data.frame(x=c(),y=c()));
+ } else {
+ rx <- range(c(x,y));
+ return(list(x=rx,y=numeric(diff(rx)+1)));
+ }
+ } else {
+ return(lwcc(x,y, s,e,return.peaks=return.peaks, bg.x=bg.x,bg.y=bg.y, mask.x=mask.x,mask.y=mask.y, ...))
+ }
+}
+
+# determine mirror-based binding positions using sliding window along each chromosome
+# extra parameters are passed on to call.nucleosomes()
+window.call.mirror.binding <- function(tvl,window.size=4e7, debug=T, cluster=NULL, bg.tl=NULL, mask.tl=NULL, background.density.scaling=T, ...) {
+ chrl <- names(tvl);
+ # determine bg.weight
+ if(!is.null(bg.tl)) {
+ bg.weight <- dataset.density.ratio(tvl,bg.tl,background.density.scaling=background.density.scaling);
+ } else {
+ bg.weight <- NULL;
+ }
+ if(debug) {
+ cat("bg.weight=",bg.weight," ");
+ }
+
+ names(chrl) <- chrl;
+
+ if(is.null(cluster)) {
+ return(lapply(chrl,function(chr) {
+ bg.ctv <- NULL; if(!is.null(bg.tl)) { bg.ctv <- bg.tl[[chr]]; };
+ mask.ctv <- NULL; if(!is.null(mask.tl)) { mask.ctv <- mask.tl[[chr]]; };
+
+ window.chr.call.mirror.binding(list(ctv=tvl[[chr]],bg.ctv=bg.ctv,mask.ctv=mask.ctv),window.size=window.size,chr=chr,debug=debug, bg.weight=bg.weight, bg.ctv=bg.ctv, mask.ctv=mask.ctv, ...);
+ }));
+ } else {
+ # add bg.ctv and mask.ctv to parallel call
+ tvll <- lapply(chrl,function(chr) {
+ bg.ctv <- NULL; if(!is.null(bg.tl)) { bg.ctv <- bg.tl[[chr]]; };
+ mask.ctv <- NULL; if(!is.null(mask.tl)) { mask.ctv <- mask.tl[[chr]]; };
+ return(list(ctv=tvl[[chr]],bg.ctv=bg.ctv,mask.ctv=mask.ctv))
+ });
+ bl <- clusterApplyLB(cluster,tvll,window.chr.call.mirror.binding,window.size=window.size,debug=debug, bg.weight=bg.weight, ...);
+ names(bl) <- chrl;
+ return(bl);
+ }
+}
+
+window.chr.call.mirror.binding <- function(ctvl,window.size,debug=T, chr="NA", cluster=NULL, method=tag.wtd, bg.ctv=NULL, mask.ctv=NULL, ...) {
+ ctv <- ctvl$ctv; bg.ctv <- ctvl$bg.ctv; mask.ctv <- ctvl$mask.ctv;
+ if(is.null(ctv)) { return(data.frame(x=c(),y=c())) }
+ if(length(ctv)<2) { return(data.frame(x=c(),y=c())) }
+
+ dr <- range(unlist(lapply(ctv,function(x) range(abs(x)))))
+ n.windows <- ceiling(diff(dr)/window.size);
+
+
+ pinfo <- c();
+ if(debug) {
+ cat(paste("processing ",chr," in ",n.windows," steps [",sep=""));
+ }
+ for(i in 1:n.windows) {
+ s <- dr[1]+(i-1)*window.size;
+ npn <- method(s=s, e=s+window.size,ctv=ctv, return.peaks=T, bg.ctv=bg.ctv, mask.ctv=mask.ctv, ... );
+ if(length(npn) > 0) { pinfo <- rbind(pinfo,npn) }
+ if(debug) {
+ cat(".");
+ }
+ }
+ if(debug) {
+ cat(paste("] done (",dim(pinfo)[1],"positions)\n"));
+ } else {
+ cat(".");
+ }
+ return(data.frame(x=pinfo[,1],y=pinfo[,2]));
+}
+
+generate.randomized.data <- function(data,shuffle.window=1,shuffle.both.strands=T,strand.shuffle.only=F,chrl=names(data)) {
+ names(chrl) <- unlist(chrl);
+ if(strand.shuffle.only) {
+ # shuffle just strand assignment, not tag positions
+ rt <- lapply(data[unlist(chrl)],function(tv) tv*sample(c(-1,1),length(tv),replace=T));
+ } else {
+ if(shuffle.both.strands) {
+ rt <- lapply(data[unlist(chrl)],function(tv) {
+ pti <- which(tv>0); return(c(tag.block.shuffle(tv[pti],window.size=shuffle.window),tag.block.shuffle(tv[-pti],window.size=shuffle.window)))
+ });
+ } else {
+ rt <- lapply(data[unlist(chrl)],function(tv) { pti <- which(tv>0); return(c(tag.block.shuffle(tv[pti],window.size=shuffle.window),tv[-pti]))});
+ }
+ }
+}
+
+# determine threshold based on E value
+# for efficiency chrl should include just one or two small chromosomes
+# optional parameters are passed to call.nucleosomes()
+determine.lwcc.threshold <- function(tvl,chrl=names(tvl),e.value=100, n.randomizations=1, min.thr=1, debug=F, tol=1e-2, shuffle.window=1, shuffle.both.strands=T, return.rtp=F, control=NULL, strand.shuffle=F, ...) {
+ names(chrl) <- unlist(chrl);
+
+ # determine fraction of total tags contained in the specified nucleosomes
+ ntags <- sum(unlist(lapply(tvl,function(cv) length(cv))));
+ nctags <- sum(unlist(lapply(chrl, function(cn) length(tvl[[cn]]))));
+ # calculate actual target E value
+ if(!is.null(control)) {
+ n.randomizations <- length(control);
+ }
+ eval <- e.value*n.randomizations*nctags/ntags
+ if(eval<1) {
+ warning("specified e.value and set of chromosomes results in target e.value of less than 1");
+ eval <- 1;
+ }
+
+ if(debug) {
+ cat(paste("randomizations =",n.randomizations," chromosomes =",length(chrl),"\n"))
+ cat(paste("adjusted target eval =",eval,"\ngenerating randomized tag peaks ..."));
+ }
+
+ # get peaks on randomized tags
+ if(is.null(control)) {
+ rtp <- data.frame(do.call(rbind,lapply(1:n.randomizations,function(i) {
+ if(strand.shuffle) {
+ # shuffle just strand assignment, not tag positions
+ rt <- lapply(tvl[unlist(chrl)],function(tv) tv*sample(c(-1,1),length(tv),replace=T));
+ } else {
+ if(shuffle.both.strands) {
+ rt <- lapply(tvl[unlist(chrl)],function(tv) {
+ pti <- which(tv>0); return(c(tag.block.shuffle(tv[pti],window.size=shuffle.window),tag.block.shuffle(tv[-pti],window.size=shuffle.window)))
+ });
+ } else {
+ rt <- lapply(tvl[unlist(chrl)],function(tv) { pti <- which(tv>0); return(c(tag.block.shuffle(tv[pti],window.size=shuffle.window),tv[-pti]))});
+ }
+ }
+ if(debug) {
+ cat(".");
+ }
+ rl <- window.call.mirror.binding(rt,min.thr=min.thr, debug=F, ...);
+
+ return(do.call(rbind,rl))
+ #return(do.call(rbind,window.call.mirror.binding(rt,min.thr=min.thr, debug=F, whs=100,isize=10,window.size=3e7,min.dist=200)))
+ })));
+
+ } else {
+ if(debug) {
+ cat(" using provided controls ");
+ }
+ rtp <- data.frame(do.call(rbind,lapply(control,function(rt) do.call(rbind,window.call.mirror.binding(rt,min.thr=min.thr, debug=F, ...)))))
+ }
+
+ if(return.rtp) {
+ return(rtp)
+ }
+
+ if(debug) {
+ cat(" done\nfinding threshold .");
+ }
+
+ # determine range and starting value
+ rng <- c(min.thr,max(na.omit(rtp$y)))
+
+ # find E value threshold
+ count.nucs.f <- function(nthr) {
+ return(eval-length(which(rtp$y>=nthr)));
+ }
+
+ # estimate position of the root by downward bisection iterations
+ mv <- c(eval); mvp <- c(rng[2]); ni <- 1;
+ max.it <- 2*as.integer(log2(rng[2]/rng[1])+0.5);
+ while((ni<=max.it) & (mv[1]>=0)) {
+ np <- mvp[1]/2;
+ npv <- count.nucs.f(np);
+ mv <- c(npv,mv);
+ mvp <- c(np,mvp);
+ ni <- ni+1;
+ }
+
+
+ if(ni>max.it) {
+ # determine lowest value
+ if(debug) {
+ cat(paste("exceeded max.it (",max.it,"), returning lowest point",signif(mvp[1],4)));
+ }
+ return(list(root=mvp[1]))
+ } else {
+ rng <- mvp[1:2];
+ if(mv[2]==0) rng[2] <- mvp[3];
+ if(debug) {
+ cat(paste("bound to (",signif(rng[1],4),signif(rng[2],4),") "));
+ }
+ }
+
+ # find root on the right side
+ x <- uniroot(count.nucs.f,rng,tol=tol);
+ #x$max <- o$par;
+ #x$f.max <- (-1)*o$value;
+ if(debug) {
+ cat(paste(" done (thr=",signif(x$root,4),")\n"));
+ }
+ return(x);
+
+}
+
+
+# determine membership of points in fragments
+points.within <- function(x,fs,fe,return.list=F,return.unique=F,sorted=F,return.point.counts=F) {
+ if(is.null(x) | length(x) < 1) { return(c()) };
+ if(!sorted) {
+ ox <- rank(x,ties="first");
+ x <- sort(x);
+ }
+
+ se <- c(fs,fe);
+ fi <- seq(1:length(fs));
+ fi <- c(fi,-1*fi);
+
+ fi <- fi[order(se)];
+ se <- sort(se);
+
+ storage.mode(x) <- storage.mode(fi) <- storage.mode(se) <- "integer";
+ if(return.unique) { iu <- 1; } else { iu <- 0; }
+ if(return.list) { il <- 1; } else { il <- 0; }
+ if(return.point.counts) { rpc <- 1; } else { rpc <- 0; }
+ storage.mode(iu) <- storage.mode(il) <- storage.mode(rpc) <- "integer";
+ result <- .Call("points_within",x,se,fi,il,iu,rpc);
+ if(!sorted & !return.point.counts) {
+ result <- result[ox];
+ }
+ return(result);
+}
+
+
+# determine cooridnates of points x relative to signed
+# positions pos within size range
+get.relative.coordinates <- function(x,pos,size,sorted=F) {
+ if(!sorted) {
+ op <- order(abs(pos));
+ x <- sort(x); pos <- pos[op];
+ }
+ #dyn.load("~/zhao/sc/peaks.so");
+ storage.mode(x) <- storage.mode(pos) <- storage.mode(size) <- "integer";
+ rf <- .Call("get_relative_coordinates",x,pos,size);
+ if(!sorted) {
+ rf$i <- op[rf$i];
+ } else {
+ return(rf$i);
+ }
+ return(rf);
+}
+
+# given list of magnitude values for signal(x) and control (y),
+# return a dataframe with $e.val and $fdr
+get.eval.fdr.vectors <- function(x,y) {
+ nx <- length(x); ny <- length(y);
+ if(nx==0) { return(data.frame(evalue=c(),fdr=c())) }
+ if(ny==0) { return(data.frame(evalue=rep(0,nx),fdr=rep(1,nx))) }
+ ex <- ecdf(x); ey <- ecdf(y);
+
+ evals <- (1-ey(x))*ny;
+ yvals <- (1-ex(x))*nx;
+ fdr <- (evals+0.5)/(yvals+0.5); # with pseudo-counts
+ fdr[yvals==0] <- min(fdr); # correct for undercounts
+ # find a min x corresponding to a minimal FDR
+ mfdr <- min(fdr);
+ mfdrmx <- min(x[fdr==mfdr]);
+ # correct
+ fdr[x>=mfdrmx] <- mfdr;
+ return(data.frame(evalue=(evals+1),fdr=fdr));
+}
+
+
+# filter predictions to remove calls failling into the tag enrichment clusters ( chr list of $s/$e dfs)
+filter.binding.sites <- function(bd,tec,exclude=F) {
+ chrl <- names(bd); names(chrl) <- chrl;
+ lapply(chrl,function(chr) {
+ cbd <- bd[[chr]];
+ if(is.null(cbd)) { return(NULL) };
+ if(length(cbd)==0) { return(NULL) };
+ if(dim(cbd)[1]>0) {
+ ctec <- tec[[chr]];
+ if(length(ctec$s)>0) {
+ if(exclude) {
+ pwi <- which(points.within(cbd$x,ctec$s,ctec$e)== -1);
+ } else {
+ pwi <- which(points.within(cbd$x,ctec$s,ctec$e)> -1);
+ }
+ return(cbd[pwi,]);
+ } else {
+ if(exclude) {
+ return(cbd);
+ } else {
+ return(data.frame(x=c(),y=c()));
+ }
+ }
+ } else {
+ return(cbd);
+ }
+ });
+}
+
+
+# PUBLIC
+# generate predictions on sequential (chained) subsamples of data
+# if step.size <1, it is intepreted as a fraciton and a each subsequent subsample
+# is of a size (1-fraction.step)*N (N - size of the signal data);
+# otherwise the step.size is interpreted as a number of tags, and each subsample is of the size N-step.size
+get.subsample.chain.calls <- function(signal.data,control.data,n.steps=NULL,step.size=1e6,subsample.control=F,debug=F,min.ntags=1e3, excluded.steps=c(), test.chromosomes=NULL, ... ) {
+
+ if(!is.null(test.chromosomes)) {
+ # adjust step size
+ sz <- sum(unlist(lapply(signal.data,length)))
+ signal.data <- signal.data[test.chromosomes];
+ control.data <- control.data[test.chromosomes];
+
+ if(step.size>1) {
+ step.size <- step.size*sum(unlist(lapply(signal.data,length)))/sz;
+ # cat("adjusted step.size=",step.size,"\n");
+ }
+ }
+
+ if(is.null(n.steps)) {
+ if(step.size<1) {
+ # down to 10%
+ n.steps <- log(0.1)/log(step.size);
+ } else {
+ n.steps <- floor(sum(unlist(lapply(signal.data,length)))/step.size)
+ }
+ }
+ if(subsample.control && !is.null(control.data)) {
+ # normalize control to the signal size
+ if(debug) { cat("pre-subsampling control.\n"); }
+ bg.weight <- sum(unlist(lapply(signal.data,length)))/sum(unlist(lapply(control.data,length)))
+ control.data <- lapply(control.data,function(d) sample(d,length(d)*bg.weight,replace=(bg.weight>1)))
+ }
+ calls <- list();
+ callnames <- c();
+ for(i in 0:n.steps) {
+ if(debug) { cat("chained subsample step",i,":\n"); }
+ if(!i %in% excluded.steps) {
+ ans <- list(find.binding.positions(signal.data=signal.data,control.data=control.data,debug=debug, skip.control.normalization=T, ...));
+ names(ans) <- as.character(c(i));
+ calls <- c(calls,ans);
+ callnames <- c(callnames,i);
+ }
+ # subsample
+ if(step.size<1) {
+ # fraction steps
+ f <- 1-step.size;
+ } else {
+ # bin steps
+ sz <- sum(unlist(lapply(signal.data,length)));
+ f <- (sz-step.size)/sz;
+ if(f<=0) break;
+ }
+ if(debug) { cat("chained subsampling using fraction",f,".\n"); }
+ signal.data <- lapply(signal.data,function(d) sample(d,length(d)*f));
+ if(subsample.control && !is.null(control.data)) {
+ control.data <- lapply(control.data,function(d) sample(d,length(d)*f));
+ }
+ sz <- sum(unlist(lapply(signal.data,length)));
+ if(sz<min.ntags) break;
+ }
+ names(calls) <- callnames;
+ return(calls);
+}
+
+
+# chain-subsample dataset and calculate MSER interpolation
+mser.chain.interpolation <- function(signal.data=NULL,control.data=NULL,chains=NULL,n.chains=5,debug=F, enrichment.background.scales=c(1,5), test.agreement=0.99, agreement.distance=50, return.median=F, mean.trim=0.1, enr.field="enr", return.lists=F, ...) {
+ if(is.null(chains)) {
+ cn <- c(1:n.chains); names(cn) <- cn;
+ tf <- function(i, ...) get.subsample.chain.calls(signal.data,control.data,debug=debug, enrichment.background.scales=enrichment.background.scales, ...);
+ chains <- lapply(cn,tf,...);
+ }
+ names(enrichment.background.scales) <- enrichment.background.scales;
+ lapply(enrichment.background.scales,function(scale) {
+ actual.enr.field <- enr.field;
+ if(scale>1) {
+ actual.enr.field <- paste(actual.enr.field,scale,sep=".");
+ }
+
+ cvl <- lapply(chains,function(chain) {
+ nn <- sort(unlist(lapply(chain,function(d) d$n)),decreasing=T);
+ nd <- diff(nn);
+ nn <- nn[-length(nn)];
+ me <- lapply(c(2:length(chain)),function(i) {
+ sla <- t.precalculate.ref.peak.agreement(chain[[i-1]],chain[i],agreement.distance=agreement.distance,enr.field=actual.enr.field)
+ me <- t.find.min.saturated.enr(sla,thr=1-test.agreement)
+ menr <- max(min(na.omit(unlist(lapply(chain[[i-1]]$npl,function(d) d[actual.enr.field])))),min(na.omit(unlist(lapply(chain[[i]]$npl,function(d) d[actual.enr.field])))),1)
+ if(me<=menr) { me <- 1; };
+ return(me);
+ })
+ data.frame(n=nn,me=unlist(me),nd=nd);
+ });
+ if(return.lists) { return(cvl) }
+ cvl <- na.omit(do.call(rbind,cvl));
+ if(return.median) {
+ tv <- tapply(cvl$me,as.factor(cvl$n),median)
+ } else {
+ tv <- tapply(cvl$me,as.factor(cvl$n),mean,trim=mean.trim);
+ }
+ df <- data.frame(n=as.numeric(names(tv)),me=as.numeric(tv));
+ return(df[order(df$n,decreasing=T),])
+ })
+}
+
+
+
+# returns agreement as a function of dataset size, possibly filtering peaks by min.enr threshold, and by max.fdr
+chain.to.reference.comparison <- function(chains,min.enr=NULL,debug=F,agreement.distance=50, return.median=F, mean.trim=0.1, enr.field="enr",max.fdr=NULL) {
+ cvl <- lapply(chains,function(chain) {
+ # filter chain by fdr
+ if(!is.null(max.fdr)) {
+ chain <- lapply(chain,function(d) { d$npl <- lapply(d$npl,function(cd) cd[cd$fdr<=max.fdr,]); return(d); });
+ }
+ nn <- sort(unlist(lapply(chain,function(d) d$n)),decreasing=T);
+ nn <- nn[-length(nn)];
+ me <- lapply(c(2:length(chain)),function(i) {
+ sla <- t.precalculate.ref.peak.agreement(chain[[1]],chain[i],agreement.distance=agreement.distance,enr.field=enr.field)
+ # calculate overlap
+ x <- lapply(sla,function(mpd) {
+ if(!is.null(min.enr)) {
+
+ me <- mpd$re >= min.enr;
+ me[is.na(me)] <- F;
+ mpd <- mpd[me,];
+ ome <- mpd$oe < min.enr;
+ ome[is.na(ome)] <- T;
+ mpd$ov[ome] <- 0;
+ }
+ return(mean(mpd$ov));
+ })
+ })
+
+ data.frame(n=nn,me=unlist(me));
+ });
+
+ cvl <- na.omit(do.call(rbind,cvl));
+ if(return.median) {
+ tv <- tapply(cvl$me,as.factor(cvl$n),median)
+ } else {
+ tv <- tapply(cvl$me,as.factor(cvl$n),mean,trim=mean.trim);
+ }
+ df <- data.frame(n=as.numeric(names(tv)),me=as.numeric(tv));
+ return(df[order(df$n,decreasing=T),])
+}
+
+
+# estimates enrichment confidence interval based on 2*tag.count.whs window around each position, and a z-score (alpha/2)
+# if(multiple.background.scales=T) the enrichment is also estimated using 5- and 10-fold increased background tag window
+# adds $enr (lower bound), $enr.ub (upper bound) and $enr.mle fields
+calculate.enrichment.estimates <- function(binding.positions,signal.data=NULL,control.data=NULL,fraction=1,tag.count.whs=100,z=2,effective.genome.size=3e9,scale.down.control=F,background.scales=c(1),bg.weight=NULL) {
+ f <- fraction;
+ qv <- pnorm(z,lower.tail=F);
+ cn <- names(binding.positions$npl); names(cn) <- cn;
+
+ if(is.null(control.data)) {
+ # estimate from gamma distribution
+ fg.lambda <- f*sum(unlist(lapply(signal.data,length)))*2*tag.count.whs/effective.genome.size;
+ binding.positions$npl <- lapply(binding.positions$npl,function(d) {
+ d$enr <- qgamma(qv,d$nt,scale=1)/fg.lambda;
+ d$enr.ub <- qgamma(1-qv,d$nt,scale=1)/fg.lambda;
+ d$enr.mle <- d$nt/fg.lambda;
+ return(d);
+ });
+ } else {
+ # estimate using beta distribution
+ if(is.null(bg.weight)) {
+ bg.weight <- sum(unlist(lapply(signal.data,length)))/sum(unlist(lapply(control.data,length)))
+ }
+
+ if(scale.down.control) {
+ # sample down control to be the same size as true signal.data (bg.weight*f)
+ control.data <- lapply(control.data,function(d) sample(d,length(d)*bg.weight*f,replace=(f*bg.weight>1)))
+ #bg.weight <- sum(unlist(lapply(signal.data,length)))/sum(unlist(lapply(control.data,length)))
+ bg.weight <- 1/f;
+
+ }
+
+ binding.positions$enrichment.bg.weight <- bg.weight;
+ binding.positions$enrichment.whs <- tag.count.whs;
+ binding.positions$enrichment.z <- z;
+
+ binding.positions$npl <- lapply(cn,function(chr) {
+ d <- binding.positions$npl[[chr]];
+
+ edf <- lapply(background.scales,function(background.width.multiplier) {
+ sig.mult <- bg.weight*f/background.width.multiplier;
+ nbg <- points.within(abs(control.data[[chr]]),d$x-tag.count.whs*background.width.multiplier,d$x+tag.count.whs*background.width.multiplier,return.point.counts=T,return.unique=F);
+
+ nfg <- d$nt;
+
+
+ # Poisson ratio Bayesian LB with non-informative prior (Clopper & Pearson 1934)
+ nf <- ((nfg+0.5)/(nbg+0.5))*qf(1-qv,2*(nfg+0.5),2*(nbg+0.5),lower.tail=F)
+ nf <- nf/sig.mult;
+
+ ub <- ((nfg+0.5)/(nbg+0.5))*qf(qv,2*(nfg+0.5),2*(nbg+0.5),lower.tail=F)
+ ub <- ub/sig.mult;
+
+ mle <- (nfg+0.5)/(nbg+0.5);
+ mle <- mle/sig.mult;
+ if(is.null(nbg)) { nbg <- numeric(0) }
+ if(is.null(nf)) { nf <- numeric(0) }
+ if(is.null(ub)) { ub <- numeric(0) }
+ if(is.null(mle)) { mle <- numeric(0) }
+ return(data.frame(nbg=nbg,lb=nf,ub=ub,mle=mle))
+ })
+
+ adf <- do.call(cbind,lapply(c(1:length(background.scales)),function(i) {
+ df <- edf[[i]];
+ cn <- c("nbgt","enr","enr.ub","enr.mle");
+ if(background.scales[i]>1) {
+ cn <- paste(cn,as.character(background.scales[i]),sep=".");
+ }
+ names(df) <- cn;
+ return(df);
+ }))
+
+ return(cbind(d,adf));
+ });
+ }
+
+ return(binding.positions);
+}
+
+
+# precalculate peak agreement of a sampling list given a reference
+t.precalculate.ref.peak.agreement <- function(ref,sf,agreement.distance=50,enr.field="enr") {
+ ref <- ref$npl;
+ cn <- names(ref); names(cn) <- cn;
+
+ # for each sampling round
+ lapply(sf,function(sd) {
+ # calculate overlap
+
+ ov <- data.frame(do.call(rbind,lapply(cn,function(chr) {
+ if(dim(ref[[chr]])[1]<1) { return(cbind(ov=c(),re=c(),oe=c())) };
+ pwi <- points.within(ref[[chr]]$x,sd$npl[[chr]]$x-agreement.distance,sd$npl[[chr]]$x+agreement.distance);
+ pwi[pwi==-1] <- NA;
+ renr <- ref[[chr]][,enr.field]
+ oenr <- sd$npl[[chr]][,enr.field][pwi];
+ if(length(oenr)==0) { oenr <- rep(NA,length(renr)); }
+ return(cbind(ov=as.integer(!is.na(pwi)),re=renr,oe=oenr));
+ })))
+ })
+}
+
+
+# find minimal saturated enrichment given a list of replicate agreement matrices (for one fraction)
+t.find.min.saturated.enr <- function(pal,thr=0.01,plot=F,return.number.of.peaks=F,plot.individual=T,return.median=F,return.vector=F) {
+ nr <- length(pal);
+ # merge replicate data frames
+ mpd <- data.frame(do.call(rbind,pal));
+
+ mpd$re[is.na(mpd$re)] <- Inf;
+ mpd$oe[is.na(mpd$oe)] <- Inf;
+
+
+
+ # round up values to avoid miscounting
+ mpd$re <- round(mpd$re,digits=2);
+ mpd$oe <- round(mpd$oe,digits=2);
+
+ me <- pmin(mpd$re,mpd$oe);
+ ome <- order(me,decreasing=T);
+ df <- data.frame(me=me[ome],ov=mpd$ov[ome]);
+ recdf <- ecdf(-mpd$re); ren <- length(mpd$re);
+
+ # collapse equal peak heights
+ xk <- tapply(df$ov,as.factor(df$me),sum); xk <- data.frame(ov=as.numeric(xk),me=as.numeric(names(xk))); xk <- xk[order(xk$me,decreasing=T),];
+
+
+ cso <- cumsum(xk$ov)/(recdf(-xk$me)*ren);
+ cso[is.na(cso)] <- 0;
+ cso[!is.finite(cso)] <- 0;
+ mv <- max(which(cso >= 1-thr))
+ menr <- xk$me[mv];
+
+ ir <- lapply(pal,function(d) {
+ d$re[is.na(d$re)] <- Inf;
+ d$oe[is.na(d$oe)] <- Inf;
+
+ me <- pmin(d$re,d$oe);
+ ome <- order(me,decreasing=T);
+ df <- data.frame(me=me[ome],ov=d$ov[ome]);
+ cso <- cumsum(df$ov)/c(1:length(df$ov));
+ mv <- max(which(cso >= 1-thr))
+ menr <- df$me[mv];
+ return(list(df=df,menr=menr));
+ });
+
+ if(plot) {
+ par(mar = c(3.5,3.5,2.0,0.5), mgp = c(2,0.65,0), cex = 0.8);
+ plot(df$me,cumsum(df$ov)/c(1:length(df$ov)),type='l',ylab="fraction of positions overlapping with reference",xlab="minimal enrichment of binding positions",xlim=c(min(df$me),2*menr));
+ abline(h=1-thr,lty=2,col=4)
+ if(plot.individual) {
+ lapply(ir,function(d) {
+ df <- d$df;
+ lines(df$me,cumsum(df$ov)/c(1:length(df$ov)),col=8);
+ abline(v=menr,col="pink",lty=3)
+ });
+ lines(df$me,cumsum(df$ov)/c(1:length(df$ov)),col=1);
+ }
+ abline(v=menr,col=2,lty=2)
+ legend(x="bottomright",lty=c(1,2,1,3,2),col=c(1,2,8,"pink",4),legend=c("combined samples","combined sample MSER","individual samples","individual MSERs","consistency threshold"));
+ }
+
+ if(return.number.of.peaks) {
+ mpd <- data.frame(do.call(rbind,pal));
+ return(length(which(!is.na(mpd$re) & mpd$re >=menr))/nr);
+ } else {
+ if(return.vector) {
+ return(unlist(lapply(ir,function(d) d$menr)));
+ }
+ if(return.median) {
+ return(median(unlist(lapply(ir,function(d) d$menr))));
+ } else {
+ return(menr);
+ }
+ }
+}
+
+
+
+# determine d1/d2 dataset size ratio. If background.density.scaling=F, the ratio of tag counts is returned.
+# if background.density.scaling=T, regions of significant tag enrichment are masked prior to ratio calculation.
+dataset.density.ratio <- function(d1,d2,min.tag.count.z=4.3,wsize=1e3,mcs=0,background.density.scaling=T,return.proportion=F) {
+ if(!background.density.scaling) {
+ return(sum(unlist(lapply(d1,length)))/sum(unlist(lapply(d2,length))))
+ }
+
+ chrl <- intersect(names(d1),names(d2));
+ ntc <- do.call(rbind,lapply(chrl,function(chr) {
+ x1 <- tag.enrichment.clusters(abs(d1[[chr]]),c(),wsize=wsize,bg.weight=0,min.tag.count.z=min.tag.count.z,mcs=mcs,either=F)
+ x2 <- tag.enrichment.clusters(abs(d2[[chr]]),c(),wsize=wsize,bg.weight=0,min.tag.count.z=min.tag.count.z,mcs=mcs,either=F)
+ return(c(length(which(points.within(abs(d1[[chr]]),c(x1$s,x2$s)-wsize/2,c(x1$e,x2$e)+wsize/2)==-1)),length(which(points.within(abs(d2[[chr]]),c(x1$s,x2$s)-wsize/2,c(x1$e,x2$e)+wsize/2)==-1))))
+ }))
+ ntcs <- apply(ntc,2,sum);
+ #print(ntcs/c(sum(unlist(lapply(d1,length))),sum(unlist(lapply(d2,length)))));
+ return(ntcs[1]/ntcs[2])
+}
+
+# returns effective size of the dataset based on the same logic as dataset.density.ratio
+dataset.density.size <- function(d1,min.tag.count.z=4.3,wsize=1e3,mcs=0,background.density.scaling=T) {
+ if(!background.density.scaling) {
+ return(sum(unlist(lapply(d1,length))))
+ }
+
+ chrl <- names(d1);
+ ntc <- lapply(chrl,function(chr) {
+ x1 <- tag.enrichment.clusters(abs(d1[[chr]]),c(),wsize=wsize,bg.weight=0,min.tag.count.z=min.tag.count.z,mcs=mcs,either=F)
+ return(length(which(points.within(abs(d1[[chr]]),x1$s-wsize/2,x1$e+wsize/2)==-1)))
+ })
+ return(sum(unlist(ntc)))
+}
+
+old.dataset.density.ratio <- function(d1,d2,min.tag.count.z=4.3,wsize=1e3,mcs=0,background.density.scaling=T) {
+ if(!background.density.scaling) {
+ return(sum(unlist(lapply(d1,length)))/sum(unlist(lapply(d2,length))))
+ }
+
+ t.chromosome.counts <- function(tl) {
+ lapply(tl,function(d) {
+ x <- tag.enrichment.clusters(abs(d),c(),wsize=wsize,bg.weight=0,min.tag.count.z=min.tag.count.z,mcs=mcs,either=F)
+ x$s <- x$s-wsize/2; x$e <- x$e+wsize/2;
+ x <- regionset.intersection.c(list(x),do.union=T)
+ return(c(n=length(which(points.within(abs(d),x$s,x$e)==-1)),s=diff(range(abs(d))),m=sum(x$e-x$s)));
+ })
+ }
+
+ l1 <- t.chromosome.counts(d1);
+ l2 <- t.chromosome.counts(d2);
+
+ l2 <- data.frame(do.call(rbind,l2[names(l1)]));
+ l1 <- data.frame(do.call(rbind,l1));
+
+ # genome size
+ gs <- sum(pmax(l1$s,l2$s))
+
+ den1 <- sum(l1$n)/(gs-sum(l1$m))
+ den2 <- sum(l2$n)/(gs-sum(l2$m))
+ return(den1/den2);
+}
+
+
+
+
+# calculate cumulative density based on sum of scaled gaussian curves
+# (by Michael Tolstorukov)
+#
+# vin - input vector; bw -- standard deviation, dw-gaussina cutoff in stdev; dout - output "density")
+# output - if return.x=F vector of cumulative density values corresponding to integer positions described by range(vin)
+# output - if return.x=T a data structure with $x and $y corresponding to the cumulative density
+# optional match.wt.f is a function that will return weights for a tag vector
+densum <- function(vin,bw=5,dw=3,match.wt.f=NULL,return.x=T,from=min(vin),to=max(vin),step=1,new.code=T) {
+ # construct vector of unique tags and their counts
+ tc <- table(vin[vin>=from & vin<=to]);
+ pos <- as.numeric(names(tc)); storage.mode(pos) <- "double";
+ tc <- as.numeric(tc); storage.mode(tc) <- "double";
+ n <- length(pos)
+ # weight counts
+ if(!is.null(match.wt.f)) {
+ tc <- tc*match.wt.f(pos);
+ }
+
+ rng <- c(from,to);
+ if(rng[1]<0) { stop("range extends into negative values") }
+ if(range(pos)[1]<0) { stop("position vector contains negative values") }
+
+ storage.mode(n) <- storage.mode(rng) <- storage.mode(bw) <- storage.mode(dw) <- storage.mode(step) <- "integer";
+
+ spos <- rng[1]; storage.mode(spos) <- "double";
+
+ dlength <- floor((rng[2] - rng[1])/step) + 1; # length of output array
+ if(dlength<1) { stop("zero data range") }
+ if(new.code) {
+ storage.mode(step) <- storage.mode(dlength) <- storage.mode(bw) <- storage.mode(dw) <-"integer";
+ dout <- .Call("ccdensum",pos,tc,spos,bw,dw,dlength,step);
+ } else {
+ dout <- numeric(dlength); storage.mode(dout) <- "double";
+ storage.mode(dlength) <- "integer";
+ .C("cdensum",n,pos,tc,spos,bw,dw,dlength,step,dout,DUP=F);
+ }
+
+
+ if(return.x) {
+ return(list(x=c(rng[1],rng[1]+step*(dlength-1)),y=dout,step=step))
+ } else {
+ return(dout)
+ }
+}
+
+# count tags within sliding window of a specified size
+# vin - tag vector (postive values, pre-shifted)
+# window.size/window.step - window characteristics
+# tv - optional, pre-sorted, pre-trimmed tag vector
+window.tag.count <- function(vin,window.size,window.step=1,return.x=T,from=min(vin)+floor(window.size/2),to=max(vin)-floor(window.size/2),tv=NULL) {
+ whs <- floor(window.size/2);
+ # select tags with margins
+ if(is.null(tv)) {
+ tv <- sort(vin[vin>=from-whs-1 & vin<=to+whs+1])
+ }
+ storage.mode(tv) <- "double";
+ n <- length(tv)
+ nsteps <- ceiling((to-from)/window.step);
+
+ storage.mode(n) <- storage.mode(nsteps) <- storage.mode(window.size) <- storage.mode(window.step) <- "integer";
+
+ spos <- from; storage.mode(spos) <- "double";
+
+ if(nsteps<1) { stop("zero data range") }
+ #dout <- integer(nsteps); storage.mode(dout) <- "integer";
+ #.C("window_n_tags",n,tv,spos,window.size,window.step,nsteps,dout,DUP=F);
+ dout <- .Call("cwindow_n_tags",tv,spos,window.size,window.step,nsteps);
+
+ if(return.x) {
+ return(list(x=c(from,from+(nsteps-1)*window.step),y=dout,step=window.step))
+ } else {
+ return(dout)
+ }
+}
+
+# count tags in windows around specified positions (pos)
+window.tag.count.around <- function(vin,window.size,pos,return.x=T,tc=NULL,sorted=F) {
+ if(is.null(tc)) {
+ tc <- table(vin);
+ }
+ if(!sorted) {
+ op <- rank(pos);
+ pos <- sort(pos);
+ }
+ storage.mode(pos) <- "double";
+ tpos <- as.integer(names(tc)); storage.mode(tpos) <- "double";
+ tc <- as.integer(tc); storage.mode(tc) <- "integer";
+
+ whs <- floor(window.size/2);
+
+ storage.mode(whs) <- "integer";
+ twc <- .Call("cwindow_n_tags_around",tpos,tc,pos,whs);
+ if(return.x) {
+ if(sorted) {
+ return(data.frame(x=pos,y=twc));
+ } else {
+ return(data.frame(x=pos[op],y=twc[op]));
+ }
+ } else {
+ if(sorted) {
+ return(twc);
+ } else {
+ return(twc[op]);
+ }
+ }
+}
+
+# given a tag vector (signed), identify and clean up (either remove or cap) singular positions that exceed local tag density
+# vin - tag vector
+# cap.fold - maximal fold over enrichment over local density allowed for a single tag position, at which the tag count is capped
+# eliminate.fold - max fold enrichment that, when exceeded, results in exclusion of all the tags at that position (e.g. counted as anomaly)
+# z.threshold - Z-score used to determine max allowed counts
+filter.singular.positions.by.local.density <- function(tags,window.size=200,cap.fold=4,eliminate.fold=10,z.threshold=3) {
+ # tabulate tag positions
+ if(length(tags)<2) { return(tags); };
+
+ tc <- table(tags);
+ pos <- as.numeric(names(tc)); storage.mode(pos) <- "double";
+ tc <- as.integer(tc); storage.mode(tc) <- "integer";
+ n <- length(pos);
+
+ whs <- floor(window.size/2);
+
+ storage.mode(n) <- storage.mode(whs) <- "integer";
+ twc <- .Call("cwindow_n_tags_around",pos,tc,pos,whs);
+ twc <- (twc-tc+1)/window.size; # local density
+
+ pv <- pnorm(z.threshold,lower.tail=F)
+ # exclude
+ max.counts <- qpois(pv,twc*eliminate.fold,lower.tail=F)
+ tc[tc>max.counts] <- 0;
+ # cap
+ max.counts <- qpois(pv,twc*cap.fold,lower.tail=F)
+ ivi <- which(tc>max.counts);
+ tc[ivi] <- max.counts[ivi]+1;
+
+ # reconstruct tag vector
+ tv <- rep(pos,tc);
+ to <- order(abs(tv)); tv <- tv[to];
+ return(tv);
+}
+
+# calculates enrichment bounds using multiple background scales
+# ft - foreground tags (pre-shifted, positive)
+# bt - background tags
+# fws - foreground window size
+# bwsl - background window size list
+# step - window step
+# rng - from/to coordinates (to will be adjusted according to step)
+#
+# returns: a list with $x ($s $e $step), $lb vector and $mle vector ($ub if calculate.upper.bound=T)
+mbs.enrichment.bounds <- function(ft,bt,fws,bwsl,step=1,rng=NULL,alpha=0.05,calculate.upper.bound=F,bg.weight=length(ft)/length(bt),use.most.informative.scale=F,quick.calculation=F,pos=NULL) {
+ # determine range
+ if(is.null(rng)) {
+ rng <- range(range(ft));
+ }
+ # foreground counts
+ if(is.null(pos)) {
+ fwc <- window.tag.count(ft,fws,window.step=step,from=rng[1],to=rng[2],return.x=T);
+ } else {
+ fwc <- window.tag.count.around(ft,fws,pos,return.x=T)
+ }
+ fwc$y <- fwc$y+0.5;
+
+ zal <- qnorm(alpha/2,lower.tail=F);
+
+ # background counts
+ bt <- sort(bt);
+ if(!is.null(pos)) {
+ tc <- table(bt);
+ }
+ bgcm <- lapply(bwsl,function(bgws) {
+ if(is.null(pos)) {
+ window.tag.count(bt,bgws,window.step=step,from=rng[1],to=rng[2],return.x=F,tv=bt)+0.5;
+ } else {
+ window.tag.count.around(bt,bgws,pos,return.x=F,tc=tc)+0.5
+ }
+ })
+ if(!is.null(pos)) {
+ rm(tc);
+ }
+
+ # pick most informative scale
+ if(use.most.informative.scale) {
+ bgcm <- t(do.call(cbind,bgcm))
+ isi <- max.col(t((bgcm)/(bwsl/fws))) # add pseudo-counts to select lowest scale in case of a tie
+
+ bgc <- c(bgcm)[isi+dim(bgcm)[1]*(c(1:length(isi))-1)]
+
+ if(quick.calculation) {
+ rte <- fwc$y+bgc-0.25*zal*zal; rte[rte<0] <- 0;
+ dn <- bgc - 0.25*zal*zal;
+ lbm=(sqrt(fwc$y*bgc) - 0.5*zal*sqrt(rte))/dn;
+ ivi <- which(lbm<0);
+ lbm <- lbm*lbm*bwsl[isi]/fws/bg.weight;
+ lbm[rte<=0] <- 1;
+ lbm[dn<=0] <- 1;
+ lbm[ivi] <- 1;
+ } else {
+ lbm <- (fwc$y/bgc)*qf(1-alpha/2,2*fwc$y,2*bgc,lower.tail=F)*bwsl[isi]/fws/bg.weight;
+ }
+
+ mle <- fwc$y/bgc*bwsl[isi]/fws/bg.weight; mle[is.nan(mle)] <- Inf; mle[is.na(mle)] <- Inf;
+
+ rl <- list(x=list(s=fwc$x[1],e=fwc$x[2],step=fwc$step),lb=lbm,mle=mle);
+
+ if(calculate.upper.bound) {
+ isi <- max.col(t((-bgcm)/(bwsl/fws))) # add pseudo-counts to select highest scale in case of a tie
+ bgc <- c(bgcm)[isi+dim(bgcm)[1]*(c(1:length(isi))-1)]
+
+ if(quick.calculation) {
+ ubm=(sqrt(fwc$y*bgc) + 0.5*zal*sqrt(rte))/dn;
+ ivi <- which(ubm<0);
+ ubm <- ubm*ubm*bwsl[isi]/fws/bg.weight;
+ ubm[rte<=0] <- 1;
+ ubm[ivi] <- 1;
+ lbm[dn<=0] <- 1;
+ } else {
+ ubm <- (fwc$y/bgc)*qf(alpha/2,2*fwc$y,2*bgc,lower.tail=F)*bwsl[isi]/fws/bg.weight;
+ }
+ rl <- c(rl,list(ub=ubm));
+ }
+ return(rl);
+
+ } else {
+ # determine lower bounds
+ lbm <- lapply(c(1:length(bgcm)),function(i) {
+ nbg <- bgcm[[i]];
+ if(quick.calculation) {
+ rte <- fwc$y+nbg-0.25*zal*zal; rte[rte<0] <- 0;
+ dn <- (nbg - 0.25*zal*zal);
+ lbm=(sqrt(fwc$y*nbg) - 0.5*zal*sqrt(rte))/dn;
+ ivi <- which(lbm<0);
+ lbm <- lbm*lbm*bwsl[i]/fws/bg.weight;
+ lbm[rte<=0] <- 1;
+ lbm[dn<=0] <- 1;
+ lbm[ivi] <- 1;
+ return(lbm);
+ } else {
+ return((fwc$y/nbg)*qf(1-alpha/2,2*fwc$y,2*nbg,lower.tail=F)*bwsl[i]/fws/bg.weight);
+ }
+ })
+ lbm <- do.call(pmin,lbm);
+
+ # calculate mle
+ #mle <- do.call(pmin,lapply(bgcm,function(bgc) fwc/bgc))
+ mle <- do.call(pmin,lapply(c(1:length(bgcm)),function(i) {
+ bgc <- bgcm[[i]];
+ x <- fwc$y/bgc*bwsl[i]/fws/bg.weight; x[is.nan(x)] <- Inf; x[is.na(x)] <- Inf; return(x);
+ }))
+
+ rl <- list(x=list(s=fwc$x[1],e=fwc$x[2],step=fwc$step),lb=lbm,mle=mle);
+
+ if(calculate.upper.bound) {
+ # determine upper bound
+ ubm <- lapply(c(1:length(bgcm)),function(i) {
+ nbg <- bgcm[[i]];
+ if(quick.calculation) {
+ rte <- fwc$y+nbg-0.25*zal*zal; rte[rte<0] <- 0;
+ dn <- (nbg - 0.25*zal*zal);
+ ubm=(sqrt(fwc$y*nbg) + 0.5*zal*sqrt(rte))/dn;
+ ivi <- which(ubm<0);
+ ubm <- ubm*ubm*bwsl[i]/fws/bg.weight;
+ ubm[rte<=0] <- 1;
+ ubm[dn<=0] <- 1;
+ ubm[ivi] <- 1;
+ return(ubm);
+ } else {
+ return((fwc$y/nbg)*qf(alpha/2,2*fwc$y,2*nbg,lower.tail=F)*bwsl[i]/fws/bg.weight);
+ }
+ })
+ ubm <- do.call(pmax,ubm);
+ rl <- c(rl,list(ub=ubm));
+ }
+
+ return(rl);
+ }
+}
+
+# calculates binomail proportion ratio bounds
+# returns: a list with $x, $lb vector and $mle, $ub vector
+binomial.proportion.ratio.bounds <- function(ft1,bt1,ft2,bt2,fws,bws,step=1,rng=NULL,alpha=0.05,bg.weight1=length(ft1)/length(bt1),bg.weight2=length(ft2)/length(bt2),pos=NULL,a=1.25,b=2.50) {
+ # determine range
+ if(is.null(rng)) {
+ rng <- range(range(ft1));
+ }
+ # counts
+ if(is.null(pos)) {
+ fwc1 <- window.tag.count(ft1,fws,window.step=step,from=rng[1],to=rng[2],return.x=T);
+ fwc2 <- window.tag.count(ft2,fws,window.step=step,from=rng[1],to=rng[2],return.x=T);
+ bwc1 <- window.tag.count(bt1,fws,window.step=step,from=rng[1],to=rng[2],return.x=T);
+ bwc2 <- window.tag.count(bt2,fws,window.step=step,from=rng[1],to=rng[2],return.x=T);
+ pos <- seq(fwc1$x[1],fwc1$x[2],by=fwc1$step)
+ } else {
+ fwc1 <- window.tag.count.around(ft1,bws,pos,return.x=T)
+ fwc2 <- window.tag.count.around(ft2,bws,pos,return.x=T)
+ bwc1 <- window.tag.count.around(bt1,bws,pos,return.x=T)
+ bwc2 <- window.tag.count.around(bt2,bws,pos,return.x=T)
+ }
+
+ bg.weight1 <- bg.weight1*fws/bws; bg.weight2 <- bg.weight2*fws/bws;
+ ls1 <- log2(1/(1+1/bg.weight1)); ls2 <- log2(1/(1+1/bg.weight2));
+ ltheta <- log2((fwc1$y+a-1)/(fwc1$y+bwc1$y+a+b-2)) - log2((fwc2$y+a-1)/(fwc2$y+bwc2$y+a+b-2)) - ls1 + ls2;
+
+ vltheta <- 1/((fwc1$y+a-1) + (fwc1$y+a-1)^2/(bwc1$y+b-1)) + 1/((fwc2$y+a-1) + (fwc2$y+a-1)^2/(bwc2$y+b-1))
+
+ zal <- qnorm(alpha/2,lower.tail=F);
+
+ rl <- list(x=pos,mle=ltheta,lb=ltheta-zal*sqrt(vltheta),ub=ltheta+zal*sqrt(vltheta));
+}
+
+write.probe.wig <- function(chr,pos,val,fname,append=F,feature="M",probe.length=35,header=T) {
+ min.dist <- min(diff(pos));
+ if(probe.length>=min.dist) {
+ probe.length <- min.dist-1;
+ cat("warning: adjusted down wig segment length to",probe.length,"\n");
+ }
+ mdat <- data.frame(chr,as.integer(pos),as.integer(pos+probe.length),val)
+
+ if(header) {
+ write(paste("track type=wiggle_0 name=\"Bed Format\" description=\"",feature,"\" visibility=dense color=200,100,0 altColor=0,100,200 priority=20",sep=""),file=fname,append=append)
+ write.table(mdat,file=fname,col.names=F,row.names=F,quote=F,sep=" ",append=T);
+ } else {
+ write.table(mdat,file=fname,col.names=F,row.names=F,quote=F,sep=" ",append=append);
+ }
+
+}
+
+# returns intersection of multiple region sets
+# each regionset needs to contain $s, $e and optional $v column
+regionset.intersection.c <- function(rsl,max.val=-1,do.union=F) {
+ # translate into position/flag form
+ rfl <- lapply(rsl,function(rs) {
+ rp <- c(rs$s,rs$e); rf <- c(rep(c(1,-1),each=length(rs$s)));
+
+ ro <- order(rp);
+ rp <- rp[ro]; rf <- rf[ro];
+ if(!is.null(rs$v)) {
+ rv <- c(rs$v,rs$v)[ro];
+ return(data.frame(p=as.numeric(rp),f=as.integer(rf),v=as.numeric(rv)));
+ } else {
+ return(data.frame(p=as.numeric(rp),f=as.integer(rf)));
+ }
+ })
+ rfd <- data.frame(do.call(rbind,lapply(1:length(rfl),function(i) {
+ d <- rfl[[i]]; d$f <- d$f*i; return(d);
+ })))
+ rfd <- rfd[order(rfd$p),];
+ if(is.null(rfd$v)) { max.val <- 0; }
+ if(do.union) { ur <- 1; } else { ur <- 0; };
+ rl <- .Call("region_intersection",as.integer(length(rfl)),as.numeric(rfd$p),as.integer(rfd$f),as.numeric(rfd$v),as.integer(max.val),as.integer(ur));
+ return(data.frame(do.call(cbind,rl)));
+}
+
+
+# idenfity if binding peak falls within a larger region of significant tag enrichment, and if so record its booundaries
+add.broad.peak.regions <- function(chip.tags,input.tags,bp,window.size=500,z.thr=2) {
+ se <- find.significantly.enriched.regions(chip.tags,input.tags,window.size=window.size,z.thr=z.thr,poisson.z=0,poisson.ratio=0,either=F)
+ chrl <- names(bp$npl); names(chrl) <- chrl;
+ bnpl <- lapply(chrl,function(chr) {
+ npl <- bp$npl[[chr]];
+ if(is.null(npl) | dim(npl)[1]<1) {
+ return(npl);
+ }
+ pi <- points.within(npl$x,se[[chr]]$s,se[[chr]]$e,return.list=T);
+
+ pm <- do.call(rbind,lapply(pi,function(rl) {
+ if(length(rl)>0) {
+ return(range(c(se[[chr]]$s[rl],se[[chr]]$e[rl])))
+ } else {
+ return(c(NA,NA));
+ }
+ }))
+
+ npl$rs <- pm[,1];
+ npl$re <- pm[,2];
+ return(npl);
+ })
+ bp$npl <- bnpl;
+ return(bp);
+}
+
+# writing out binding results in a narrowpeak format, incorporating broad region boundaries if they are present
+# if broad region info is not present, margin is used to determine region width. The default margin is equal
+# to the window half size used to call the binding peaks
+write.narrowpeak.binding <- function(bd,fname,margin=bd$whs,npeaks=NA) {
+ if(is.null(margin)) { margin <- 50; }
+ chrl <- names(bd$npl); names(chrl) <- chrl;
+ md <- do.call(rbind,lapply(chrl,function(chr) {
+ df <- bd$npl[[chr]];
+ x <- df$x;
+ rs <- df$rs; if(is.null(rs)) { rs <- rep(NA,length(x)) }
+ re <- df$re; if(is.null(re)) { re <- rep(NA,length(x)) }
+ ivi <- which(is.na(rs)); if(any(ivi)) {rs[ivi] <- x[ivi]-margin;}
+ ivi <- which(is.na(re)); if(any(ivi)) {re[ivi] <- x[ivi]+margin;}
+ cbind(chr,rs,re,".","0",".",df$y,-1,-log10(df$fdr),x-rs) # Anshul: converted fdr to -log10
+ }))
+ md <- md[order(as.numeric(md[,7]),decreasing=T),]
+ if (!is.na(npeaks)) { # Anshul: added this option to print a limited number of peaks
+ npeaks <- min(nrow(md),npeaks)
+ md <- md[1:npeaks,]
+ }
+ write.table(md,file=fname,col.names=F,row.names=F,quote=F,sep="\t",append=F);
+}
+
+
+get.broad.enrichment.clusters <- function(signal.data,control.data,window.size=1e3,z.thr=3, tag.shift=146/2,background.density.scaling=F, ... ) {
+ # find significantly enriched clusters
+ bg.weight <- dataset.density.ratio(signal.data,control.data,background.density.scaling=background.density.scaling);
+ se <- find.significantly.enriched.regions(signal.data,control.data,window.size=window.size,z.thr=z.thr,tag.shift=tag.shift, bg.weight=bg.weight, ...)
+ chrl <- names(se); names(chrl) <- chrl;
+ se <- lapply(chrl,function(chr) {
+ d <- se[[chr]];
+ if(length(d$s>1)) {
+ d <- regionset.intersection.c(list(d,d),do.union=T);
+ sc <- points.within(abs(signal.data[[chr]]+tag.shift),d$s,d$e,return.point.counts=T);
+ cc <- points.within(abs(control.data[[chr]]+tag.shift),d$s,d$e,return.point.counts=T);
+ d$rv <- log2((sc+1)/(cc+1)/bg.weight);
+ return(d);
+ } else {
+ return(d)
+ }
+ })
+}
+
+write.broadpeak.info <- function(bp,fname) {
+ chrl <- names(bp); names(chrl) <- chrl;
+ chrl <- chrl[unlist(lapply(bp,function(d) length(d$s)))>0]
+ md <- do.call(rbind,lapply(chrl,function(chr) {
+ df <- bp[[chr]];
+ cbind(chr,df$s,df$e,".","0",".",df$rv,-1,-1)
+ }))
+ md <- md[order(as.numeric(md[,7]),decreasing=T),]
+ write.table(md,file=fname,col.names=F,row.names=F,quote=F,sep="\t",append=F);
+}
+
+
+get.clusters2 <- function(x,CL) {
+ temp <- which(diff(x) != 0)
+ begin <- c(1, temp + 1)
+ end <- c(temp, length(x))
+ size <- end - begin + 1
+
+ begin <- begin[size >= CL]
+ end <- end[size >= CL]
+ size <- size[size >= CL]
+
+ size <- size[x[end] != 0]
+ begin <- begin[x[end] != 0]
+ end <- end[x[end] != 0]
+
+ return (list(size=size,begin=begin,end=end))
+}
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0d7428f
--- /dev/null
+++ b/README.md
@@ -0,0 +1,22 @@
+# ChIP-seq processing pipeline
+An R package for anlaysis of ChIP-seq and other functional sequencing data.
+Please see [package homepage](http://compbio.med.harvard.edu/Supplements/ChIP-seq/) for details.
+
+## Requirements
+A unix-flavored OS with R and [Boost C++](http://www.boost.org/) installed.
+
+## Installation
+You can use modtools to install SPP:
+```
+require(devtools)
+devtools::install_github('hms-dbmi/spp', build_vignettes = FALSE)
+```
+Alternatively, download a .tar.gz containing the [latest release](https://github.com/hms-dbmi/spp/releases) and use the standard R installation command, e.g.:
+```
+R CMD INSTALL spp_1.13.tar.gz
+```
+
+Note: if Boost libraries are installed in non-standard location, please specify the location in a BOOST_ROOT environment variable prior to running the installation.
+```
+export BOOST_ROOT=/path/boost_1_58_0/
+```
diff --git a/aclocal.m4 b/aclocal.m4
new file mode 100644
index 0000000..27d063f
--- /dev/null
+++ b/aclocal.m4
@@ -0,0 +1,1560 @@
+# boost.m4: Locate Boost headers and libraries for autoconf-based projects.
+# Copyright (C) 2007-2011, 2014 Benoit Sigoure <tsuna at lrde.epita.fr>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Additional permission under section 7 of the GNU General Public
+# License, version 3 ("GPLv3"):
+#
+# If you convey this file as part of a work that contains a
+# configuration script generated by Autoconf, you may do so under
+# terms of your choice.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+m4_define([_BOOST_SERIAL], [m4_translit([
+# serial 25
+], [#
+], [])])
+
+# Original sources can be found at http://github.com/tsuna/boost.m4
+# You can fetch the latest version of the script by doing:
+# wget http://github.com/tsuna/boost.m4/raw/master/build-aux/boost.m4
+
+# ------ #
+# README #
+# ------ #
+
+# This file provides several macros to use the various Boost libraries.
+# The first macro is BOOST_REQUIRE. It will simply check if it's possible to
+# find the Boost headers of a given (optional) minimum version and it will
+# define BOOST_CPPFLAGS accordingly. It will add an option --with-boost to
+# your configure so that users can specify non standard locations.
+# If the user's environment contains BOOST_ROOT and --with-boost was not
+# specified, --with-boost=$BOOST_ROOT is implicitly used.
+# For more README and documentation, go to http://github.com/tsuna/boost.m4
+# Note: THESE MACROS ASSUME THAT YOU USE LIBTOOL. If you don't, don't worry,
+# simply read the README, it will show you what to do step by step.
+
+m4_pattern_forbid([^_?(BOOST|Boost)_])
+
+
+# _BOOST_SED_CPP(SED-PROGRAM, PROGRAM,
+# [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
+# --------------------------------------------------------
+# Same as AC_EGREP_CPP, but leave the result in conftest.i.
+#
+# SED-PROGRAM is *not* overquoted, as in AC_EGREP_CPP. It is expanded
+# in double-quotes, so escape your double quotes.
+#
+# It could be useful to turn this into a macro which extracts the
+# value of any macro.
+m4_define([_BOOST_SED_CPP],
+[AC_LANG_PUSH([C++])dnl
+AC_LANG_PREPROC_REQUIRE()dnl
+AC_REQUIRE([AC_PROG_SED])dnl
+AC_LANG_CONFTEST([AC_LANG_SOURCE([[$2]])])
+AS_IF([dnl eval is necessary to expand ac_cpp.
+dnl Ultrix and Pyramid sh refuse to redirect output of eval, so use subshell.
+dnl Beware of Windows end-of-lines, for instance if we are running
+dnl some Windows programs under Wine. In that case, boost/version.hpp
+dnl is certainly using "\r\n", but the regular Unix shell will only
+dnl strip `\n' with backquotes, not the `\r'. This results in
+dnl boost_cv_lib_version='1_37\r' for instance, which breaks
+dnl everything else.
+dnl Cannot use 'dnl' after [$4] because a trailing dnl may break AC_CACHE_CHECK
+dnl
+dnl Beware that GCC 5, when expanding macros, may embed # line directives
+dnl a within single line:
+dnl
+dnl # 1 "conftest.cc"
+dnl # 1 "<built-in>"
+dnl # 1 "<command-line>"
+dnl # 1 "conftest.cc"
+dnl # 1 "/opt/local/include/boost/version.hpp" 1 3
+dnl # 2 "conftest.cc" 2
+dnl boost-lib-version =
+dnl # 2 "conftest.cc" 3
+dnl "1_56"
+dnl
+dnl So get rid of the # lines, and glue the remaining ones together.
+(eval "$ac_cpp conftest.$ac_ext") 2>&AS_MESSAGE_LOG_FD |
+ grep -v '#' |
+ tr -d '\r' |
+ tr -s '\n' ' ' |
+ $SED -n -e "$1" >conftest.i 2>&1],
+ [$3],
+ [$4])
+rm -rf conftest*
+AC_LANG_POP([C++])dnl
+])# _BOOST_SED_CPP
+
+
+
+# BOOST_REQUIRE([VERSION], [ACTION-IF-NOT-FOUND])
+# -----------------------------------------------
+# Look for Boost. If version is given, it must either be a literal of the form
+# "X.Y.Z" where X, Y and Z are integers (the ".Z" part being optional) or a
+# variable "$var".
+# Defines the value BOOST_CPPFLAGS. This macro only checks for headers with
+# the required version, it does not check for any of the Boost libraries.
+# On # success, defines HAVE_BOOST. On failure, calls the optional
+# ACTION-IF-NOT-FOUND action if one was supplied.
+# Otherwise aborts with an error message.
+AC_DEFUN_ONCE([BOOST_REQUIRE],
+[AC_REQUIRE([AC_PROG_CXX])dnl
+AC_REQUIRE([AC_PROG_GREP])dnl
+echo "$as_me: this is boost.m4[]_BOOST_SERIAL" >&AS_MESSAGE_LOG_FD
+boost_save_IFS=$IFS
+boost_version_req=$1
+IFS=.
+set x $boost_version_req 0 0 0
+IFS=$boost_save_IFS
+shift
+boost_version_req=`expr "$[1]" '*' 100000 + "$[2]" '*' 100 + "$[3]"`
+boost_version_req_string=$[1].$[2].$[3]
+AC_ARG_WITH([boost],
+ [AS_HELP_STRING([--with-boost=DIR],
+ [prefix of Boost $1 @<:@guess@:>@])])dnl
+AC_ARG_VAR([BOOST_ROOT],[Location of Boost installation])dnl
+# If BOOST_ROOT is set and the user has not provided a value to
+# --with-boost, then treat BOOST_ROOT as if it the user supplied it.
+if test x"$BOOST_ROOT" != x; then
+ if test x"$with_boost" = x; then
+ AC_MSG_NOTICE([Detected BOOST_ROOT; continuing with --with-boost=$BOOST_ROOT])
+ with_boost=$BOOST_ROOT
+ else
+ AC_MSG_NOTICE([Detected BOOST_ROOT=$BOOST_ROOT, but overridden by --with-boost=$with_boost])
+ fi
+fi
+AC_SUBST([DISTCHECK_CONFIGURE_FLAGS],
+ ["$DISTCHECK_CONFIGURE_FLAGS '--with-boost=$with_boost'"])dnl
+boost_save_CPPFLAGS=$CPPFLAGS
+ AC_CACHE_CHECK([for Boost headers version >= $boost_version_req_string],
+ [boost_cv_inc_path],
+ [boost_cv_inc_path=no
+AC_LANG_PUSH([C++])dnl
+m4_pattern_allow([^BOOST_VERSION$])dnl
+ AC_LANG_CONFTEST([AC_LANG_PROGRAM([[#include <boost/version.hpp>
+#if !defined BOOST_VERSION
+# error BOOST_VERSION is not defined
+#elif BOOST_VERSION < $boost_version_req
+# error Boost headers version < $boost_version_req
+#endif
+]])])
+ # If the user provided a value to --with-boost, use it and only it.
+ case $with_boost in #(
+ ''|yes) set x '' /opt/local/include /usr/local/include /opt/include \
+ /usr/include C:/Boost/include;; #(
+ *) set x "$with_boost/include" "$with_boost";;
+ esac
+ shift
+ for boost_dir
+ do
+ # Without --layout=system, Boost (or at least some versions) installs
+ # itself in <prefix>/include/boost-<version>. This inner loop helps to
+ # find headers in such directories.
+ #
+ # Any ${boost_dir}/boost-x_xx directories are searched in reverse version
+ # order followed by ${boost_dir}. The final '.' is a sentinel for
+ # searching $boost_dir" itself. Entries are whitespace separated.
+ #
+ # I didn't indent this loop on purpose (to avoid over-indented code)
+ boost_layout_system_search_list=`cd "$boost_dir" 2>/dev/null \
+ && ls -1 | "${GREP}" '^boost-' | sort -rn -t- -k2 \
+ && echo .`
+ for boost_inc in $boost_layout_system_search_list
+ do
+ if test x"$boost_inc" != x.; then
+ boost_inc="$boost_dir/$boost_inc"
+ else
+ boost_inc="$boost_dir" # Uses sentinel in boost_layout_system_search_list
+ fi
+ if test x"$boost_inc" != x; then
+ # We are going to check whether the version of Boost installed
+ # in $boost_inc is usable by running a compilation that
+ # #includes it. But if we pass a -I/some/path in which Boost
+ # is not installed, the compiler will just skip this -I and
+ # use other locations (either from CPPFLAGS, or from its list
+ # of system include directories). As a result we would use
+ # header installed on the machine instead of the /some/path
+ # specified by the user. So in that precise case (trying
+ # $boost_inc), make sure the version.hpp exists.
+ #
+ # Use test -e as there can be symlinks.
+ test -e "$boost_inc/boost/version.hpp" || continue
+ CPPFLAGS="$CPPFLAGS -I$boost_inc"
+ fi
+ AC_COMPILE_IFELSE([], [boost_cv_inc_path=yes], [boost_cv_version=no])
+ if test x"$boost_cv_inc_path" = xyes; then
+ if test x"$boost_inc" != x; then
+ boost_cv_inc_path=$boost_inc
+ fi
+ break 2
+ fi
+ done
+ done
+AC_LANG_POP([C++])dnl
+ ])
+ case $boost_cv_inc_path in #(
+ no)
+ boost_errmsg="cannot find Boost headers version >= $boost_version_req_string"
+ m4_if([$2], [], [AC_MSG_ERROR([$boost_errmsg])],
+ [AC_MSG_NOTICE([$boost_errmsg])])
+ $2
+ ;;#(
+ yes)
+ BOOST_CPPFLAGS=
+ ;;#(
+ *)
+ AC_SUBST([BOOST_CPPFLAGS], ["-I$boost_cv_inc_path"])dnl
+ ;;
+ esac
+ if test x"$boost_cv_inc_path" != xno; then
+ AC_DEFINE([HAVE_BOOST], [1],
+ [Defined if the requested minimum BOOST version is satisfied])
+ AC_CACHE_CHECK([for Boost's header version],
+ [boost_cv_lib_version],
+ [m4_pattern_allow([^BOOST_LIB_VERSION$])dnl
+ _BOOST_SED_CPP([[/^boost-lib-version = /{s///;s/[\" ]//g;p;q;}]],
+ [#include <boost/version.hpp>
+boost-lib-version = BOOST_LIB_VERSION],
+ [boost_cv_lib_version=`cat conftest.i`])])
+ # e.g. "134" for 1_34_1 or "135" for 1_35
+ boost_major_version=`echo "$boost_cv_lib_version" | sed 's/_//;s/_.*//'`
+ case $boost_major_version in #(
+ '' | *[[!0-9]]*)
+ AC_MSG_ERROR([invalid value: boost_major_version='$boost_major_version'])
+ ;;
+ esac
+fi
+CPPFLAGS=$boost_save_CPPFLAGS
+])# BOOST_REQUIRE
+
+
+# BOOST_STATIC()
+# --------------
+# Add the "--enable-static-boost" configure argument. If this argument is given
+# on the command line, static versions of the libraries will be looked up.
+AC_DEFUN([BOOST_STATIC],
+ [AC_ARG_ENABLE([static-boost],
+ [AS_HELP_STRING([--enable-static-boost],
+ [Prefer the static boost libraries over the shared ones [no]])],
+ [enable_static_boost=yes],
+ [enable_static_boost=no])])# BOOST_STATIC
+
+
+# BOOST_FIND_HEADER([HEADER-NAME], [ACTION-IF-NOT-FOUND], [ACTION-IF-FOUND])
+# --------------------------------------------------------------------------
+# Wrapper around AC_CHECK_HEADER for Boost headers. Useful to check for
+# some parts of the Boost library which are only made of headers and don't
+# require linking (such as Boost.Foreach).
+#
+# Default ACTION-IF-NOT-FOUND: Fail with a fatal error unless Boost couldn't be
+# found in the first place, in which case by default a notice is issued to the
+# user. Presumably if we haven't died already it's because it's OK to not have
+# Boost, which is why only a notice is issued instead of a hard error.
+#
+# Default ACTION-IF-FOUND: define the preprocessor symbol HAVE_<HEADER-NAME> in
+# case of success # (where HEADER-NAME is written LIKE_THIS, e.g.,
+# HAVE_BOOST_FOREACH_HPP).
+AC_DEFUN([BOOST_FIND_HEADER],
+[AC_REQUIRE([BOOST_REQUIRE])dnl
+if test x"$boost_cv_inc_path" = xno; then
+ m4_default([$2], [AC_MSG_NOTICE([Boost not available, not searching for $1])])
+else
+AC_LANG_PUSH([C++])dnl
+boost_save_CPPFLAGS=$CPPFLAGS
+CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
+AC_CHECK_HEADER([$1],
+ [m4_default([$3], [AC_DEFINE(AS_TR_CPP([HAVE_$1]), [1],
+ [Define to 1 if you have <$1>])])],
+ [m4_default([$2], [AC_MSG_ERROR([cannot find $1])])])
+CPPFLAGS=$boost_save_CPPFLAGS
+AC_LANG_POP([C++])dnl
+fi
+])# BOOST_FIND_HEADER
+
+
+# BOOST_FIND_LIBS([COMPONENT-NAME], [CANDIDATE-LIB-NAMES],
+# [PREFERRED-RT-OPT], [HEADER-NAME], [CXX-TEST],
+# [CXX-PROLOGUE])
+# --------------------------------------------------------------
+# Look for the Boost library COMPONENT-NAME (e.g., `thread', for
+# libboost_thread) under the possible CANDIDATE-LIB-NAMES (e.g.,
+# "thread_win32 thread"). Check that HEADER-NAME works and check that
+# libboost_LIB-NAME can link with the code CXX-TEST. The optional
+# argument CXX-PROLOGUE can be used to include some C++ code before
+# the `main' function.
+#
+# Invokes BOOST_FIND_HEADER([HEADER-NAME]) (see above).
+#
+# Boost libraries typically come compiled with several flavors (with different
+# runtime options) so PREFERRED-RT-OPT is the preferred suffix. A suffix is one
+# or more of the following letters: sgdpn (in that order). s = static
+# runtime, d = debug build, g = debug/diagnostic runtime, p = STLPort build,
+# n = (unsure) STLPort build without iostreams from STLPort (it looks like `n'
+# must always be used along with `p'). Additionally, PREFERRED-RT-OPT can
+# start with `mt-' to indicate that there is a preference for multi-thread
+# builds. Some sample values for PREFERRED-RT-OPT: (nothing), mt, d, mt-d, gdp
+# ... If you want to make sure you have a specific version of Boost
+# (eg, >= 1.33) you *must* invoke BOOST_REQUIRE before this macro.
+AC_DEFUN([BOOST_FIND_LIBS],
+[AC_REQUIRE([BOOST_REQUIRE])dnl
+AC_REQUIRE([_BOOST_FIND_COMPILER_TAG])dnl
+AC_REQUIRE([BOOST_STATIC])dnl
+AC_REQUIRE([_BOOST_GUESS_WHETHER_TO_USE_MT])dnl
+if test x"$boost_cv_inc_path" = xno; then
+ AC_MSG_NOTICE([Boost not available, not searching for the Boost $1 library])
+else
+dnl The else branch is huge and wasn't intended on purpose.
+AC_LANG_PUSH([C++])dnl
+AS_VAR_PUSHDEF([Boost_lib], [boost_cv_lib_$1])dnl
+AS_VAR_PUSHDEF([Boost_lib_LDFLAGS], [boost_cv_lib_$1_LDFLAGS])dnl
+AS_VAR_PUSHDEF([Boost_lib_LDPATH], [boost_cv_lib_$1_LDPATH])dnl
+AS_VAR_PUSHDEF([Boost_lib_LIBS], [boost_cv_lib_$1_LIBS])dnl
+BOOST_FIND_HEADER([$4])
+boost_save_CPPFLAGS=$CPPFLAGS
+CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
+AC_CACHE_CHECK([for the Boost $1 library], [Boost_lib],
+ [_BOOST_FIND_LIBS($@)])
+case $Boost_lib in #(
+ (no) _AC_MSG_LOG_CONFTEST
+ AC_MSG_ERROR([cannot find the flags to link with Boost $1])
+ ;;
+esac
+AC_SUBST(AS_TR_CPP([BOOST_$1_LDFLAGS]), [$Boost_lib_LDFLAGS])dnl
+AC_SUBST(AS_TR_CPP([BOOST_$1_LDPATH]), [$Boost_lib_LDPATH])dnl
+AC_SUBST([BOOST_LDPATH], [$Boost_lib_LDPATH])dnl
+AC_SUBST(AS_TR_CPP([BOOST_$1_LIBS]), [$Boost_lib_LIBS])dnl
+CPPFLAGS=$boost_save_CPPFLAGS
+AS_VAR_POPDEF([Boost_lib])dnl
+AS_VAR_POPDEF([Boost_lib_LDFLAGS])dnl
+AS_VAR_POPDEF([Boost_lib_LDPATH])dnl
+AS_VAR_POPDEF([Boost_lib_LIBS])dnl
+AC_LANG_POP([C++])dnl
+fi
+])
+
+
+# BOOST_FIND_LIB([LIB-NAME],
+# [PREFERRED-RT-OPT], [HEADER-NAME], [CXX-TEST],
+# [CXX-PROLOGUE])
+# --------------------------------------------------------------
+# Backward compatibility wrapper for BOOST_FIND_LIBS.
+AC_DEFUN([BOOST_FIND_LIB],
+[BOOST_FIND_LIBS([$1], $@)])
+
+
+# _BOOST_FIND_LIBS([LIB-NAME], [CANDIDATE-LIB-NAMES],
+# [PREFERRED-RT-OPT], [HEADER-NAME], [CXX-TEST],
+# [CXX-PROLOGUE])
+# --------------------------------------------------------------
+# Real implementation of BOOST_FIND_LIBS: rely on these local macros:
+# Boost_lib, Boost_lib_LDFLAGS, Boost_lib_LDPATH, Boost_lib_LIBS
+#
+# The algorithm is as follows: first look for a given library name
+# according to the user's PREFERRED-RT-OPT. For each library name, we
+# prefer to use the ones that carry the tag (toolset name). Each
+# library is searched through the various standard paths were Boost is
+# usually installed. If we can't find the standard variants, we try
+# to enforce -mt (for instance on MacOSX, libboost_thread.dylib
+# doesn't exist but there's -obviously- libboost_thread-mt.dylib).
+AC_DEFUN([_BOOST_FIND_LIBS],
+[Boost_lib=no
+ case "$3" in #(
+ (mt | mt-) boost_mt=-mt; boost_rtopt=;; #(
+ (mt* | mt-*) boost_mt=-mt; boost_rtopt=`expr "X$3" : 'Xmt-*\(.*\)'`;; #(
+ (*) boost_mt=; boost_rtopt=$3;;
+ esac
+ if test $enable_static_boost = yes; then
+ boost_rtopt="s$boost_rtopt"
+ fi
+ # Find the proper debug variant depending on what we've been asked to find.
+ case $boost_rtopt in #(
+ (*d*) boost_rt_d=$boost_rtopt;; #(
+ (*[[sgpn]]*) # Insert the `d' at the right place (in between `sg' and `pn')
+ boost_rt_d=`echo "$boost_rtopt" | sed 's/\(s*g*\)\(p*n*\)/\1\2/'`;; #(
+ (*) boost_rt_d='-d';;
+ esac
+ # If the PREFERRED-RT-OPT are not empty, prepend a `-'.
+ test -n "$boost_rtopt" && boost_rtopt="-$boost_rtopt"
+ $boost_guess_use_mt && boost_mt=-mt
+ # Look for the abs path the static archive.
+ # $libext is computed by Libtool but let's make sure it's non empty.
+ test -z "$libext" &&
+ AC_MSG_ERROR([the libext variable is empty, did you invoke Libtool?])
+ boost_save_ac_objext=$ac_objext
+ # Generate the test file.
+ AC_LANG_CONFTEST([AC_LANG_PROGRAM([#include <$4>
+$6], [$5])])
+dnl Optimization hacks: compiling C++ is slow, especially with Boost. What
+dnl we're trying to do here is guess the right combination of link flags
+dnl (LIBS / LDFLAGS) to use a given library. This can take several
+dnl iterations before it succeeds and is thus *very* slow. So what we do
+dnl instead is that we compile the code first (and thus get an object file,
+dnl typically conftest.o). Then we try various combinations of link flags
+dnl until we succeed to link conftest.o in an executable. The problem is
+dnl that the various TRY_LINK / COMPILE_IFELSE macros of Autoconf always
+dnl remove all the temporary files including conftest.o. So the trick here
+dnl is to temporarily change the value of ac_objext so that conftest.o is
+dnl preserved accross tests. This is obviously fragile and I will burn in
+dnl hell for not respecting Autoconf's documented interfaces, but in the
+dnl mean time, it optimizes the macro by a factor of 5 to 30.
+dnl Another small optimization: the first argument of AC_COMPILE_IFELSE left
+dnl empty because the test file is generated only once above (before we
+dnl start the for loops).
+ AC_COMPILE_IFELSE([],
+ [ac_objext=do_not_rm_me_plz],
+ [AC_MSG_ERROR([cannot compile a test that uses Boost $1])])
+ ac_objext=$boost_save_ac_objext
+ boost_failed_libs=
+# Don't bother to ident the following nested for loops, only the 2
+# innermost ones matter.
+for boost_lib_ in $2; do
+for boost_tag_ in -$boost_cv_lib_tag ''; do
+for boost_ver_ in -$boost_cv_lib_version ''; do
+for boost_mt_ in $boost_mt -mt ''; do
+for boost_rtopt_ in $boost_rtopt '' -d; do
+ for boost_lib in \
+ boost_$boost_lib_$boost_tag_$boost_mt_$boost_rtopt_$boost_ver_ \
+ boost_$boost_lib_$boost_tag_$boost_rtopt_$boost_ver_ \
+ boost_$boost_lib_$boost_tag_$boost_mt_$boost_ver_ \
+ boost_$boost_lib_$boost_tag_$boost_ver_
+ do
+ # Avoid testing twice the same lib
+ case $boost_failed_libs in #(
+ (*@$boost_lib@*) continue;;
+ esac
+ # If with_boost is empty, we'll search in /lib first, which is not quite
+ # right so instead we'll try to a location based on where the headers are.
+ boost_tmp_lib=$with_boost
+ test x"$with_boost" = x && boost_tmp_lib=${boost_cv_inc_path%/include}
+ for boost_ldpath in "$boost_tmp_lib/lib" '' \
+ /opt/local/lib* /usr/local/lib* /opt/lib* /usr/lib* \
+ "$with_boost" C:/Boost/lib /lib*
+ do
+ # Don't waste time with directories that don't exist.
+ if test x"$boost_ldpath" != x && test ! -e "$boost_ldpath"; then
+ continue
+ fi
+ boost_save_LDFLAGS=$LDFLAGS
+ # Are we looking for a static library?
+ case $boost_ldpath:$boost_rtopt_ in #(
+ (*?*:*s*) # Yes (Non empty boost_ldpath + s in rt opt)
+ Boost_lib_LIBS="$boost_ldpath/lib$boost_lib.$libext"
+ test -e "$Boost_lib_LIBS" || continue;; #(
+ (*) # No: use -lboost_foo to find the shared library.
+ Boost_lib_LIBS="-l$boost_lib";;
+ esac
+ boost_save_LIBS=$LIBS
+ LIBS="$Boost_lib_LIBS $LIBS"
+ test x"$boost_ldpath" != x && LDFLAGS="$LDFLAGS -L$boost_ldpath"
+dnl First argument of AC_LINK_IFELSE left empty because the test file is
+dnl generated only once above (before we start the for loops).
+ _BOOST_AC_LINK_IFELSE([],
+ [Boost_lib=yes], [Boost_lib=no])
+ ac_objext=$boost_save_ac_objext
+ LDFLAGS=$boost_save_LDFLAGS
+ LIBS=$boost_save_LIBS
+ if test x"$Boost_lib" = xyes; then
+ # Check or used cached result of whether or not using -R or
+ # -rpath makes sense. Some implementations of ld, such as for
+ # Mac OSX, require -rpath but -R is the flag known to work on
+ # other systems. https://github.com/tsuna/boost.m4/issues/19
+ AC_CACHE_VAL([boost_cv_rpath_link_ldflag],
+ [case $boost_ldpath in
+ '') # Nothing to do.
+ boost_cv_rpath_link_ldflag=
+ boost_rpath_link_ldflag_found=yes;;
+ *)
+ for boost_cv_rpath_link_ldflag in -Wl,-R, -Wl,-rpath,; do
+ LDFLAGS="$boost_save_LDFLAGS -L$boost_ldpath $boost_cv_rpath_link_ldflag$boost_ldpath"
+ LIBS="$boost_save_LIBS $Boost_lib_LIBS"
+ _BOOST_AC_LINK_IFELSE([],
+ [boost_rpath_link_ldflag_found=yes
+ break],
+ [boost_rpath_link_ldflag_found=no])
+ done
+ ;;
+ esac
+ AS_IF([test "x$boost_rpath_link_ldflag_found" != "xyes"],
+ [AC_MSG_ERROR([Unable to determine whether to use -R or -rpath])])
+ LDFLAGS=$boost_save_LDFLAGS
+ LIBS=$boost_save_LIBS
+ ])
+ test x"$boost_ldpath" != x &&
+ Boost_lib_LDFLAGS="-L$boost_ldpath $boost_cv_rpath_link_ldflag$boost_ldpath"
+ Boost_lib_LDPATH="$boost_ldpath"
+ break 7
+ else
+ boost_failed_libs="$boost_failed_libs@$boost_lib@"
+ fi
+ done
+ done
+done
+done
+done
+done
+done # boost_lib_
+rm -f conftest.$ac_objext
+])
+
+
+
+# --------------------------------------- #
+# Checks for the various Boost libraries. #
+# --------------------------------------- #
+
+# List of boost libraries: http://www.boost.org/libs/libraries.htm
+# The page http://beta.boost.org/doc/libs is useful: it gives the first release
+# version of each library (among other things).
+
+# BOOST_DEFUN(LIBRARY, CODE)
+# --------------------------
+# Define BOOST_<LIBRARY-UPPERCASE> as a macro that runs CODE.
+#
+# Use indir to avoid the warning on underquoted macro name given to AC_DEFUN.
+m4_define([BOOST_DEFUN],
+[m4_indir([AC_DEFUN],
+ m4_toupper([BOOST_$1]),
+[m4_pushdef([BOOST_Library], [$1])dnl
+$2
+m4_popdef([BOOST_Library])dnl
+])
+])
+
+# BOOST_ARRAY()
+# -------------
+# Look for Boost.Array
+BOOST_DEFUN([Array],
+[BOOST_FIND_HEADER([boost/array.hpp])])
+
+
+# BOOST_ASIO()
+# ------------
+# Look for Boost.Asio (new in Boost 1.35).
+BOOST_DEFUN([Asio],
+[AC_REQUIRE([BOOST_SYSTEM])dnl
+BOOST_FIND_HEADER([boost/asio.hpp])])
+
+
+# BOOST_BIND()
+# ------------
+# Look for Boost.Bind.
+BOOST_DEFUN([Bind],
+[BOOST_FIND_HEADER([boost/bind.hpp])])
+
+
+# BOOST_CHRONO()
+# --------------
+# Look for Boost.Chrono.
+BOOST_DEFUN([Chrono],
+[# Do we have to check for Boost.System? This link-time dependency was
+# added as of 1.35.0. If we have a version <1.35, we must not attempt to
+# find Boost.System as it didn't exist by then.
+if test $boost_major_version -ge 135; then
+ BOOST_SYSTEM([$1])
+fi # end of the Boost.System check.
+boost_filesystem_save_LIBS=$LIBS
+boost_filesystem_save_LDFLAGS=$LDFLAGS
+m4_pattern_allow([^BOOST_SYSTEM_(LIBS|LDFLAGS)$])dnl
+LIBS="$LIBS $BOOST_SYSTEM_LIBS"
+LDFLAGS="$LDFLAGS $BOOST_SYSTEM_LDFLAGS"
+BOOST_FIND_LIB([chrono], [$1],
+ [boost/chrono.hpp],
+ [boost::chrono::thread_clock d;])
+if test $enable_static_boost = yes && test $boost_major_version -ge 135; then
+ BOOST_CHRONO_LIBS="$BOOST_CHRONO_LIBS $BOOST_SYSTEM_LIBS"
+fi
+LIBS=$boost_filesystem_save_LIBS
+LDFLAGS=$boost_filesystem_save_LDFLAGS
+])# BOOST_CHRONO
+
+
+# BOOST_CONTEXT([PREFERRED-RT-OPT])
+# -----------------------------------
+# Look for Boost.Context. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+#
+# * This library was introduced in Boost 1.51.0
+# * The signatures of make_fcontext() and jump_fcontext were changed in 1.56.0
+# * A dependency on boost_thread appears in 1.57.0
+BOOST_DEFUN([Context],
+[boost_context_save_LIBS=$LIBS
+ boost_context_save_LDFLAGS=$LDFLAGS
+if test $boost_major_version -ge 157; then
+ BOOST_THREAD([$1])
+ m4_pattern_allow([^BOOST_THREAD_(LIBS|LDFLAGS)$])dnl
+ LIBS="$LIBS $BOOST_THREAD_LIBS"
+ LDFLAGS="$LDFLAGS $BOOST_THREAD_LDFLAGS"
+fi
+BOOST_FIND_LIB([context], [$1],
+ [boost/context/all.hpp],[[
+
+// creates a stack
+void * stack_pointer = new void*[4096];
+std::size_t const size = sizeof(void*[4096]);
+
+#if BOOST_VERSION <= 105100
+ctx::make_fcontext(&fc, f);
+return ctx::jump_fcontext(&fcm, &fc, 3) == 6;
+
+#else
+
+fc = ctx::make_fcontext(stack_pointer, size, f);
+return ctx::jump_fcontext(&fcm, fc, 3) == 6;
+
+#endif
+
+
+]],[dnl
+
+#include <boost/version.hpp>
+#if BOOST_VERSION <= 105100
+
+namespace ctx = boost::ctx;
+
+static ctx::fcontext_t fcm, fc;
+
+static void f(intptr_t i) {
+ ctx::jump_fcontext(&fc, &fcm, i * 2);
+}
+
+#elif BOOST_VERSION <= 105500
+
+namespace ctx = boost::context;
+
+// context
+static ctx::fcontext_t fcm, *fc;
+
+// context-function
+static void f(intptr_t i) {
+ ctx::jump_fcontext(fc, &fcm, i * 2);
+}
+
+#else
+
+namespace ctx = boost::context;
+
+// context
+static ctx::fcontext_t fcm, fc;
+
+// context-function
+static void f(intptr_t i) {
+ ctx::jump_fcontext(&fc, fcm, i * 2);
+}
+#endif
+])
+LIBS=$boost_context_save_LIBS
+LDFLAGS=$boost_context_save_LDFLAGS
+])# BOOST_CONTEXT
+
+
+# BOOST_CONVERSION()
+# ------------------
+# Look for Boost.Conversion (cast / lexical_cast)
+BOOST_DEFUN([Conversion],
+[BOOST_FIND_HEADER([boost/cast.hpp])
+BOOST_FIND_HEADER([boost/lexical_cast.hpp])
+])# BOOST_CONVERSION
+
+
+# BOOST_COROUTINE([PREFERRED-RT-OPT])
+# -----------------------------------
+# Look for Boost.Coroutine. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above. This library was introduced in Boost
+# 1.53.0
+BOOST_DEFUN([Coroutine],
+[
+boost_coroutine_save_LIBS=$LIBS
+boost_coroutine_save_LDFLAGS=$LDFLAGS
+# Link-time dependency from coroutine to context
+BOOST_CONTEXT([$1])
+# Starting from Boost 1.55 a dependency on Boost.System is added
+if test $boost_major_version -ge 155; then
+ BOOST_SYSTEM([$1])
+fi
+m4_pattern_allow([^BOOST_(CONTEXT|SYSTEM)_(LIBS|LDFLAGS)])
+LIBS="$LIBS $BOOST_CONTEXT_LIBS $BOOST_SYSTEM_LIBS"
+LDFLAGS="$LDFLAGS $BOOST_CONTEXT_LDFLAGS"
+
+# in 1.53 coroutine was a header only library
+if test $boost_major_version -eq 153; then
+ BOOST_FIND_HEADER([boost/coroutine/coroutine.hpp])
+else
+ BOOST_FIND_LIB([coroutine], [$1],
+ [boost/coroutine/coroutine.hpp],
+ [
+ #include <boost/version.hpp>
+ #if BOOST_VERSION <= 105500
+ boost::coroutines::coroutine<int(int)> coro; coro.get();
+ #else
+ boost::coroutines::asymmetric_coroutine<int>::pull_type coro; coro.get();
+ #endif
+ ])
+fi
+# Link-time dependency from coroutine to context, existed only in 1.53, in 1.54
+# coroutine doesn't use context from its headers but from its library.
+if test $boost_major_version -eq 153 || test $enable_static_boost = yes && test $boost_major_version -ge 154; then
+ BOOST_COROUTINE_LIBS="$BOOST_COROUTINE_LIBS $BOOST_CONTEXT_LIBS"
+ BOOST_COROUTINE_LDFLAGS="$BOOST_COROUTINE_LDFLAGS $BOOST_CONTEXT_LDFLAGS"
+fi
+if test $enable_static_boost = yes && test $boost_major_version -ge 155; then
+ BOOST_COROUTINE_LIBS="$BOOST_COROUTINE_LIBS $BOOST_SYSTEM_LIBS"
+ BOOST_COROUTINE_LDFLAGS="$BOOST_COROUTINE_LDFLAGS $BOOST_SYSTEM_LDFLAGS"
+fi
+LIBS=$boost_coroutine_save_LIBS
+LDFLAGS=$boost_coroutine_save_LDFLAGS
+])# BOOST_COROUTINE
+
+
+# BOOST_CRC()
+# -----------
+# Look for Boost.CRC
+BOOST_DEFUN([CRC],
+[BOOST_FIND_HEADER([boost/crc.hpp])
+])# BOOST_CRC
+
+
+# BOOST_DATE_TIME([PREFERRED-RT-OPT])
+# -----------------------------------
+# Look for Boost.Date_Time. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Date_Time],
+[BOOST_FIND_LIB([date_time], [$1],
+ [boost/date_time/posix_time/posix_time.hpp],
+ [boost::posix_time::ptime t;])
+])# BOOST_DATE_TIME
+
+
+# BOOST_FILESYSTEM([PREFERRED-RT-OPT])
+# ------------------------------------
+# Look for Boost.Filesystem. For the documentation of PREFERRED-RT-OPT, see
+# the documentation of BOOST_FIND_LIB above.
+# Do not check for boost/filesystem.hpp because this file was introduced in
+# 1.34.
+BOOST_DEFUN([Filesystem],
+[# Do we have to check for Boost.System? This link-time dependency was
+# added as of 1.35.0. If we have a version <1.35, we must not attempt to
+# find Boost.System as it didn't exist by then.
+if test $boost_major_version -ge 135; then
+ BOOST_SYSTEM([$1])
+fi # end of the Boost.System check.
+boost_filesystem_save_LIBS=$LIBS
+boost_filesystem_save_LDFLAGS=$LDFLAGS
+m4_pattern_allow([^BOOST_SYSTEM_(LIBS|LDFLAGS)$])dnl
+LIBS="$LIBS $BOOST_SYSTEM_LIBS"
+LDFLAGS="$LDFLAGS $BOOST_SYSTEM_LDFLAGS"
+BOOST_FIND_LIB([filesystem], [$1],
+ [boost/filesystem/path.hpp], [boost::filesystem::path p;])
+if test $enable_static_boost = yes && test $boost_major_version -ge 135; then
+ BOOST_FILESYSTEM_LIBS="$BOOST_FILESYSTEM_LIBS $BOOST_SYSTEM_LIBS"
+fi
+LIBS=$boost_filesystem_save_LIBS
+LDFLAGS=$boost_filesystem_save_LDFLAGS
+])# BOOST_FILESYSTEM
+
+
+# BOOST_FLYWEIGHT()
+# -----------------
+# Look for Boost.Flyweight.
+BOOST_DEFUN([Flyweight],
+[dnl There's a hidden dependency on pthreads.
+AC_REQUIRE([_BOOST_PTHREAD_FLAG])dnl
+BOOST_FIND_HEADER([boost/flyweight.hpp])
+AC_SUBST([BOOST_FLYWEIGHT_LIBS], [$boost_cv_pthread_flag])
+])
+
+
+# BOOST_FOREACH()
+# ---------------
+# Look for Boost.Foreach.
+BOOST_DEFUN([Foreach],
+[BOOST_FIND_HEADER([boost/foreach.hpp])])
+
+
+# BOOST_FORMAT()
+# --------------
+# Look for Boost.Format.
+# Note: we can't check for boost/format/format_fwd.hpp because the header isn't
+# standalone. It can't be compiled because it triggers the following error:
+# boost/format/detail/config_macros.hpp:88: error: 'locale' in namespace 'std'
+# does not name a type
+BOOST_DEFUN([Format],
+[BOOST_FIND_HEADER([boost/format.hpp])])
+
+
+# BOOST_FUNCTION()
+# ----------------
+# Look for Boost.Function
+BOOST_DEFUN([Function],
+[BOOST_FIND_HEADER([boost/function.hpp])])
+
+
+# BOOST_GEOMETRY()
+# ----------------
+# Look for Boost.Geometry (new since 1.47.0).
+BOOST_DEFUN([Geometry],
+[BOOST_FIND_HEADER([boost/geometry.hpp])
+])# BOOST_GEOMETRY
+
+
+# BOOST_GRAPH([PREFERRED-RT-OPT])
+# -------------------------------
+# Look for Boost.Graphs. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Graph],
+[boost_graph_save_LIBS=$LIBS
+boost_graph_save_LDFLAGS=$LDFLAGS
+# Link-time dependency from graph to regex was added as of 1.40.0.
+if test $boost_major_version -ge 140; then
+ BOOST_REGEX([$1])
+ m4_pattern_allow([^BOOST_REGEX_(LIBS|LDFLAGS)$])dnl
+ LIBS="$LIBS $BOOST_REGEX_LIBS"
+ LDFLAGS="$LDFLAGS $BOOST_REGEX_LDFLAGS"
+fi
+BOOST_FIND_LIB([graph], [$1],
+ [boost/graph/adjacency_list.hpp], [boost::adjacency_list<> g;])
+LIBS=$boost_graph_save_LIBS
+LDFLAGS=$boost_graph_save_LDFLAGS
+])# BOOST_GRAPH
+
+
+# BOOST_IOSTREAMS([PREFERRED-RT-OPT])
+# -----------------------------------
+# Look for Boost.IOStreams. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([IOStreams],
+[BOOST_FIND_LIB([iostreams], [$1],
+ [boost/iostreams/device/file_descriptor.hpp],
+ [boost::iostreams::file_descriptor fd; fd.close();])
+])# BOOST_IOSTREAMS
+
+
+# BOOST_HASH()
+# ------------
+# Look for Boost.Functional/Hash
+BOOST_DEFUN([Hash],
+[BOOST_FIND_HEADER([boost/functional/hash.hpp])])
+
+
+# BOOST_LAMBDA()
+# --------------
+# Look for Boost.Lambda
+BOOST_DEFUN([Lambda],
+[BOOST_FIND_HEADER([boost/lambda/lambda.hpp])])
+
+
+# BOOST_LOCALE()
+# --------------
+# Look for Boost.Locale
+BOOST_DEFUN([Locale],
+[
+boost_locale_save_LIBS=$LIBS
+boost_locale_save_LDFLAGS=$LDFLAGS
+# require SYSTEM for boost-1.50.0 and up
+if test $boost_major_version -ge 150; then
+ BOOST_SYSTEM([$1])
+ m4_pattern_allow([^BOOST_SYSTEM_(LIBS|LDFLAGS)$])dnl
+ LIBS="$LIBS $BOOST_SYSTEM_LIBS"
+ LDFLAGS="$LDFLAGS $BOOST_SYSTEM_LDFLAGS"
+fi # end of the Boost.System check.
+BOOST_FIND_LIB([locale], [$1],
+ [boost/locale.hpp],
+ [[boost::locale::generator gen; std::locale::global(gen(""));]])
+LIBS=$boost_locale_save_LIBS
+LDFLAGS=$boost_locale_save_LDFLAGS
+])# BOOST_LOCALE
+
+# BOOST_LOG([PREFERRED-RT-OPT])
+# -----------------------------
+# Look for Boost.Log. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Log],
+[boost_log_save_LIBS=$LIBS
+boost_log_save_LDFLAGS=$LDFLAGS
+BOOST_SYSTEM([$1])
+BOOST_FILESYSTEM([$1])
+BOOST_DATE_TIME([$1])
+m4_pattern_allow([^BOOST_(SYSTEM|FILESYSTEM|DATE_TIME)_(LIBS|LDFLAGS)$])dnl
+LIBS="$LIBS $BOOST_DATE_TIME_LIBS $BOOST_FILESYSTEM_LIBS $BOOST_SYSTEM_LIBS"
+LDFLAGS="$LDFLAGS $BOOST_DATE_TIME_LDFLAGS $BOOST_FILESYSTEM_LDFLAGS $BOOST_SYSTEM_LDFLAGS"
+BOOST_FIND_LIB([log], [$1],
+ [boost/log/core/core.hpp],
+ [boost::log::attribute a; a.get_value();])
+LIBS=$boost_log_save_LIBS
+LDFLAGS=$boost_log_save_LDFLAGS
+])# BOOST_LOG
+
+
+# BOOST_LOG_SETUP([PREFERRED-RT-OPT])
+# -----------------------------------
+# Look for Boost.Log. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Log_Setup],
+[boost_log_setup_save_LIBS=$LIBS
+boost_log_setup_save_LDFLAGS=$LDFLAGS
+BOOST_LOG([$1])
+m4_pattern_allow([^BOOST_LOG_(LIBS|LDFLAGS)$])dnl
+LIBS="$LIBS $BOOST_LOG_LIBS"
+LDFLAGS="$LDFLAGS $BOOST_LOG_LDFLAGS"
+BOOST_FIND_LIB([log_setup], [$1],
+ [boost/log/utility/setup/from_settings.hpp],
+ [boost::log::basic_settings<char> bs; bs.empty();])
+LIBS=$boost_log_setup_save_LIBS
+LDFLAGS=$boost_log_setup_save_LDFLAGS
+])# BOOST_LOG_SETUP
+
+
+# BOOST_MATH()
+# ------------
+# Look for Boost.Math
+# TODO: This library isn't header-only but it comes in multiple different
+# flavors that don't play well with BOOST_FIND_LIB (e.g, libboost_math_c99,
+# libboost_math_c99f, libboost_math_c99l, libboost_math_tr1,
+# libboost_math_tr1f, libboost_math_tr1l). This macro must be fixed to do the
+# right thing anyway.
+BOOST_DEFUN([Math],
+[BOOST_FIND_HEADER([boost/math/special_functions.hpp])])
+
+
+# BOOST_MPI([PREFERRED-RT-OPT])
+# -------------------------------
+# Look for Boost MPI. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above. Uses MPICXX variable if it is
+# set, otherwise tries CXX
+#
+BOOST_DEFUN([MPI],
+[boost_save_CXX=${CXX}
+boost_save_CXXCPP=${CXXCPP}
+if test x"${MPICXX}" != x; then
+ CXX=${MPICXX}
+ CXXCPP="${MPICXX} -E"
+fi
+BOOST_FIND_LIB([mpi], [$1],
+ [boost/mpi.hpp],
+ [int argc = 0;
+ char **argv = 0;
+ boost::mpi::environment env(argc,argv);])
+CXX=${boost_save_CXX}
+CXXCPP=${boost_save_CXXCPP}
+])# BOOST_MPI
+
+
+# BOOST_MULTIARRAY()
+# ------------------
+# Look for Boost.MultiArray
+BOOST_DEFUN([MultiArray],
+[BOOST_FIND_HEADER([boost/multi_array.hpp])])
+
+
+# BOOST_NUMERIC_UBLAS()
+# --------------------------
+# Look for Boost.NumericUblas (Basic Linear Algebra)
+BOOST_DEFUN([Numeric_Ublas],
+[BOOST_FIND_HEADER([boost/numeric/ublas/vector.hpp])
+])# BOOST_NUMERIC_UBLAS
+
+
+# BOOST_NUMERIC_CONVERSION()
+# --------------------------
+# Look for Boost.NumericConversion (policy-based numeric conversion)
+BOOST_DEFUN([Numeric_Conversion],
+[BOOST_FIND_HEADER([boost/numeric/conversion/converter.hpp])
+])# BOOST_NUMERIC_CONVERSION
+
+
+# BOOST_OPTIONAL()
+# ----------------
+# Look for Boost.Optional
+BOOST_DEFUN([Optional],
+[BOOST_FIND_HEADER([boost/optional.hpp])])
+
+
+# BOOST_PREPROCESSOR()
+# --------------------
+# Look for Boost.Preprocessor
+BOOST_DEFUN([Preprocessor],
+[BOOST_FIND_HEADER([boost/preprocessor/repeat.hpp])])
+
+
+# BOOST_RANGE()
+# --------------------
+# Look for Boost.Range
+BOOST_DEFUN([Range],
+[BOOST_FIND_HEADER([boost/range/adaptors.hpp])])
+
+# BOOST_UNORDERED()
+# -----------------
+# Look for Boost.Unordered
+BOOST_DEFUN([Unordered],
+[BOOST_FIND_HEADER([boost/unordered_map.hpp])])
+
+
+# BOOST_UUID()
+# ------------
+# Look for Boost.Uuid
+BOOST_DEFUN([Uuid],
+[BOOST_FIND_HEADER([boost/uuid/uuid.hpp])])
+
+
+# BOOST_PROGRAM_OPTIONS([PREFERRED-RT-OPT])
+# -----------------------------------------
+# Look for Boost.Program_options. For the documentation of PREFERRED-RT-OPT,
+# see the documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Program_Options],
+[BOOST_FIND_LIB([program_options], [$1],
+ [boost/program_options.hpp],
+ [boost::program_options::options_description d("test");])
+])# BOOST_PROGRAM_OPTIONS
+
+
+
+# _BOOST_PYTHON_CONFIG(VARIABLE, FLAG)
+# ------------------------------------
+# Save VARIABLE, and define it via `python-config --FLAG`.
+# Substitute BOOST_PYTHON_VARIABLE.
+m4_define([_BOOST_PYTHON_CONFIG],
+[AC_SUBST([BOOST_PYTHON_$1],
+ [`python-config --$2 2>/dev/null`])dnl
+boost_python_save_$1=$$1
+$1="$$1 $BOOST_PYTHON_$1"])
+
+
+# BOOST_PYTHON([PREFERRED-RT-OPT])
+# --------------------------------
+# Look for Boost.Python. For the documentation of PREFERRED-RT-OPT,
+# see the documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Python],
+[_BOOST_PYTHON_CONFIG([CPPFLAGS], [includes])
+_BOOST_PYTHON_CONFIG([LDFLAGS], [ldflags])
+_BOOST_PYTHON_CONFIG([LIBS], [libs])
+m4_pattern_allow([^BOOST_PYTHON_MODULE$])dnl
+BOOST_FIND_LIBS([python], [python python3], [$1],
+ [boost/python.hpp],
+ [], [BOOST_PYTHON_MODULE(empty) {}])
+CPPFLAGS=$boost_python_save_CPPFLAGS
+LDFLAGS=$boost_python_save_LDFLAGS
+LIBS=$boost_python_save_LIBS
+])# BOOST_PYTHON
+
+
+# BOOST_REF()
+# -----------
+# Look for Boost.Ref
+BOOST_DEFUN([Ref],
+[BOOST_FIND_HEADER([boost/ref.hpp])])
+
+
+# BOOST_REGEX([PREFERRED-RT-OPT])
+# -------------------------------
+# Look for Boost.Regex. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Regex],
+[BOOST_FIND_LIB([regex], [$1],
+ [boost/regex.hpp],
+ [boost::regex exp("*"); boost::regex_match("foo", exp);])
+])# BOOST_REGEX
+
+
+# BOOST_SERIALIZATION([PREFERRED-RT-OPT])
+# ---------------------------------------
+# Look for Boost.Serialization. For the documentation of PREFERRED-RT-OPT, see
+# the documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Serialization],
+[BOOST_FIND_LIB([serialization], [$1],
+ [boost/archive/text_oarchive.hpp],
+ [std::ostream* o = 0; // Cheap way to get an ostream...
+ boost::archive::text_oarchive t(*o);])
+])# BOOST_SERIALIZATION
+
+
+# BOOST_SIGNALS([PREFERRED-RT-OPT])
+# ---------------------------------
+# Look for Boost.Signals. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Signals],
+[BOOST_FIND_LIB([signals], [$1],
+ [boost/signal.hpp],
+ [boost::signal<void ()> s;])
+])# BOOST_SIGNALS
+
+
+# BOOST_SIGNALS2()
+# ----------------
+# Look for Boost.Signals2 (new since 1.39.0).
+BOOST_DEFUN([Signals2],
+[BOOST_FIND_HEADER([boost/signals2.hpp])
+])# BOOST_SIGNALS2
+
+
+# BOOST_SMART_PTR()
+# -----------------
+# Look for Boost.SmartPtr
+BOOST_DEFUN([Smart_Ptr],
+[BOOST_FIND_HEADER([boost/scoped_ptr.hpp])
+BOOST_FIND_HEADER([boost/shared_ptr.hpp])
+])
+
+
+# BOOST_STATICASSERT()
+# --------------------
+# Look for Boost.StaticAssert
+BOOST_DEFUN([StaticAssert],
+[BOOST_FIND_HEADER([boost/static_assert.hpp])])
+
+
+# BOOST_STRING_ALGO()
+# -------------------
+# Look for Boost.StringAlgo
+BOOST_DEFUN([String_Algo],
+[BOOST_FIND_HEADER([boost/algorithm/string.hpp])
+])
+
+
+# BOOST_SYSTEM([PREFERRED-RT-OPT])
+# --------------------------------
+# Look for Boost.System. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above. This library was introduced in Boost
+# 1.35.0.
+BOOST_DEFUN([System],
+[BOOST_FIND_LIB([system], [$1],
+ [boost/system/error_code.hpp],
+ [boost::system::error_code e; e.clear();])
+])# BOOST_SYSTEM
+
+
+# BOOST_TEST([PREFERRED-RT-OPT])
+# ------------------------------
+# Look for Boost.Test. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Test],
+[m4_pattern_allow([^BOOST_CHECK$])dnl
+BOOST_FIND_LIB([unit_test_framework], [$1],
+ [boost/test/unit_test.hpp], [BOOST_CHECK(2 == 2);],
+ [using boost::unit_test::test_suite;
+ test_suite* init_unit_test_suite(int argc, char ** argv)
+ { return NULL; }])
+])# BOOST_TEST
+
+
+# BOOST_THREAD([PREFERRED-RT-OPT])
+# ---------------------------------
+# Look for Boost.Thread. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Thread],
+[dnl Having the pthread flag is required at least on GCC3 where
+dnl boost/thread.hpp would complain if we try to compile without
+dnl -pthread on GNU/Linux.
+AC_REQUIRE([_BOOST_PTHREAD_FLAG])dnl
+boost_thread_save_LIBS=$LIBS
+boost_thread_save_LDFLAGS=$LDFLAGS
+boost_thread_save_CPPFLAGS=$CPPFLAGS
+# Link-time dependency from thread to system was added as of 1.49.0.
+if test $boost_major_version -ge 149; then
+BOOST_SYSTEM([$1])
+fi # end of the Boost.System check.
+m4_pattern_allow([^BOOST_SYSTEM_(LIBS|LDFLAGS)$])dnl
+LIBS="$LIBS $BOOST_SYSTEM_LIBS $boost_cv_pthread_flag"
+LDFLAGS="$LDFLAGS $BOOST_SYSTEM_LDFLAGS"
+CPPFLAGS="$CPPFLAGS $boost_cv_pthread_flag"
+
+# When compiling for the Windows platform, the threads library is named
+# differently. This suffix doesn't exist in new versions of Boost, or
+# possibly new versions of GCC on mingw I am assuming it's Boost's change for
+# now and I am setting version to 1.48, for lack of knowledge as to when this
+# change occurred.
+if test $boost_major_version -lt 148; then
+ case $host_os in
+ (*mingw*) boost_thread_lib_ext=_win32;;
+ esac
+fi
+BOOST_FIND_LIBS([thread], [thread$boost_thread_lib_ext],
+ [$1],
+ [boost/thread.hpp], [boost::thread t; boost::mutex m;])
+
+case $host_os in
+ (*mingw*) boost_thread_w32_socket_link=-lws2_32;;
+esac
+
+BOOST_THREAD_LIBS="$BOOST_THREAD_LIBS $BOOST_SYSTEM_LIBS $boost_cv_pthread_flag $boost_thread_w32_socket_link"
+BOOST_THREAD_LDFLAGS="$BOOST_SYSTEM_LDFLAGS"
+BOOST_CPPFLAGS="$BOOST_CPPFLAGS $boost_cv_pthread_flag"
+LIBS=$boost_thread_save_LIBS
+LDFLAGS=$boost_thread_save_LDFLAGS
+CPPFLAGS=$boost_thread_save_CPPFLAGS
+])# BOOST_THREAD
+
+AU_ALIAS([BOOST_THREADS], [BOOST_THREAD])
+
+
+# BOOST_TOKENIZER()
+# -----------------
+# Look for Boost.Tokenizer
+BOOST_DEFUN([Tokenizer],
+[BOOST_FIND_HEADER([boost/tokenizer.hpp])])
+
+
+# BOOST_TRIBOOL()
+# ---------------
+# Look for Boost.Tribool
+BOOST_DEFUN([Tribool],
+[BOOST_FIND_HEADER([boost/logic/tribool_fwd.hpp])
+BOOST_FIND_HEADER([boost/logic/tribool.hpp])
+])
+
+
+# BOOST_TUPLE()
+# -------------
+# Look for Boost.Tuple
+BOOST_DEFUN([Tuple],
+[BOOST_FIND_HEADER([boost/tuple/tuple.hpp])])
+
+
+# BOOST_TYPETRAITS()
+# --------------------
+# Look for Boost.TypeTraits
+BOOST_DEFUN([TypeTraits],
+[BOOST_FIND_HEADER([boost/type_traits.hpp])])
+
+
+# BOOST_UTILITY()
+# ---------------
+# Look for Boost.Utility (noncopyable, result_of, base-from-member idiom,
+# etc.)
+BOOST_DEFUN([Utility],
+[BOOST_FIND_HEADER([boost/utility.hpp])])
+
+
+# BOOST_VARIANT()
+# ---------------
+# Look for Boost.Variant.
+BOOST_DEFUN([Variant],
+[BOOST_FIND_HEADER([boost/variant/variant_fwd.hpp])
+BOOST_FIND_HEADER([boost/variant.hpp])])
+
+
+# BOOST_POINTER_CONTAINER()
+# ------------------------
+# Look for Boost.PointerContainer
+BOOST_DEFUN([Pointer_Container],
+[BOOST_FIND_HEADER([boost/ptr_container/ptr_deque.hpp])
+BOOST_FIND_HEADER([boost/ptr_container/ptr_list.hpp])
+BOOST_FIND_HEADER([boost/ptr_container/ptr_vector.hpp])
+BOOST_FIND_HEADER([boost/ptr_container/ptr_array.hpp])
+BOOST_FIND_HEADER([boost/ptr_container/ptr_set.hpp])
+BOOST_FIND_HEADER([boost/ptr_container/ptr_map.hpp])
+])# BOOST_POINTER_CONTAINER
+
+
+# BOOST_WAVE([PREFERRED-RT-OPT])
+# ------------------------------
+# NOTE: If you intend to use Wave/Spirit with thread support, make sure you
+# call BOOST_THREAD first.
+# Look for Boost.Wave. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+BOOST_DEFUN([Wave],
+[AC_REQUIRE([BOOST_FILESYSTEM])dnl
+AC_REQUIRE([BOOST_DATE_TIME])dnl
+boost_wave_save_LIBS=$LIBS
+boost_wave_save_LDFLAGS=$LDFLAGS
+m4_pattern_allow([^BOOST_((FILE)?SYSTEM|DATE_TIME|THREAD)_(LIBS|LDFLAGS)$])dnl
+LIBS="$LIBS $BOOST_SYSTEM_LIBS $BOOST_FILESYSTEM_LIBS $BOOST_DATE_TIME_LIBS \
+$BOOST_THREAD_LIBS"
+LDFLAGS="$LDFLAGS $BOOST_SYSTEM_LDFLAGS $BOOST_FILESYSTEM_LDFLAGS \
+$BOOST_DATE_TIME_LDFLAGS $BOOST_THREAD_LDFLAGS"
+BOOST_FIND_LIB([wave], [$1],
+ [boost/wave.hpp],
+ [boost::wave::token_id id; get_token_name(id);])
+LIBS=$boost_wave_save_LIBS
+LDFLAGS=$boost_wave_save_LDFLAGS
+])# BOOST_WAVE
+
+
+# BOOST_XPRESSIVE()
+# -----------------
+# Look for Boost.Xpressive (new since 1.36.0).
+BOOST_DEFUN([Xpressive],
+[BOOST_FIND_HEADER([boost/xpressive/xpressive.hpp])])
+
+
+# ----------------- #
+# Internal helpers. #
+# ----------------- #
+
+
+# _BOOST_PTHREAD_FLAG()
+# ---------------------
+# Internal helper for BOOST_THREAD. Computes boost_cv_pthread_flag
+# which must be used in CPPFLAGS and LIBS.
+#
+# Yes, we *need* to put the -pthread thing in CPPFLAGS because with GCC3,
+# boost/thread.hpp will trigger a #error if -pthread isn't used:
+# boost/config/requires_threads.hpp:47:5: #error "Compiler threading support
+# is not turned on. Please set the correct command line options for
+# threading: -pthread (Linux), -pthreads (Solaris) or -mthreads (Mingw32)"
+#
+# Based on ACX_PTHREAD: http://autoconf-archive.cryp.to/acx_pthread.html
+AC_DEFUN([_BOOST_PTHREAD_FLAG],
+[AC_REQUIRE([AC_PROG_CXX])dnl
+AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_LANG_PUSH([C++])dnl
+AC_CACHE_CHECK([for the flags needed to use pthreads], [boost_cv_pthread_flag],
+[ boost_cv_pthread_flag=
+ # The ordering *is* (sometimes) important. Some notes on the
+ # individual items follow:
+ # (none): in case threads are in libc; should be tried before -Kthread and
+ # other compiler flags to prevent continual compiler warnings
+ # -lpthreads: AIX (must check this before -lpthread)
+ # -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h)
+ # -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able)
+ # -llthread: LinuxThreads port on FreeBSD (also preferred to -pthread)
+ # -pthread: GNU Linux/GCC (kernel threads), BSD/GCC (userland threads)
+ # -pthreads: Solaris/GCC
+ # -mthreads: MinGW32/GCC, Lynx/GCC
+ # -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it
+ # doesn't hurt to check since this sometimes defines pthreads too;
+ # also defines -D_REENTRANT)
+ # ... -mt is also the pthreads flag for HP/aCC
+ # -lpthread: GNU Linux, etc.
+ # --thread-safe: KAI C++
+ case $host_os in #(
+ *solaris*)
+ # On Solaris (at least, for some versions), libc contains stubbed
+ # (non-functional) versions of the pthreads routines, so link-based
+ # tests will erroneously succeed. (We need to link with -pthreads/-mt/
+ # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather
+ # a function called by this macro, so we could check for that, but
+ # who knows whether they'll stub that too in a future libc.) So,
+ # we'll just look for -pthreads and -lpthread first:
+ boost_pthread_flags="-pthreads -lpthread -mt -pthread";; #(
+ *)
+ boost_pthread_flags="-lpthreads -Kthread -kthread -llthread -pthread \
+ -pthreads -mthreads -lpthread --thread-safe -mt";;
+ esac
+ # Generate the test file.
+ AC_LANG_CONFTEST([AC_LANG_PROGRAM([#include <pthread.h>],
+ [pthread_t th; pthread_join(th, 0);
+ pthread_attr_init(0); pthread_cleanup_push(0, 0);
+ pthread_create(0,0,0,0); pthread_cleanup_pop(0);])])
+ for boost_pthread_flag in '' $boost_pthread_flags; do
+ boost_pthread_ok=false
+dnl Re-use the test file already generated.
+ boost_pthreads__save_LIBS=$LIBS
+ LIBS="$LIBS $boost_pthread_flag"
+ AC_LINK_IFELSE([],
+ [if grep ".*$boost_pthread_flag" conftest.err; then
+ echo "This flag seems to have triggered warnings" >&AS_MESSAGE_LOG_FD
+ else
+ boost_pthread_ok=:; boost_cv_pthread_flag=$boost_pthread_flag
+ fi])
+ LIBS=$boost_pthreads__save_LIBS
+ $boost_pthread_ok && break
+ done
+])
+AC_LANG_POP([C++])dnl
+])# _BOOST_PTHREAD_FLAG
+
+
+# _BOOST_gcc_test(MAJOR, MINOR)
+# -----------------------------
+# Internal helper for _BOOST_FIND_COMPILER_TAG.
+m4_define([_BOOST_gcc_test],
+["defined __GNUC__ && __GNUC__ == $1 && __GNUC_MINOR__ == $2 && !defined __ICC @ gcc$1$2"])dnl
+
+# _BOOST_mingw_test(MAJOR, MINOR)
+# -----------------------------
+# Internal helper for _BOOST_FIND_COMPILER_TAG.
+m4_define([_BOOST_mingw_test],
+["defined __GNUC__ && __GNUC__ == $1 && __GNUC_MINOR__ == $2 && !defined __ICC && \
+ (defined WIN32 || defined WINNT || defined _WIN32 || defined __WIN32 \
+ || defined __WIN32__ || defined __WINNT || defined __WINNT__) @ mgw$1$2"])dnl
+
+
+# _BOOST_FIND_COMPILER_TAG()
+# --------------------------
+# Internal. When Boost is installed without --layout=system, each library
+# filename will hold a suffix that encodes the compiler used during the
+# build. The Boost build system seems to call this a `tag'.
+AC_DEFUN([_BOOST_FIND_COMPILER_TAG],
+[AC_REQUIRE([AC_PROG_CXX])dnl
+AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_CACHE_CHECK([for the toolset name used by Boost for $CXX],
+ [boost_cv_lib_tag],
+[boost_cv_lib_tag=unknown
+if test x$boost_cv_inc_path != xno; then
+ AC_LANG_PUSH([C++])dnl
+ # The following tests are mostly inspired by boost/config/auto_link.hpp
+ # The list is sorted to most recent/common to oldest compiler (in order
+ # to increase the likelihood of finding the right compiler with the
+ # least number of compilation attempt).
+ # Beware that some tests are sensible to the order (for instance, we must
+ # look for MinGW before looking for GCC3).
+ # I used one compilation test per compiler with a #error to recognize
+ # each compiler so that it works even when cross-compiling (let me know
+ # if you know a better approach).
+ # Known missing tags (known from Boost's tools/build/v2/tools/common.jam):
+ # como, edg, kcc, bck, mp, sw, tru, xlc
+ # I'm not sure about my test for `il' (be careful: Intel's ICC pre-defines
+ # the same defines as GCC's).
+ for i in \
+ _BOOST_mingw_test(5, 3) \
+ _BOOST_gcc_test(5, 3) \
+ _BOOST_mingw_test(5, 2) \
+ _BOOST_gcc_test(5, 2) \
+ _BOOST_mingw_test(5, 1) \
+ _BOOST_gcc_test(5, 1) \
+ _BOOST_mingw_test(5, 0) \
+ _BOOST_gcc_test(5, 0) \
+ _BOOST_mingw_test(4, 10) \
+ _BOOST_gcc_test(4, 10) \
+ _BOOST_mingw_test(4, 9) \
+ _BOOST_gcc_test(4, 9) \
+ _BOOST_mingw_test(4, 8) \
+ _BOOST_gcc_test(4, 8) \
+ _BOOST_mingw_test(4, 7) \
+ _BOOST_gcc_test(4, 7) \
+ _BOOST_mingw_test(4, 6) \
+ _BOOST_gcc_test(4, 6) \
+ _BOOST_mingw_test(4, 5) \
+ _BOOST_gcc_test(4, 5) \
+ _BOOST_mingw_test(4, 4) \
+ _BOOST_gcc_test(4, 4) \
+ _BOOST_mingw_test(4, 3) \
+ _BOOST_gcc_test(4, 3) \
+ _BOOST_mingw_test(4, 2) \
+ _BOOST_gcc_test(4, 2) \
+ _BOOST_mingw_test(4, 1) \
+ _BOOST_gcc_test(4, 1) \
+ _BOOST_mingw_test(4, 0) \
+ _BOOST_gcc_test(4, 0) \
+ "defined __GNUC__ && __GNUC__ == 3 && !defined __ICC \
+ && (defined WIN32 || defined WINNT || defined _WIN32 || defined __WIN32 \
+ || defined __WIN32__ || defined __WINNT || defined __WINNT__) @ mgw" \
+ _BOOST_gcc_test(3, 4) \
+ _BOOST_gcc_test(3, 3) \
+ "defined _MSC_VER && _MSC_VER >= 1500 @ vc90" \
+ "defined _MSC_VER && _MSC_VER == 1400 @ vc80" \
+ _BOOST_gcc_test(3, 2) \
+ "defined _MSC_VER && _MSC_VER == 1310 @ vc71" \
+ _BOOST_gcc_test(3, 1) \
+ _BOOST_gcc_test(3, 0) \
+ "defined __BORLANDC__ @ bcb" \
+ "defined __ICC && (defined __unix || defined __unix__) @ il" \
+ "defined __ICL @ iw" \
+ "defined _MSC_VER && _MSC_VER == 1300 @ vc7" \
+ _BOOST_gcc_test(2, 95) \
+ "defined __MWERKS__ && __MWERKS__ <= 0x32FF @ cw9" \
+ "defined _MSC_VER && _MSC_VER < 1300 && !defined UNDER_CE @ vc6" \
+ "defined _MSC_VER && _MSC_VER < 1300 && defined UNDER_CE @ evc4" \
+ "defined __MWERKS__ && __MWERKS__ <= 0x31FF @ cw8"
+ do
+ boost_tag_test=`expr "X$i" : 'X\([[^@]]*\) @ '`
+ boost_tag=`expr "X$i" : 'X[[^@]]* @ \(.*\)'`
+ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+#if $boost_tag_test
+/* OK */
+#else
+# error $boost_tag_test
+#endif
+]])], [boost_cv_lib_tag=$boost_tag; break], [])
+ done
+AC_LANG_POP([C++])dnl
+ case $boost_cv_lib_tag in #(
+ # Some newer (>= 1.35?) versions of Boost seem to only use "gcc" as opposed
+ # to "gcc41" for instance.
+ *-gcc | *'-gcc ') :;; #( Don't re-add -gcc: it's already in there.
+ gcc*)
+ boost_tag_x=
+ case $host_os in #(
+ darwin*)
+ if test $boost_major_version -ge 136; then
+ # The `x' added in r46793 of Boost.
+ boost_tag_x=x
+ fi;;
+ esac
+ # We can specify multiple tags in this variable because it's used by
+ # BOOST_FIND_LIB that does a `for tag in -$boost_cv_lib_tag' ...
+ boost_cv_lib_tag="$boost_tag_x$boost_cv_lib_tag -${boost_tag_x}gcc"
+ ;; #(
+ unknown)
+ AC_MSG_WARN([[could not figure out which toolset name to use for $CXX]])
+ boost_cv_lib_tag=
+ ;;
+ esac
+fi])dnl end of AC_CACHE_CHECK
+])# _BOOST_FIND_COMPILER_TAG
+
+
+# _BOOST_GUESS_WHETHER_TO_USE_MT()
+# --------------------------------
+# Compile a small test to try to guess whether we should favor MT (Multi
+# Thread) flavors of Boost. Sets boost_guess_use_mt accordingly.
+AC_DEFUN([_BOOST_GUESS_WHETHER_TO_USE_MT],
+[# Check whether we do better use `mt' even though we weren't ask to.
+AC_LANG_PUSH([C++])dnl
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+#if defined _REENTRANT || defined _MT || defined __MT__
+/* use -mt */
+#else
+# error MT not needed
+#endif
+]])], [boost_guess_use_mt=:], [boost_guess_use_mt=false])
+AC_LANG_POP([C++])dnl
+])
+
+# _BOOST_AC_LINK_IFELSE(PROGRAM, [ACTION-IF-TRUE], [ACTION-IF-FALSE])
+# -------------------------------------------------------------------
+# Fork of _AC_LINK_IFELSE that preserves conftest.o across calls. Fragile,
+# will break when Autoconf changes its internals. Requires that you manually
+# rm -f conftest.$ac_objext in between to really different tests, otherwise
+# you will try to link a conftest.o left behind by a previous test.
+# Used to aggressively optimize BOOST_FIND_LIB (see the big comment in this
+# macro).
+#
+# Don't use "break" in the actions, as it would short-circuit some code
+# this macro runs after the actions.
+m4_define([_BOOST_AC_LINK_IFELSE],
+[m4_ifvaln([$1], [AC_LANG_CONFTEST([$1])])dnl
+rm -f conftest$ac_exeext
+boost_save_ac_ext=$ac_ext
+boost_use_source=:
+# If we already have a .o, re-use it. We change $ac_ext so that $ac_link
+# tries to link the existing object file instead of compiling from source.
+test -f conftest.$ac_objext && ac_ext=$ac_objext && boost_use_source=false &&
+ _AS_ECHO_LOG([re-using the existing conftest.$ac_objext])
+AS_IF([_AC_DO_STDERR($ac_link) && {
+ test -z "$ac_[]_AC_LANG_ABBREV[]_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_executable_p conftest$ac_exeext
+dnl FIXME: use AS_TEST_X instead when 2.61 is widespread enough.
+ }],
+ [$2],
+ [if $boost_use_source; then
+ _AC_MSG_LOG_CONFTEST
+ fi
+ $3])
+ac_objext=$boost_save_ac_objext
+ac_ext=$boost_save_ac_ext
+dnl Delete also the IPA/IPO (Inter Procedural Analysis/Optimization)
+dnl information created by the PGI compiler (conftest_ipa8_conftest.oo),
+dnl as it would interfere with the next link command.
+rm -f core conftest.err conftest_ipa8_conftest.oo \
+ conftest$ac_exeext m4_ifval([$1], [conftest.$ac_ext])[]dnl
+])# _BOOST_AC_LINK_IFELSE
+
+# Local Variables:
+# mode: autoconf
+# End:
diff --git a/configure b/configure
new file mode 100755
index 0000000..9f5daa8
--- /dev/null
+++ b/configure
@@ -0,0 +1,4568 @@
+#! /bin/sh
+# Guess values for system-dependent variables and create Makefiles.
+# Generated by GNU Autoconf 2.68 for SPP 1.7.
+#
+#
+# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
+# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software
+# Foundation, Inc.
+#
+#
+# This configure script is free software; the Free Software Foundation
+# gives unlimited permission to copy, distribute and modify it.
+## -------------------- ##
+## M4sh Initialization. ##
+## -------------------- ##
+
+# Be more Bourne compatible
+DUALCASE=1; export DUALCASE # for MKS sh
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
+ emulate sh
+ NULLCMD=:
+ # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '${1+"$@"}'='"$@"'
+ setopt NO_GLOB_SUBST
+else
+ case `(set -o) 2>/dev/null` in #(
+ *posix*) :
+ set -o posix ;; #(
+ *) :
+ ;;
+esac
+fi
+
+
+as_nl='
+'
+export as_nl
+# Printing a long string crashes Solaris 7 /usr/bin/printf.
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
+# Prefer a ksh shell builtin over an external printf program on Solaris,
+# but without wasting forks for bash or zsh.
+if test -z "$BASH_VERSION$ZSH_VERSION" \
+ && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then
+ as_echo='print -r --'
+ as_echo_n='print -rn --'
+elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
+ as_echo='printf %s\n'
+ as_echo_n='printf %s'
+else
+ if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then
+ as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"'
+ as_echo_n='/usr/ucb/echo -n'
+ else
+ as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
+ as_echo_n_body='eval
+ arg=$1;
+ case $arg in #(
+ *"$as_nl"*)
+ expr "X$arg" : "X\\(.*\\)$as_nl";
+ arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
+ esac;
+ expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl"
+ '
+ export as_echo_n_body
+ as_echo_n='sh -c $as_echo_n_body as_echo'
+ fi
+ export as_echo_body
+ as_echo='sh -c $as_echo_body as_echo'
+fi
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+ PATH_SEPARATOR=:
+ (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
+ (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
+ PATH_SEPARATOR=';'
+ }
+fi
+
+
+# IFS
+# We need space, tab and new line, in precisely that order. Quoting is
+# there to prevent editors from complaining about space-tab.
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word
+# splitting by setting IFS to empty value.)
+IFS=" "" $as_nl"
+
+# Find who we are. Look in the path if we contain no directory separator.
+as_myself=
+case $0 in #((
+ *[\\/]* ) as_myself=$0 ;;
+ *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+ done
+IFS=$as_save_IFS
+
+ ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+ as_myself=$0
+fi
+if test ! -f "$as_myself"; then
+ $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
+ exit 1
+fi
+
+# Unset variables that we do not need and which cause bugs (e.g. in
+# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1"
+# suppresses any "Segmentation fault" message there. '((' could
+# trigger a bug in pdksh 5.2.14.
+for as_var in BASH_ENV ENV MAIL MAILPATH
+do eval test x\${$as_var+set} = xset \
+ && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+done
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+LC_ALL=C
+export LC_ALL
+LANGUAGE=C
+export LANGUAGE
+
+# CDPATH.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+if test "x$CONFIG_SHELL" = x; then
+ as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then :
+ emulate sh
+ NULLCMD=:
+ # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '\${1+\"\$@\"}'='\"\$@\"'
+ setopt NO_GLOB_SUBST
+else
+ case \`(set -o) 2>/dev/null\` in #(
+ *posix*) :
+ set -o posix ;; #(
+ *) :
+ ;;
+esac
+fi
+"
+ as_required="as_fn_return () { (exit \$1); }
+as_fn_success () { as_fn_return 0; }
+as_fn_failure () { as_fn_return 1; }
+as_fn_ret_success () { return 0; }
+as_fn_ret_failure () { return 1; }
+
+exitcode=0
+as_fn_success || { exitcode=1; echo as_fn_success failed.; }
+as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; }
+as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; }
+as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; }
+if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then :
+
+else
+ exitcode=1; echo positional parameters were not saved.
+fi
+test x\$exitcode = x0 || exit 1"
+ as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO
+ as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO
+ eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" &&
+ test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1
+test \$(( 1 + 1 )) = 2 || exit 1"
+ if (eval "$as_required") 2>/dev/null; then :
+ as_have_required=yes
+else
+ as_have_required=no
+fi
+ if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then :
+
+else
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+as_found=false
+for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ as_found=:
+ case $as_dir in #(
+ /*)
+ for as_base in sh bash ksh sh5; do
+ # Try only shells that exist, to save several forks.
+ as_shell=$as_dir/$as_base
+ if { test -f "$as_shell" || test -f "$as_shell.exe"; } &&
+ { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then :
+ CONFIG_SHELL=$as_shell as_have_required=yes
+ if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then :
+ break 2
+fi
+fi
+ done;;
+ esac
+ as_found=false
+done
+$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } &&
+ { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then :
+ CONFIG_SHELL=$SHELL as_have_required=yes
+fi; }
+IFS=$as_save_IFS
+
+
+ if test "x$CONFIG_SHELL" != x; then :
+ # We cannot yet assume a decent shell, so we have to provide a
+ # neutralization value for shells without unset; and this also
+ # works around shells that cannot unset nonexistent variables.
+ # Preserve -v and -x to the replacement shell.
+ BASH_ENV=/dev/null
+ ENV=/dev/null
+ (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+ export CONFIG_SHELL
+ case $- in # ((((
+ *v*x* | *x*v* ) as_opts=-vx ;;
+ *v* ) as_opts=-v ;;
+ *x* ) as_opts=-x ;;
+ * ) as_opts= ;;
+ esac
+ exec "$CONFIG_SHELL" $as_opts "$as_myself" ${1+"$@"}
+fi
+
+ if test x$as_have_required = xno; then :
+ $as_echo "$0: This script requires a shell more modern than all"
+ $as_echo "$0: the shells that I found on your system."
+ if test x${ZSH_VERSION+set} = xset ; then
+ $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should"
+ $as_echo "$0: be upgraded to zsh 4.3.4 or later."
+ else
+ $as_echo "$0: Please tell bug-autoconf at gnu.org about your system,
+$0: including any error possibly output before this
+$0: message. Then install a modern shell, or manually run
+$0: the script under such a shell if you do have one."
+ fi
+ exit 1
+fi
+fi
+fi
+SHELL=${CONFIG_SHELL-/bin/sh}
+export SHELL
+# Unset more variables known to interfere with behavior of common tools.
+CLICOLOR_FORCE= GREP_OPTIONS=
+unset CLICOLOR_FORCE GREP_OPTIONS
+
+## --------------------- ##
+## M4sh Shell Functions. ##
+## --------------------- ##
+# as_fn_unset VAR
+# ---------------
+# Portably unset VAR.
+as_fn_unset ()
+{
+ { eval $1=; unset $1;}
+}
+as_unset=as_fn_unset
+
+# as_fn_set_status STATUS
+# -----------------------
+# Set $? to STATUS, without forking.
+as_fn_set_status ()
+{
+ return $1
+} # as_fn_set_status
+
+# as_fn_exit STATUS
+# -----------------
+# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
+as_fn_exit ()
+{
+ set +e
+ as_fn_set_status $1
+ exit $1
+} # as_fn_exit
+
+# as_fn_mkdir_p
+# -------------
+# Create "$as_dir" as a directory, including parents if necessary.
+as_fn_mkdir_p ()
+{
+
+ case $as_dir in #(
+ -*) as_dir=./$as_dir;;
+ esac
+ test -d "$as_dir" || eval $as_mkdir_p || {
+ as_dirs=
+ while :; do
+ case $as_dir in #(
+ *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
+ *) as_qdir=$as_dir;;
+ esac
+ as_dirs="'$as_qdir' $as_dirs"
+ as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$as_dir" : 'X\(//\)[^/]' \| \
+ X"$as_dir" : 'X\(//\)$' \| \
+ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_dir" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)[^/].*/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+ test -d "$as_dir" && break
+ done
+ test -z "$as_dirs" || eval "mkdir $as_dirs"
+ } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
+
+
+} # as_fn_mkdir_p
+# as_fn_append VAR VALUE
+# ----------------------
+# Append the text in VALUE to the end of the definition contained in VAR. Take
+# advantage of any shell optimizations that allow amortized linear growth over
+# repeated appends, instead of the typical quadratic growth present in naive
+# implementations.
+if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :
+ eval 'as_fn_append ()
+ {
+ eval $1+=\$2
+ }'
+else
+ as_fn_append ()
+ {
+ eval $1=\$$1\$2
+ }
+fi # as_fn_append
+
+# as_fn_arith ARG...
+# ------------------
+# Perform arithmetic evaluation on the ARGs, and store the result in the
+# global $as_val. Take advantage of shells that can avoid forks. The arguments
+# must be portable across $(()) and expr.
+if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :
+ eval 'as_fn_arith ()
+ {
+ as_val=$(( $* ))
+ }'
+else
+ as_fn_arith ()
+ {
+ as_val=`expr "$@" || test $? -eq 1`
+ }
+fi # as_fn_arith
+
+
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
+# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
+# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
+# script with STATUS, using 1 if that was 0.
+as_fn_error ()
+{
+ as_status=$1; test $as_status -eq 0 && as_status=1
+ if test "$4"; then
+ as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
+ fi
+ $as_echo "$as_me: error: $2" >&2
+ as_fn_exit $as_status
+} # as_fn_error
+
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+ test "X`expr 00001 : '.*\(...\)'`" = X001; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+ as_basename=basename
+else
+ as_basename=false
+fi
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+ as_dirname=dirname
+else
+ as_dirname=false
+fi
+
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$0" |
+ sed '/^.*\/\([^/][^/]*\)\/*$/{
+ s//\1/
+ q
+ }
+ /^X\/\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\/\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+
+ as_lineno_1=$LINENO as_lineno_1a=$LINENO
+ as_lineno_2=$LINENO as_lineno_2a=$LINENO
+ eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" &&
+ test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || {
+ # Blame Lee E. McMahon (1931-1989) for sed's syntax. :-)
+ sed -n '
+ p
+ /[$]LINENO/=
+ ' <$as_myself |
+ sed '
+ s/[$]LINENO.*/&-/
+ t lineno
+ b
+ :lineno
+ N
+ :loop
+ s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/
+ t loop
+ s/-\n.*//
+ ' >$as_me.lineno &&
+ chmod +x "$as_me.lineno" ||
+ { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; }
+
+ # Don't try to exec as it changes $[0], causing all sort of problems
+ # (the dirname of $[0] is not the place where we might find the
+ # original and so on. Autoconf is especially sensitive to this).
+ . "./$as_me.lineno"
+ # Exit status is that of the last command.
+ exit
+}
+
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in #(((((
+-n*)
+ case `echo 'xy\c'` in
+ *c*) ECHO_T=' ';; # ECHO_T is single tab character.
+ xy) ECHO_C='\c';;
+ *) echo `echo ksh88 bug on AIX 6.1` > /dev/null
+ ECHO_T=' ';;
+ esac;;
+*)
+ ECHO_N='-n';;
+esac
+
+rm -f conf$$ conf$$.exe conf$$.file
+if test -d conf$$.dir; then
+ rm -f conf$$.dir/conf$$.file
+else
+ rm -f conf$$.dir
+ mkdir conf$$.dir 2>/dev/null
+fi
+if (echo >conf$$.file) 2>/dev/null; then
+ if ln -s conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s='ln -s'
+ # ... but there are two gotchas:
+ # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+ # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+ # In both cases, we have to default to `cp -p'.
+ ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+ as_ln_s='cp -p'
+ elif ln conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s=ln
+ else
+ as_ln_s='cp -p'
+ fi
+else
+ as_ln_s='cp -p'
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
+
+if mkdir -p . 2>/dev/null; then
+ as_mkdir_p='mkdir -p "$as_dir"'
+else
+ test -d ./-p && rmdir ./-p
+ as_mkdir_p=false
+fi
+
+if test -x / >/dev/null 2>&1; then
+ as_test_x='test -x'
+else
+ if ls -dL / >/dev/null 2>&1; then
+ as_ls_L_option=L
+ else
+ as_ls_L_option=
+ fi
+ as_test_x='
+ eval sh -c '\''
+ if test -d "$1"; then
+ test -d "$1/.";
+ else
+ case $1 in #(
+ -*)set "./$1";;
+ esac;
+ case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #((
+ ???[sx]*):;;*)false;;esac;fi
+ '\'' sh
+ '
+fi
+as_executable_p=$as_test_x
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+test -n "$DJDIR" || exec 7<&0 </dev/null
+exec 6>&1
+
+# Name of the host.
+# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status,
+# so uname gets run too.
+ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
+
+#
+# Initializations.
+#
+ac_default_prefix=/usr/local
+ac_clean_files=
+ac_config_libobj_dir=.
+LIBOBJS=
+cross_compiling=no
+subdirs=
+MFLAGS=
+MAKEFLAGS=
+
+# Identity of this package.
+PACKAGE_NAME='SPP'
+PACKAGE_TARNAME='spp'
+PACKAGE_VERSION='1.7'
+PACKAGE_STRING='SPP 1.7'
+PACKAGE_BUGREPORT=''
+PACKAGE_URL=''
+
+ac_subst_vars='LTLIBOBJS
+LIBOBJS
+SED
+CXXCPP
+BOOST_CPPFLAGS
+DISTCHECK_CONFIGURE_FLAGS
+BOOST_ROOT
+GREP
+ac_ct_CXX
+CXXFLAGS
+CXX
+HAVE_LIBBZ2
+OBJEXT
+EXEEXT
+ac_ct_CC
+CPPFLAGS
+LDFLAGS
+CFLAGS
+CC
+target_alias
+host_alias
+build_alias
+LIBS
+ECHO_T
+ECHO_N
+ECHO_C
+DEFS
+mandir
+localedir
+libdir
+psdir
+pdfdir
+dvidir
+htmldir
+infodir
+docdir
+oldincludedir
+includedir
+localstatedir
+sharedstatedir
+sysconfdir
+datadir
+datarootdir
+libexecdir
+sbindir
+bindir
+program_transform_name
+prefix
+exec_prefix
+PACKAGE_URL
+PACKAGE_BUGREPORT
+PACKAGE_STRING
+PACKAGE_VERSION
+PACKAGE_TARNAME
+PACKAGE_NAME
+PATH_SEPARATOR
+SHELL'
+ac_subst_files=''
+ac_user_opts='
+enable_option_checking
+with_boost
+'
+ ac_precious_vars='build_alias
+host_alias
+target_alias
+CC
+CFLAGS
+LDFLAGS
+LIBS
+CPPFLAGS
+CXX
+CXXFLAGS
+CCC
+BOOST_ROOT
+CXXCPP'
+
+
+# Initialize some variables set by options.
+ac_init_help=
+ac_init_version=false
+ac_unrecognized_opts=
+ac_unrecognized_sep=
+# The variables have the same names as the options, with
+# dashes changed to underlines.
+cache_file=/dev/null
+exec_prefix=NONE
+no_create=
+no_recursion=
+prefix=NONE
+program_prefix=NONE
+program_suffix=NONE
+program_transform_name=s,x,x,
+silent=
+site=
+srcdir=
+verbose=
+x_includes=NONE
+x_libraries=NONE
+
+# Installation directory options.
+# These are left unexpanded so users can "make install exec_prefix=/foo"
+# and all the variables that are supposed to be based on exec_prefix
+# by default will actually change.
+# Use braces instead of parens because sh, perl, etc. also accept them.
+# (The list follows the same order as the GNU Coding Standards.)
+bindir='${exec_prefix}/bin'
+sbindir='${exec_prefix}/sbin'
+libexecdir='${exec_prefix}/libexec'
+datarootdir='${prefix}/share'
+datadir='${datarootdir}'
+sysconfdir='${prefix}/etc'
+sharedstatedir='${prefix}/com'
+localstatedir='${prefix}/var'
+includedir='${prefix}/include'
+oldincludedir='/usr/include'
+docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
+infodir='${datarootdir}/info'
+htmldir='${docdir}'
+dvidir='${docdir}'
+pdfdir='${docdir}'
+psdir='${docdir}'
+libdir='${exec_prefix}/lib'
+localedir='${datarootdir}/locale'
+mandir='${datarootdir}/man'
+
+ac_prev=
+ac_dashdash=
+for ac_option
+do
+ # If the previous option needs an argument, assign it.
+ if test -n "$ac_prev"; then
+ eval $ac_prev=\$ac_option
+ ac_prev=
+ continue
+ fi
+
+ case $ac_option in
+ *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;
+ *=) ac_optarg= ;;
+ *) ac_optarg=yes ;;
+ esac
+
+ # Accept the important Cygnus configure options, so we can diagnose typos.
+
+ case $ac_dashdash$ac_option in
+ --)
+ ac_dashdash=yes ;;
+
+ -bindir | --bindir | --bindi | --bind | --bin | --bi)
+ ac_prev=bindir ;;
+ -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+ bindir=$ac_optarg ;;
+
+ -build | --build | --buil | --bui | --bu)
+ ac_prev=build_alias ;;
+ -build=* | --build=* | --buil=* | --bui=* | --bu=*)
+ build_alias=$ac_optarg ;;
+
+ -cache-file | --cache-file | --cache-fil | --cache-fi \
+ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
+ ac_prev=cache_file ;;
+ -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
+ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
+ cache_file=$ac_optarg ;;
+
+ --config-cache | -C)
+ cache_file=config.cache ;;
+
+ -datadir | --datadir | --datadi | --datad)
+ ac_prev=datadir ;;
+ -datadir=* | --datadir=* | --datadi=* | --datad=*)
+ datadir=$ac_optarg ;;
+
+ -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \
+ | --dataroo | --dataro | --datar)
+ ac_prev=datarootdir ;;
+ -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \
+ | --dataroot=* | --dataroo=* | --dataro=* | --datar=*)
+ datarootdir=$ac_optarg ;;
+
+ -disable-* | --disable-*)
+ ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+ as_fn_error $? "invalid feature name: $ac_useropt"
+ ac_useropt_orig=$ac_useropt
+ ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+ case $ac_user_opts in
+ *"
+"enable_$ac_useropt"
+"*) ;;
+ *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig"
+ ac_unrecognized_sep=', ';;
+ esac
+ eval enable_$ac_useropt=no ;;
+
+ -docdir | --docdir | --docdi | --doc | --do)
+ ac_prev=docdir ;;
+ -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*)
+ docdir=$ac_optarg ;;
+
+ -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv)
+ ac_prev=dvidir ;;
+ -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*)
+ dvidir=$ac_optarg ;;
+
+ -enable-* | --enable-*)
+ ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+ as_fn_error $? "invalid feature name: $ac_useropt"
+ ac_useropt_orig=$ac_useropt
+ ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+ case $ac_user_opts in
+ *"
+"enable_$ac_useropt"
+"*) ;;
+ *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig"
+ ac_unrecognized_sep=', ';;
+ esac
+ eval enable_$ac_useropt=\$ac_optarg ;;
+
+ -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
+ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
+ | --exec | --exe | --ex)
+ ac_prev=exec_prefix ;;
+ -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
+ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
+ | --exec=* | --exe=* | --ex=*)
+ exec_prefix=$ac_optarg ;;
+
+ -gas | --gas | --ga | --g)
+ # Obsolete; use --with-gas.
+ with_gas=yes ;;
+
+ -help | --help | --hel | --he | -h)
+ ac_init_help=long ;;
+ -help=r* | --help=r* | --hel=r* | --he=r* | -hr*)
+ ac_init_help=recursive ;;
+ -help=s* | --help=s* | --hel=s* | --he=s* | -hs*)
+ ac_init_help=short ;;
+
+ -host | --host | --hos | --ho)
+ ac_prev=host_alias ;;
+ -host=* | --host=* | --hos=* | --ho=*)
+ host_alias=$ac_optarg ;;
+
+ -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht)
+ ac_prev=htmldir ;;
+ -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \
+ | --ht=*)
+ htmldir=$ac_optarg ;;
+
+ -includedir | --includedir | --includedi | --included | --include \
+ | --includ | --inclu | --incl | --inc)
+ ac_prev=includedir ;;
+ -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
+ | --includ=* | --inclu=* | --incl=* | --inc=*)
+ includedir=$ac_optarg ;;
+
+ -infodir | --infodir | --infodi | --infod | --info | --inf)
+ ac_prev=infodir ;;
+ -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
+ infodir=$ac_optarg ;;
+
+ -libdir | --libdir | --libdi | --libd)
+ ac_prev=libdir ;;
+ -libdir=* | --libdir=* | --libdi=* | --libd=*)
+ libdir=$ac_optarg ;;
+
+ -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
+ | --libexe | --libex | --libe)
+ ac_prev=libexecdir ;;
+ -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
+ | --libexe=* | --libex=* | --libe=*)
+ libexecdir=$ac_optarg ;;
+
+ -localedir | --localedir | --localedi | --localed | --locale)
+ ac_prev=localedir ;;
+ -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*)
+ localedir=$ac_optarg ;;
+
+ -localstatedir | --localstatedir | --localstatedi | --localstated \
+ | --localstate | --localstat | --localsta | --localst | --locals)
+ ac_prev=localstatedir ;;
+ -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
+ | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*)
+ localstatedir=$ac_optarg ;;
+
+ -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
+ ac_prev=mandir ;;
+ -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
+ mandir=$ac_optarg ;;
+
+ -nfp | --nfp | --nf)
+ # Obsolete; use --without-fp.
+ with_fp=no ;;
+
+ -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+ | --no-cr | --no-c | -n)
+ no_create=yes ;;
+
+ -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
+ no_recursion=yes ;;
+
+ -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
+ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
+ | --oldin | --oldi | --old | --ol | --o)
+ ac_prev=oldincludedir ;;
+ -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
+ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
+ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
+ oldincludedir=$ac_optarg ;;
+
+ -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+ ac_prev=prefix ;;
+ -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+ prefix=$ac_optarg ;;
+
+ -program-prefix | --program-prefix | --program-prefi | --program-pref \
+ | --program-pre | --program-pr | --program-p)
+ ac_prev=program_prefix ;;
+ -program-prefix=* | --program-prefix=* | --program-prefi=* \
+ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
+ program_prefix=$ac_optarg ;;
+
+ -program-suffix | --program-suffix | --program-suffi | --program-suff \
+ | --program-suf | --program-su | --program-s)
+ ac_prev=program_suffix ;;
+ -program-suffix=* | --program-suffix=* | --program-suffi=* \
+ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
+ program_suffix=$ac_optarg ;;
+
+ -program-transform-name | --program-transform-name \
+ | --program-transform-nam | --program-transform-na \
+ | --program-transform-n | --program-transform- \
+ | --program-transform | --program-transfor \
+ | --program-transfo | --program-transf \
+ | --program-trans | --program-tran \
+ | --progr-tra | --program-tr | --program-t)
+ ac_prev=program_transform_name ;;
+ -program-transform-name=* | --program-transform-name=* \
+ | --program-transform-nam=* | --program-transform-na=* \
+ | --program-transform-n=* | --program-transform-=* \
+ | --program-transform=* | --program-transfor=* \
+ | --program-transfo=* | --program-transf=* \
+ | --program-trans=* | --program-tran=* \
+ | --progr-tra=* | --program-tr=* | --program-t=*)
+ program_transform_name=$ac_optarg ;;
+
+ -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd)
+ ac_prev=pdfdir ;;
+ -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*)
+ pdfdir=$ac_optarg ;;
+
+ -psdir | --psdir | --psdi | --psd | --ps)
+ ac_prev=psdir ;;
+ -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*)
+ psdir=$ac_optarg ;;
+
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil)
+ silent=yes ;;
+
+ -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
+ ac_prev=sbindir ;;
+ -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
+ | --sbi=* | --sb=*)
+ sbindir=$ac_optarg ;;
+
+ -sharedstatedir | --sharedstatedir | --sharedstatedi \
+ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
+ | --sharedst | --shareds | --shared | --share | --shar \
+ | --sha | --sh)
+ ac_prev=sharedstatedir ;;
+ -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
+ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
+ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
+ | --sha=* | --sh=*)
+ sharedstatedir=$ac_optarg ;;
+
+ -site | --site | --sit)
+ ac_prev=site ;;
+ -site=* | --site=* | --sit=*)
+ site=$ac_optarg ;;
+
+ -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
+ ac_prev=srcdir ;;
+ -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
+ srcdir=$ac_optarg ;;
+
+ -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
+ | --syscon | --sysco | --sysc | --sys | --sy)
+ ac_prev=sysconfdir ;;
+ -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
+ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
+ sysconfdir=$ac_optarg ;;
+
+ -target | --target | --targe | --targ | --tar | --ta | --t)
+ ac_prev=target_alias ;;
+ -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
+ target_alias=$ac_optarg ;;
+
+ -v | -verbose | --verbose | --verbos | --verbo | --verb)
+ verbose=yes ;;
+
+ -version | --version | --versio | --versi | --vers | -V)
+ ac_init_version=: ;;
+
+ -with-* | --with-*)
+ ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+ as_fn_error $? "invalid package name: $ac_useropt"
+ ac_useropt_orig=$ac_useropt
+ ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+ case $ac_user_opts in
+ *"
+"with_$ac_useropt"
+"*) ;;
+ *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig"
+ ac_unrecognized_sep=', ';;
+ esac
+ eval with_$ac_useropt=\$ac_optarg ;;
+
+ -without-* | --without-*)
+ ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+ as_fn_error $? "invalid package name: $ac_useropt"
+ ac_useropt_orig=$ac_useropt
+ ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+ case $ac_user_opts in
+ *"
+"with_$ac_useropt"
+"*) ;;
+ *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig"
+ ac_unrecognized_sep=', ';;
+ esac
+ eval with_$ac_useropt=no ;;
+
+ --x)
+ # Obsolete; use --with-x.
+ with_x=yes ;;
+
+ -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
+ | --x-incl | --x-inc | --x-in | --x-i)
+ ac_prev=x_includes ;;
+ -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
+ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
+ x_includes=$ac_optarg ;;
+
+ -x-libraries | --x-libraries | --x-librarie | --x-librari \
+ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
+ ac_prev=x_libraries ;;
+ -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
+ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
+ x_libraries=$ac_optarg ;;
+
+ -*) as_fn_error $? "unrecognized option: \`$ac_option'
+Try \`$0 --help' for more information"
+ ;;
+
+ *=*)
+ ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`
+ # Reject names that are not valid shell variable names.
+ case $ac_envvar in #(
+ '' | [0-9]* | *[!_$as_cr_alnum]* )
+ as_fn_error $? "invalid variable name: \`$ac_envvar'" ;;
+ esac
+ eval $ac_envvar=\$ac_optarg
+ export $ac_envvar ;;
+
+ *)
+ # FIXME: should be removed in autoconf 3.0.
+ $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2
+ expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+ $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2
+ : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}"
+ ;;
+
+ esac
+done
+
+if test -n "$ac_prev"; then
+ ac_option=--`echo $ac_prev | sed 's/_/-/g'`
+ as_fn_error $? "missing argument to $ac_option"
+fi
+
+if test -n "$ac_unrecognized_opts"; then
+ case $enable_option_checking in
+ no) ;;
+ fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;;
+ *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;;
+ esac
+fi
+
+# Check all directory arguments for consistency.
+for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \
+ datadir sysconfdir sharedstatedir localstatedir includedir \
+ oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
+ libdir localedir mandir
+do
+ eval ac_val=\$$ac_var
+ # Remove trailing slashes.
+ case $ac_val in
+ */ )
+ ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'`
+ eval $ac_var=\$ac_val;;
+ esac
+ # Be sure to have absolute directory names.
+ case $ac_val in
+ [\\/$]* | ?:[\\/]* ) continue;;
+ NONE | '' ) case $ac_var in *prefix ) continue;; esac;;
+ esac
+ as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val"
+done
+
+# There might be people who depend on the old broken behavior: `$host'
+# used to hold the argument of --host etc.
+# FIXME: To remove some day.
+build=$build_alias
+host=$host_alias
+target=$target_alias
+
+# FIXME: To remove some day.
+if test "x$host_alias" != x; then
+ if test "x$build_alias" = x; then
+ cross_compiling=maybe
+ $as_echo "$as_me: WARNING: if you wanted to set the --build type, don't use --host.
+ If a cross compiler is detected then cross compile mode will be used" >&2
+ elif test "x$build_alias" != "x$host_alias"; then
+ cross_compiling=yes
+ fi
+fi
+
+ac_tool_prefix=
+test -n "$host_alias" && ac_tool_prefix=$host_alias-
+
+test "$silent" = yes && exec 6>/dev/null
+
+
+ac_pwd=`pwd` && test -n "$ac_pwd" &&
+ac_ls_di=`ls -di .` &&
+ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` ||
+ as_fn_error $? "working directory cannot be determined"
+test "X$ac_ls_di" = "X$ac_pwd_ls_di" ||
+ as_fn_error $? "pwd does not report name of working directory"
+
+
+# Find the source files, if location was not specified.
+if test -z "$srcdir"; then
+ ac_srcdir_defaulted=yes
+ # Try the directory containing this script, then the parent directory.
+ ac_confdir=`$as_dirname -- "$as_myself" ||
+$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$as_myself" : 'X\(//\)[^/]' \| \
+ X"$as_myself" : 'X\(//\)$' \| \
+ X"$as_myself" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_myself" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)[^/].*/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+ srcdir=$ac_confdir
+ if test ! -r "$srcdir/$ac_unique_file"; then
+ srcdir=..
+ fi
+else
+ ac_srcdir_defaulted=no
+fi
+if test ! -r "$srcdir/$ac_unique_file"; then
+ test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .."
+ as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir"
+fi
+ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work"
+ac_abs_confdir=`(
+ cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg"
+ pwd)`
+# When building in place, set srcdir=.
+if test "$ac_abs_confdir" = "$ac_pwd"; then
+ srcdir=.
+fi
+# Remove unnecessary trailing slashes from srcdir.
+# Double slashes in file names in object file debugging info
+# mess up M-x gdb in Emacs.
+case $srcdir in
+*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;;
+esac
+for ac_var in $ac_precious_vars; do
+ eval ac_env_${ac_var}_set=\${${ac_var}+set}
+ eval ac_env_${ac_var}_value=\$${ac_var}
+ eval ac_cv_env_${ac_var}_set=\${${ac_var}+set}
+ eval ac_cv_env_${ac_var}_value=\$${ac_var}
+done
+
+#
+# Report the --help message.
+#
+if test "$ac_init_help" = "long"; then
+ # Omit some internal or obsolete options to make the list less imposing.
+ # This message is too long to be a string in the A/UX 3.1 sh.
+ cat <<_ACEOF
+\`configure' configures SPP 1.7 to adapt to many kinds of systems.
+
+Usage: $0 [OPTION]... [VAR=VALUE]...
+
+To assign environment variables (e.g., CC, CFLAGS...), specify them as
+VAR=VALUE. See below for descriptions of some of the useful variables.
+
+Defaults for the options are specified in brackets.
+
+Configuration:
+ -h, --help display this help and exit
+ --help=short display options specific to this package
+ --help=recursive display the short help of all the included packages
+ -V, --version display version information and exit
+ -q, --quiet, --silent do not print \`checking ...' messages
+ --cache-file=FILE cache test results in FILE [disabled]
+ -C, --config-cache alias for \`--cache-file=config.cache'
+ -n, --no-create do not create output files
+ --srcdir=DIR find the sources in DIR [configure dir or \`..']
+
+Installation directories:
+ --prefix=PREFIX install architecture-independent files in PREFIX
+ [$ac_default_prefix]
+ --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX
+ [PREFIX]
+
+By default, \`make install' will install all the files in
+\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify
+an installation prefix other than \`$ac_default_prefix' using \`--prefix',
+for instance \`--prefix=\$HOME'.
+
+For better control, use the options below.
+
+Fine tuning of the installation directories:
+ --bindir=DIR user executables [EPREFIX/bin]
+ --sbindir=DIR system admin executables [EPREFIX/sbin]
+ --libexecdir=DIR program executables [EPREFIX/libexec]
+ --sysconfdir=DIR read-only single-machine data [PREFIX/etc]
+ --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
+ --localstatedir=DIR modifiable single-machine data [PREFIX/var]
+ --libdir=DIR object code libraries [EPREFIX/lib]
+ --includedir=DIR C header files [PREFIX/include]
+ --oldincludedir=DIR C header files for non-gcc [/usr/include]
+ --datarootdir=DIR read-only arch.-independent data root [PREFIX/share]
+ --datadir=DIR read-only architecture-independent data [DATAROOTDIR]
+ --infodir=DIR info documentation [DATAROOTDIR/info]
+ --localedir=DIR locale-dependent data [DATAROOTDIR/locale]
+ --mandir=DIR man documentation [DATAROOTDIR/man]
+ --docdir=DIR documentation root [DATAROOTDIR/doc/spp]
+ --htmldir=DIR html documentation [DOCDIR]
+ --dvidir=DIR dvi documentation [DOCDIR]
+ --pdfdir=DIR pdf documentation [DOCDIR]
+ --psdir=DIR ps documentation [DOCDIR]
+_ACEOF
+
+ cat <<\_ACEOF
+_ACEOF
+fi
+
+if test -n "$ac_init_help"; then
+ case $ac_init_help in
+ short | recursive ) echo "Configuration of SPP 1.7:";;
+ esac
+ cat <<\_ACEOF
+
+Optional Packages:
+ --with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
+ --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
+ --with-boost=DIR prefix of Boost 1.41.0 [guess]
+
+Some influential environment variables:
+ CC C compiler command
+ CFLAGS C compiler flags
+ LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a
+ nonstandard directory <lib dir>
+ LIBS libraries to pass to the linker, e.g. -l<library>
+ CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if
+ you have headers in a nonstandard directory <include dir>
+ CXX C++ compiler command
+ CXXFLAGS C++ compiler flags
+ BOOST_ROOT Location of Boost installation
+ CXXCPP C++ preprocessor
+
+Use these variables to override the choices made by `configure' or to help
+it to find libraries and programs with nonstandard names/locations.
+
+Report bugs to the package provider.
+_ACEOF
+ac_status=$?
+fi
+
+if test "$ac_init_help" = "recursive"; then
+ # If there are subdirs, report their specific --help.
+ for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue
+ test -d "$ac_dir" ||
+ { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } ||
+ continue
+ ac_builddir=.
+
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+ ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'`
+ # A ".." for each directory in $ac_dir_suffix.
+ ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'`
+ case $ac_top_builddir_sub in
+ "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+ *) ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+ esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+case $srcdir in
+ .) # We are building in place.
+ ac_srcdir=.
+ ac_top_srcdir=$ac_top_builddir_sub
+ ac_abs_top_srcdir=$ac_pwd ;;
+ [\\/]* | ?:[\\/]* ) # Absolute name.
+ ac_srcdir=$srcdir$ac_dir_suffix;
+ ac_top_srcdir=$srcdir
+ ac_abs_top_srcdir=$srcdir ;;
+ *) # Relative name.
+ ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+ ac_top_srcdir=$ac_top_build_prefix$srcdir
+ ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+ cd "$ac_dir" || { ac_status=$?; continue; }
+ # Check for guested configure.
+ if test -f "$ac_srcdir/configure.gnu"; then
+ echo &&
+ $SHELL "$ac_srcdir/configure.gnu" --help=recursive
+ elif test -f "$ac_srcdir/configure"; then
+ echo &&
+ $SHELL "$ac_srcdir/configure" --help=recursive
+ else
+ $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2
+ fi || ac_status=$?
+ cd "$ac_pwd" || { ac_status=$?; break; }
+ done
+fi
+
+test -n "$ac_init_help" && exit $ac_status
+if $ac_init_version; then
+ cat <<\_ACEOF
+SPP configure 1.7
+generated by GNU Autoconf 2.68
+
+Copyright (C) 2010 Free Software Foundation, Inc.
+This configure script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it.
+_ACEOF
+ exit
+fi
+
+## ------------------------ ##
+## Autoconf initialization. ##
+## ------------------------ ##
+
+# ac_fn_c_try_compile LINENO
+# --------------------------
+# Try to compile conftest.$ac_ext, and return whether this succeeded.
+ac_fn_c_try_compile ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ rm -f conftest.$ac_objext
+ if { { ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_compile") 2>conftest.err
+ ac_status=$?
+ if test -s conftest.err; then
+ grep -v '^ *+' conftest.err >conftest.er1
+ cat conftest.er1 >&5
+ mv -f conftest.er1 conftest.err
+ fi
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then :
+ ac_retval=0
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_retval=1
+fi
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+ as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_compile
+
+# ac_fn_c_try_link LINENO
+# -----------------------
+# Try to link conftest.$ac_ext, and return whether this succeeded.
+ac_fn_c_try_link ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ rm -f conftest.$ac_objext conftest$ac_exeext
+ if { { ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_link") 2>conftest.err
+ ac_status=$?
+ if test -s conftest.err; then
+ grep -v '^ *+' conftest.err >conftest.er1
+ cat conftest.er1 >&5
+ mv -f conftest.er1 conftest.err
+ fi
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_test_x conftest$ac_exeext
+ }; then :
+ ac_retval=0
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_retval=1
+fi
+ # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
+ # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
+ # interfere with the next link command; also delete a directory that is
+ # left behind by Apple's compiler. We do this before executing the actions.
+ rm -rf conftest.dSYM conftest_ipa8_conftest.oo
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+ as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_link
+
+# ac_fn_cxx_try_compile LINENO
+# ----------------------------
+# Try to compile conftest.$ac_ext, and return whether this succeeded.
+ac_fn_cxx_try_compile ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ rm -f conftest.$ac_objext
+ if { { ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_compile") 2>conftest.err
+ ac_status=$?
+ if test -s conftest.err; then
+ grep -v '^ *+' conftest.err >conftest.er1
+ cat conftest.er1 >&5
+ mv -f conftest.er1 conftest.err
+ fi
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; } && {
+ test -z "$ac_cxx_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then :
+ ac_retval=0
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_retval=1
+fi
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+ as_fn_set_status $ac_retval
+
+} # ac_fn_cxx_try_compile
+
+# ac_fn_cxx_try_cpp LINENO
+# ------------------------
+# Try to preprocess conftest.$ac_ext, and return whether this succeeded.
+ac_fn_cxx_try_cpp ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ if { { ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err
+ ac_status=$?
+ if test -s conftest.err; then
+ grep -v '^ *+' conftest.err >conftest.er1
+ cat conftest.er1 >&5
+ mv -f conftest.er1 conftest.err
+ fi
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; } > conftest.i && {
+ test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" ||
+ test ! -s conftest.err
+ }; then :
+ ac_retval=0
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_retval=1
+fi
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+ as_fn_set_status $ac_retval
+
+} # ac_fn_cxx_try_cpp
+cat >config.log <<_ACEOF
+This file contains any messages produced by compilers while
+running configure, to aid debugging if configure makes a mistake.
+
+It was created by SPP $as_me 1.7, which was
+generated by GNU Autoconf 2.68. Invocation command line was
+
+ $ $0 $@
+
+_ACEOF
+exec 5>>config.log
+{
+cat <<_ASUNAME
+## --------- ##
+## Platform. ##
+## --------- ##
+
+hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
+/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown`
+
+/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown`
+/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
+/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown`
+/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown`
+/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown`
+/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown`
+
+_ASUNAME
+
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ $as_echo "PATH: $as_dir"
+ done
+IFS=$as_save_IFS
+
+} >&5
+
+cat >&5 <<_ACEOF
+
+
+## ----------- ##
+## Core tests. ##
+## ----------- ##
+
+_ACEOF
+
+
+# Keep a trace of the command line.
+# Strip out --no-create and --no-recursion so they do not pile up.
+# Strip out --silent because we don't want to record it for future runs.
+# Also quote any args containing shell meta-characters.
+# Make two passes to allow for proper duplicate-argument suppression.
+ac_configure_args=
+ac_configure_args0=
+ac_configure_args1=
+ac_must_keep_next=false
+for ac_pass in 1 2
+do
+ for ac_arg
+ do
+ case $ac_arg in
+ -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;;
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil)
+ continue ;;
+ *\'*)
+ ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
+ esac
+ case $ac_pass in
+ 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;;
+ 2)
+ as_fn_append ac_configure_args1 " '$ac_arg'"
+ if test $ac_must_keep_next = true; then
+ ac_must_keep_next=false # Got value, back to normal.
+ else
+ case $ac_arg in
+ *=* | --config-cache | -C | -disable-* | --disable-* \
+ | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \
+ | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \
+ | -with-* | --with-* | -without-* | --without-* | --x)
+ case "$ac_configure_args0 " in
+ "$ac_configure_args1"*" '$ac_arg' "* ) continue ;;
+ esac
+ ;;
+ -* ) ac_must_keep_next=true ;;
+ esac
+ fi
+ as_fn_append ac_configure_args " '$ac_arg'"
+ ;;
+ esac
+ done
+done
+{ ac_configure_args0=; unset ac_configure_args0;}
+{ ac_configure_args1=; unset ac_configure_args1;}
+
+# When interrupted or exit'd, cleanup temporary files, and complete
+# config.log. We remove comments because anyway the quotes in there
+# would cause problems or look ugly.
+# WARNING: Use '\'' to represent an apostrophe within the trap.
+# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug.
+trap 'exit_status=$?
+ # Save into config.log some information that might help in debugging.
+ {
+ echo
+
+ $as_echo "## ---------------- ##
+## Cache variables. ##
+## ---------------- ##"
+ echo
+ # The following way of writing the cache mishandles newlines in values,
+(
+ for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do
+ eval ac_val=\$$ac_var
+ case $ac_val in #(
+ *${as_nl}*)
+ case $ac_var in #(
+ *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5
+$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
+ esac
+ case $ac_var in #(
+ _ | IFS | as_nl) ;; #(
+ BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
+ *) { eval $ac_var=; unset $ac_var;} ;;
+ esac ;;
+ esac
+ done
+ (set) 2>&1 |
+ case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #(
+ *${as_nl}ac_space=\ *)
+ sed -n \
+ "s/'\''/'\''\\\\'\'''\''/g;
+ s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p"
+ ;; #(
+ *)
+ sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
+ ;;
+ esac |
+ sort
+)
+ echo
+
+ $as_echo "## ----------------- ##
+## Output variables. ##
+## ----------------- ##"
+ echo
+ for ac_var in $ac_subst_vars
+ do
+ eval ac_val=\$$ac_var
+ case $ac_val in
+ *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+ esac
+ $as_echo "$ac_var='\''$ac_val'\''"
+ done | sort
+ echo
+
+ if test -n "$ac_subst_files"; then
+ $as_echo "## ------------------- ##
+## File substitutions. ##
+## ------------------- ##"
+ echo
+ for ac_var in $ac_subst_files
+ do
+ eval ac_val=\$$ac_var
+ case $ac_val in
+ *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+ esac
+ $as_echo "$ac_var='\''$ac_val'\''"
+ done | sort
+ echo
+ fi
+
+ if test -s confdefs.h; then
+ $as_echo "## ----------- ##
+## confdefs.h. ##
+## ----------- ##"
+ echo
+ cat confdefs.h
+ echo
+ fi
+ test "$ac_signal" != 0 &&
+ $as_echo "$as_me: caught signal $ac_signal"
+ $as_echo "$as_me: exit $exit_status"
+ } >&5
+ rm -f core *.core core.conftest.* &&
+ rm -f -r conftest* confdefs* conf$$* $ac_clean_files &&
+ exit $exit_status
+' 0
+for ac_signal in 1 2 13 15; do
+ trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal
+done
+ac_signal=0
+
+# confdefs.h avoids OS command line length limits that DEFS can exceed.
+rm -f -r conftest* confdefs.h
+
+$as_echo "/* confdefs.h */" > confdefs.h
+
+# Predefined preprocessor variables.
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_NAME "$PACKAGE_NAME"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_VERSION "$PACKAGE_VERSION"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_STRING "$PACKAGE_STRING"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
+_ACEOF
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_URL "$PACKAGE_URL"
+_ACEOF
+
+
+# Let the site file select an alternate cache file if it wants to.
+# Prefer an explicitly selected file to automatically selected ones.
+ac_site_file1=NONE
+ac_site_file2=NONE
+if test -n "$CONFIG_SITE"; then
+ # We do not want a PATH search for config.site.
+ case $CONFIG_SITE in #((
+ -*) ac_site_file1=./$CONFIG_SITE;;
+ */*) ac_site_file1=$CONFIG_SITE;;
+ *) ac_site_file1=./$CONFIG_SITE;;
+ esac
+elif test "x$prefix" != xNONE; then
+ ac_site_file1=$prefix/share/config.site
+ ac_site_file2=$prefix/etc/config.site
+else
+ ac_site_file1=$ac_default_prefix/share/config.site
+ ac_site_file2=$ac_default_prefix/etc/config.site
+fi
+for ac_site_file in "$ac_site_file1" "$ac_site_file2"
+do
+ test "x$ac_site_file" = xNONE && continue
+ if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5
+$as_echo "$as_me: loading site script $ac_site_file" >&6;}
+ sed 's/^/| /' "$ac_site_file" >&5
+ . "$ac_site_file" \
+ || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "failed to load site script $ac_site_file
+See \`config.log' for more details" "$LINENO" 5; }
+ fi
+done
+
+if test -r "$cache_file"; then
+ # Some versions of bash will fail to source /dev/null (special files
+ # actually), so we avoid doing that. DJGPP emulates it as a regular file.
+ if test /dev/null != "$cache_file" && test -f "$cache_file"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5
+$as_echo "$as_me: loading cache $cache_file" >&6;}
+ case $cache_file in
+ [\\/]* | ?:[\\/]* ) . "$cache_file";;
+ *) . "./$cache_file";;
+ esac
+ fi
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5
+$as_echo "$as_me: creating cache $cache_file" >&6;}
+ >$cache_file
+fi
+
+# Check that the precious variables saved in the cache have kept the same
+# value.
+ac_cache_corrupted=false
+for ac_var in $ac_precious_vars; do
+ eval ac_old_set=\$ac_cv_env_${ac_var}_set
+ eval ac_new_set=\$ac_env_${ac_var}_set
+ eval ac_old_val=\$ac_cv_env_${ac_var}_value
+ eval ac_new_val=\$ac_env_${ac_var}_value
+ case $ac_old_set,$ac_new_set in
+ set,)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
+$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
+ ac_cache_corrupted=: ;;
+ ,set)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5
+$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
+ ac_cache_corrupted=: ;;
+ ,);;
+ *)
+ if test "x$ac_old_val" != "x$ac_new_val"; then
+ # differences in whitespace do not lead to failure.
+ ac_old_val_w=`echo x $ac_old_val`
+ ac_new_val_w=`echo x $ac_new_val`
+ if test "$ac_old_val_w" != "$ac_new_val_w"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5
+$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
+ ac_cache_corrupted=:
+ else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5
+$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;}
+ eval $ac_var=\$ac_old_val
+ fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5
+$as_echo "$as_me: former value: \`$ac_old_val'" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5
+$as_echo "$as_me: current value: \`$ac_new_val'" >&2;}
+ fi;;
+ esac
+ # Pass precious variables to config.status.
+ if test "$ac_new_set" = set; then
+ case $ac_new_val in
+ *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;
+ *) ac_arg=$ac_var=$ac_new_val ;;
+ esac
+ case " $ac_configure_args " in
+ *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy.
+ *) as_fn_append ac_configure_args " '$ac_arg'" ;;
+ esac
+ fi
+done
+if $ac_cache_corrupted; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5
+$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;}
+ as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5
+fi
+## -------------------- ##
+## Main body of script. ##
+## -------------------- ##
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}gcc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_CC="${ac_tool_prefix}gcc"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_CC"; then
+ ac_ct_CC=$CC
+ # Extract the first word of "gcc", so it can be a program name with args.
+set dummy gcc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_ac_ct_CC="gcc"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+ if test "x$ac_ct_CC" = x; then
+ CC=""
+ else
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+ CC=$ac_ct_CC
+ fi
+else
+ CC="$ac_cv_prog_CC"
+fi
+
+if test -z "$CC"; then
+ if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}cc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_CC="${ac_tool_prefix}cc"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ fi
+fi
+if test -z "$CC"; then
+ # Extract the first word of "cc", so it can be a program name with args.
+set dummy cc; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+ ac_prog_rejected=no
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
+ ac_prog_rejected=yes
+ continue
+ fi
+ ac_cv_prog_CC="cc"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+if test $ac_prog_rejected = yes; then
+ # We found a bogon in the path, so make sure we never use it.
+ set dummy $ac_cv_prog_CC
+ shift
+ if test $# != 0; then
+ # We chose a different compiler from the bogus one.
+ # However, it has the same basename, so the bogon will be chosen
+ # first if we set CC to just the basename; use the full file name.
+ shift
+ ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@"
+ fi
+fi
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$CC"; then
+ if test -n "$ac_tool_prefix"; then
+ for ac_prog in cl.exe
+ do
+ # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ test -n "$CC" && break
+ done
+fi
+if test -z "$CC"; then
+ ac_ct_CC=$CC
+ for ac_prog in cl.exe
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_ac_ct_CC="$ac_prog"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ test -n "$ac_ct_CC" && break
+done
+
+ if test "x$ac_ct_CC" = x; then
+ CC=""
+ else
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+ CC=$ac_ct_CC
+ fi
+fi
+
+fi
+
+
+test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "no acceptable C compiler found in \$PATH
+See \`config.log' for more details" "$LINENO" 5; }
+
+# Provide some information about the compiler.
+$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5
+set X $ac_compile
+ac_compiler=$2
+for ac_option in --version -v -V -qversion; do
+ { { ac_try="$ac_compiler $ac_option >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_compiler $ac_option >&5") 2>conftest.err
+ ac_status=$?
+ if test -s conftest.err; then
+ sed '10a\
+... rest of stderr output deleted ...
+ 10q' conftest.err >conftest.er1
+ cat conftest.er1 >&5
+ fi
+ rm -f conftest.er1 conftest.err
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }
+done
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out"
+# Try to create an executable without -o first, disregard a.out.
+# It will help us diagnose broken compilers, and finding out an intuition
+# of exeext.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5
+$as_echo_n "checking whether the C compiler works... " >&6; }
+ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'`
+
+# The possible output files:
+ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*"
+
+ac_rmfiles=
+for ac_file in $ac_files
+do
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;;
+ * ) ac_rmfiles="$ac_rmfiles $ac_file";;
+ esac
+done
+rm -f $ac_rmfiles
+
+if { { ac_try="$ac_link_default"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_link_default") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then :
+ # Autoconf-2.13 could set the ac_cv_exeext variable to `no'.
+# So ignore a value of `no', otherwise this would lead to `EXEEXT = no'
+# in a Makefile. We should not override ac_cv_exeext if it was cached,
+# so that the user can short-circuit this test for compilers unknown to
+# Autoconf.
+for ac_file in $ac_files ''
+do
+ test -f "$ac_file" || continue
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj )
+ ;;
+ [ab].out )
+ # We found the default executable, but exeext='' is most
+ # certainly right.
+ break;;
+ *.* )
+ if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no;
+ then :; else
+ ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+ fi
+ # We set ac_cv_exeext here because the later test for it is not
+ # safe: cross compilers may not add the suffix if given an `-o'
+ # argument, so we may need to know it at that point already.
+ # Even if this section looks crufty: it has the advantage of
+ # actually working.
+ break;;
+ * )
+ break;;
+ esac
+done
+test "$ac_cv_exeext" = no && ac_cv_exeext=
+
+else
+ ac_file=''
+fi
+if test -z "$ac_file"; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+$as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error 77 "C compiler cannot create executables
+See \`config.log' for more details" "$LINENO" 5; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5
+$as_echo_n "checking for C compiler default output file name... " >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5
+$as_echo "$ac_file" >&6; }
+ac_exeext=$ac_cv_exeext
+
+rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out
+ac_clean_files=$ac_clean_files_save
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5
+$as_echo_n "checking for suffix of executables... " >&6; }
+if { { ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then :
+ # If both `conftest.exe' and `conftest' are `present' (well, observable)
+# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will
+# work properly (i.e., refer to `conftest.exe'), while it won't with
+# `rm'.
+for ac_file in conftest.exe conftest conftest.*; do
+ test -f "$ac_file" || continue
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;;
+ *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+ break;;
+ * ) break;;
+ esac
+done
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+rm -f conftest conftest$ac_cv_exeext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5
+$as_echo "$ac_cv_exeext" >&6; }
+
+rm -f conftest.$ac_ext
+EXEEXT=$ac_cv_exeext
+ac_exeext=$EXEEXT
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <stdio.h>
+int
+main ()
+{
+FILE *f = fopen ("conftest.out", "w");
+ return ferror (f) || fclose (f) != 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+ac_clean_files="$ac_clean_files conftest.out"
+# Check that the compiler produces executables we can run. If not, either
+# the compiler is broken, or we cross compile.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5
+$as_echo_n "checking whether we are cross compiling... " >&6; }
+if test "$cross_compiling" != yes; then
+ { { ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }
+ if { ac_try='./conftest$ac_cv_exeext'
+ { { case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then
+ cross_compiling=no
+ else
+ if test "$cross_compiling" = maybe; then
+ cross_compiling=yes
+ else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot run C compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details" "$LINENO" 5; }
+ fi
+ fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5
+$as_echo "$cross_compiling" >&6; }
+
+rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out
+ac_clean_files=$ac_clean_files_save
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5
+$as_echo_n "checking for suffix of object files... " >&6; }
+if ${ac_cv_objext+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.o conftest.obj
+if { { ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_compile") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then :
+ for ac_file in conftest.o conftest.obj conftest.*; do
+ test -f "$ac_file" || continue;
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;;
+ *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'`
+ break;;
+ esac
+done
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot compute suffix of object files: cannot compile
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+rm -f conftest.$ac_cv_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5
+$as_echo "$ac_cv_objext" >&6; }
+OBJEXT=$ac_cv_objext
+ac_objext=$OBJEXT
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5
+$as_echo_n "checking whether we are using the GNU C compiler... " >&6; }
+if ${ac_cv_c_compiler_gnu+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+#ifndef __GNUC__
+ choke me
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_compiler_gnu=yes
+else
+ ac_compiler_gnu=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_cv_c_compiler_gnu=$ac_compiler_gnu
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5
+$as_echo "$ac_cv_c_compiler_gnu" >&6; }
+if test $ac_compiler_gnu = yes; then
+ GCC=yes
+else
+ GCC=
+fi
+ac_test_CFLAGS=${CFLAGS+set}
+ac_save_CFLAGS=$CFLAGS
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5
+$as_echo_n "checking whether $CC accepts -g... " >&6; }
+if ${ac_cv_prog_cc_g+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_save_c_werror_flag=$ac_c_werror_flag
+ ac_c_werror_flag=yes
+ ac_cv_prog_cc_g=no
+ CFLAGS="-g"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_prog_cc_g=yes
+else
+ CFLAGS=""
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+else
+ ac_c_werror_flag=$ac_save_c_werror_flag
+ CFLAGS="-g"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_prog_cc_g=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_c_werror_flag=$ac_save_c_werror_flag
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5
+$as_echo "$ac_cv_prog_cc_g" >&6; }
+if test "$ac_test_CFLAGS" = set; then
+ CFLAGS=$ac_save_CFLAGS
+elif test $ac_cv_prog_cc_g = yes; then
+ if test "$GCC" = yes; then
+ CFLAGS="-g -O2"
+ else
+ CFLAGS="-g"
+ fi
+else
+ if test "$GCC" = yes; then
+ CFLAGS="-O2"
+ else
+ CFLAGS=
+ fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5
+$as_echo_n "checking for $CC option to accept ISO C89... " >&6; }
+if ${ac_cv_prog_cc_c89+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_cv_prog_cc_c89=no
+ac_save_CC=$CC
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <stdarg.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */
+struct buf { int x; };
+FILE * (*rcsopen) (struct buf *, struct stat *, int);
+static char *e (p, i)
+ char **p;
+ int i;
+{
+ return p[i];
+}
+static char *f (char * (*g) (char **, int), char **p, ...)
+{
+ char *s;
+ va_list v;
+ va_start (v,p);
+ s = g (p, va_arg (v,int));
+ va_end (v);
+ return s;
+}
+
+/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has
+ function prototypes and stuff, but not '\xHH' hex character constants.
+ These don't provoke an error unfortunately, instead are silently treated
+ as 'x'. The following induces an error, until -std is added to get
+ proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an
+ array size at least. It's necessary to write '\x00'==0 to get something
+ that's true only with -std. */
+int osf4_cc_array ['\x00' == 0 ? 1 : -1];
+
+/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters
+ inside strings and character constants. */
+#define FOO(x) 'x'
+int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1];
+
+int test (int i, double x);
+struct s1 {int (*f) (int a);};
+struct s2 {int (*f) (double a);};
+int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
+int argc;
+char **argv;
+int
+main ()
+{
+return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1];
+ ;
+ return 0;
+}
+_ACEOF
+for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \
+ -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
+do
+ CC="$ac_save_CC $ac_arg"
+ if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_prog_cc_c89=$ac_arg
+fi
+rm -f core conftest.err conftest.$ac_objext
+ test "x$ac_cv_prog_cc_c89" != "xno" && break
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+
+fi
+# AC_CACHE_VAL
+case "x$ac_cv_prog_cc_c89" in
+ x)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+$as_echo "none needed" >&6; } ;;
+ xno)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+$as_echo "unsupported" >&6; } ;;
+ *)
+ CC="$CC $ac_cv_prog_cc_c89"
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5
+$as_echo "$ac_cv_prog_cc_c89" >&6; } ;;
+esac
+if test "x$ac_cv_prog_cc_c89" != xno; then :
+
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for BZ2_bzDecompressInit in -lbz2" >&5
+$as_echo_n "checking for BZ2_bzDecompressInit in -lbz2... " >&6; }
+if ${ac_cv_lib_bz2_BZ2_bzDecompressInit+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lbz2 $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char BZ2_bzDecompressInit ();
+int
+main ()
+{
+return BZ2_bzDecompressInit ();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_lib_bz2_BZ2_bzDecompressInit=yes
+else
+ ac_cv_lib_bz2_BZ2_bzDecompressInit=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_bz2_BZ2_bzDecompressInit" >&5
+$as_echo "$ac_cv_lib_bz2_BZ2_bzDecompressInit" >&6; }
+if test "x$ac_cv_lib_bz2_BZ2_bzDecompressInit" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_LIBBZ2 1
+_ACEOF
+
+ LIBS="-lbz2 $LIBS"
+
+fi
+
+
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+if test -z "$CXX"; then
+ if test -n "$CCC"; then
+ CXX=$CCC
+ else
+ if test -n "$ac_tool_prefix"; then
+ for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC
+ do
+ # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CXX+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$CXX"; then
+ ac_cv_prog_CXX="$CXX" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_CXX="$ac_tool_prefix$ac_prog"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+CXX=$ac_cv_prog_CXX
+if test -n "$CXX"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5
+$as_echo "$CXX" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ test -n "$CXX" && break
+ done
+fi
+if test -z "$CXX"; then
+ ac_ct_CXX=$CXX
+ for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CXX+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$ac_ct_CXX"; then
+ ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_ac_ct_CXX="$ac_prog"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CXX=$ac_cv_prog_ac_ct_CXX
+if test -n "$ac_ct_CXX"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CXX" >&5
+$as_echo "$ac_ct_CXX" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ test -n "$ac_ct_CXX" && break
+done
+
+ if test "x$ac_ct_CXX" = x; then
+ CXX="g++"
+ else
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+ CXX=$ac_ct_CXX
+ fi
+fi
+
+ fi
+fi
+# Provide some information about the compiler.
+$as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler version" >&5
+set X $ac_compile
+ac_compiler=$2
+for ac_option in --version -v -V -qversion; do
+ { { ac_try="$ac_compiler $ac_option >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_compiler $ac_option >&5") 2>conftest.err
+ ac_status=$?
+ if test -s conftest.err; then
+ sed '10a\
+... rest of stderr output deleted ...
+ 10q' conftest.err >conftest.er1
+ cat conftest.er1 >&5
+ fi
+ rm -f conftest.er1 conftest.err
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }
+done
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C++ compiler" >&5
+$as_echo_n "checking whether we are using the GNU C++ compiler... " >&6; }
+if ${ac_cv_cxx_compiler_gnu+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+#ifndef __GNUC__
+ choke me
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ ac_compiler_gnu=yes
+else
+ ac_compiler_gnu=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_cv_cxx_compiler_gnu=$ac_compiler_gnu
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_cxx_compiler_gnu" >&5
+$as_echo "$ac_cv_cxx_compiler_gnu" >&6; }
+if test $ac_compiler_gnu = yes; then
+ GXX=yes
+else
+ GXX=
+fi
+ac_test_CXXFLAGS=${CXXFLAGS+set}
+ac_save_CXXFLAGS=$CXXFLAGS
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -g" >&5
+$as_echo_n "checking whether $CXX accepts -g... " >&6; }
+if ${ac_cv_prog_cxx_g+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_save_cxx_werror_flag=$ac_cxx_werror_flag
+ ac_cxx_werror_flag=yes
+ ac_cv_prog_cxx_g=no
+ CXXFLAGS="-g"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ ac_cv_prog_cxx_g=yes
+else
+ CXXFLAGS=""
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+
+else
+ ac_cxx_werror_flag=$ac_save_cxx_werror_flag
+ CXXFLAGS="-g"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ ac_cv_prog_cxx_g=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_cxx_werror_flag=$ac_save_cxx_werror_flag
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_g" >&5
+$as_echo "$ac_cv_prog_cxx_g" >&6; }
+if test "$ac_test_CXXFLAGS" = set; then
+ CXXFLAGS=$ac_save_CXXFLAGS
+elif test $ac_cv_prog_cxx_g = yes; then
+ if test "$GXX" = yes; then
+ CXXFLAGS="-g -O2"
+ else
+ CXXFLAGS="-g"
+ fi
+else
+ if test "$GXX" = yes; then
+ CXXFLAGS="-O2"
+ else
+ CXXFLAGS=
+ fi
+fi
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
+$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
+if ${ac_cv_path_GREP+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -z "$GREP"; then
+ ac_path_GREP_found=false
+ # Loop through the user's path and test for each of PROGNAME-LIST
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_prog in grep ggrep; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
+ { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue
+# Check for GNU ac_path_GREP and select it if it is found.
+ # Check for GNU $ac_path_GREP
+case `"$ac_path_GREP" --version 2>&1` in
+*GNU*)
+ ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;;
+*)
+ ac_count=0
+ $as_echo_n 0123456789 >"conftest.in"
+ while :
+ do
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
+ mv "conftest.tmp" "conftest.in"
+ cp "conftest.in" "conftest.nl"
+ $as_echo 'GREP' >> "conftest.nl"
+ "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+ as_fn_arith $ac_count + 1 && ac_count=$as_val
+ if test $ac_count -gt ${ac_path_GREP_max-0}; then
+ # Best one so far, save it but keep looking for a better one
+ ac_cv_path_GREP="$ac_path_GREP"
+ ac_path_GREP_max=$ac_count
+ fi
+ # 10*(2^10) chars as input seems more than enough
+ test $ac_count -gt 10 && break
+ done
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+ $ac_path_GREP_found && break 3
+ done
+ done
+ done
+IFS=$as_save_IFS
+ if test -z "$ac_cv_path_GREP"; then
+ as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+ fi
+else
+ ac_cv_path_GREP=$GREP
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5
+$as_echo "$ac_cv_path_GREP" >&6; }
+ GREP="$ac_cv_path_GREP"
+
+
+
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C++ preprocessor" >&5
+$as_echo_n "checking how to run the C++ preprocessor... " >&6; }
+if test -z "$CXXCPP"; then
+ if ${ac_cv_prog_CXXCPP+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ # Double quotes because CXXCPP needs to be expanded
+ for CXXCPP in "$CXX -E" "/lib/cpp"
+ do
+ ac_preproc_ok=false
+for ac_cxx_preproc_warn_flag in '' yes
+do
+ # Use a header file that comes with gcc, so configuring glibc
+ # with a fresh cross-compiler works.
+ # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ # <limits.h> exists even on freestanding compilers.
+ # On the NeXT, cc -E runs the code through the compiler's parser,
+ # not just through cpp. "Syntax error" is here to catch this case.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+ Syntax error
+_ACEOF
+if ac_fn_cxx_try_cpp "$LINENO"; then :
+
+else
+ # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+ # OK, works on sane cases. Now check whether nonexistent headers
+ # can be detected and how.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_cxx_try_cpp "$LINENO"; then :
+ # Broken: success on invalid input.
+continue
+else
+ # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.i conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+ break
+fi
+
+ done
+ ac_cv_prog_CXXCPP=$CXXCPP
+
+fi
+ CXXCPP=$ac_cv_prog_CXXCPP
+else
+ ac_cv_prog_CXXCPP=$CXXCPP
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXXCPP" >&5
+$as_echo "$CXXCPP" >&6; }
+ac_preproc_ok=false
+for ac_cxx_preproc_warn_flag in '' yes
+do
+ # Use a header file that comes with gcc, so configuring glibc
+ # with a fresh cross-compiler works.
+ # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ # <limits.h> exists even on freestanding compilers.
+ # On the NeXT, cc -E runs the code through the compiler's parser,
+ # not just through cpp. "Syntax error" is here to catch this case.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+ Syntax error
+_ACEOF
+if ac_fn_cxx_try_cpp "$LINENO"; then :
+
+else
+ # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+ # OK, works on sane cases. Now check whether nonexistent headers
+ # can be detected and how.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_cxx_try_cpp "$LINENO"; then :
+ # Broken: success on invalid input.
+continue
+else
+ # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.i conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then :
+
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "C++ preprocessor \"$CXXCPP\" fails sanity check
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5
+$as_echo_n "checking for a sed that does not truncate output... " >&6; }
+if ${ac_cv_path_SED+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/
+ for ac_i in 1 2 3 4 5 6 7; do
+ ac_script="$ac_script$as_nl$ac_script"
+ done
+ echo "$ac_script" 2>/dev/null | sed 99q >conftest.sed
+ { ac_script=; unset ac_script;}
+ if test -z "$SED"; then
+ ac_path_SED_found=false
+ # Loop through the user's path and test for each of PROGNAME-LIST
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_prog in sed gsed; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ ac_path_SED="$as_dir/$ac_prog$ac_exec_ext"
+ { test -f "$ac_path_SED" && $as_test_x "$ac_path_SED"; } || continue
+# Check for GNU ac_path_SED and select it if it is found.
+ # Check for GNU $ac_path_SED
+case `"$ac_path_SED" --version 2>&1` in
+*GNU*)
+ ac_cv_path_SED="$ac_path_SED" ac_path_SED_found=:;;
+*)
+ ac_count=0
+ $as_echo_n 0123456789 >"conftest.in"
+ while :
+ do
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
+ mv "conftest.tmp" "conftest.in"
+ cp "conftest.in" "conftest.nl"
+ $as_echo '' >> "conftest.nl"
+ "$ac_path_SED" -f conftest.sed < "conftest.nl" >"conftest.out" 2>/dev/null || break
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+ as_fn_arith $ac_count + 1 && ac_count=$as_val
+ if test $ac_count -gt ${ac_path_SED_max-0}; then
+ # Best one so far, save it but keep looking for a better one
+ ac_cv_path_SED="$ac_path_SED"
+ ac_path_SED_max=$ac_count
+ fi
+ # 10*(2^10) chars as input seems more than enough
+ test $ac_count -gt 10 && break
+ done
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+ $ac_path_SED_found && break 3
+ done
+ done
+ done
+IFS=$as_save_IFS
+ if test -z "$ac_cv_path_SED"; then
+ as_fn_error $? "no acceptable sed could be found in \$PATH" "$LINENO" 5
+ fi
+else
+ ac_cv_path_SED=$SED
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5
+$as_echo "$ac_cv_path_SED" >&6; }
+ SED="$ac_cv_path_SED"
+ rm -f conftest.sed
+
+echo "$as_me: this is boost.m4 serial 25" >&5
+boost_save_IFS=$IFS
+boost_version_req=1.41.0
+IFS=.
+set x $boost_version_req 0 0 0
+IFS=$boost_save_IFS
+shift
+boost_version_req=`expr "$1" '*' 100000 + "$2" '*' 100 + "$3"`
+boost_version_req_string=$1.$2.$3
+
+# Check whether --with-boost was given.
+if test "${with_boost+set}" = set; then :
+ withval=$with_boost;
+fi
+# If BOOST_ROOT is set and the user has not provided a value to
+# --with-boost, then treat BOOST_ROOT as if it the user supplied it.
+if test x"$BOOST_ROOT" != x; then
+ if test x"$with_boost" = x; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: Detected BOOST_ROOT; continuing with --with-boost=$BOOST_ROOT" >&5
+$as_echo "$as_me: Detected BOOST_ROOT; continuing with --with-boost=$BOOST_ROOT" >&6;}
+ with_boost=$BOOST_ROOT
+ else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: Detected BOOST_ROOT=$BOOST_ROOT, but overridden by --with-boost=$with_boost" >&5
+$as_echo "$as_me: Detected BOOST_ROOT=$BOOST_ROOT, but overridden by --with-boost=$with_boost" >&6;}
+ fi
+fi
+DISTCHECK_CONFIGURE_FLAGS="$DISTCHECK_CONFIGURE_FLAGS '--with-boost=$with_boost'"
+boost_save_CPPFLAGS=$CPPFLAGS
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for Boost headers version >= $boost_version_req_string" >&5
+$as_echo_n "checking for Boost headers version >= $boost_version_req_string... " >&6; }
+if ${boost_cv_inc_path+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ boost_cv_inc_path=no
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <boost/version.hpp>
+#if !defined BOOST_VERSION
+# error BOOST_VERSION is not defined
+#elif BOOST_VERSION < $boost_version_req
+# error Boost headers version < $boost_version_req
+#endif
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+ # If the user provided a value to --with-boost, use it and only it.
+ case $with_boost in #(
+ ''|yes) set x '' /opt/local/include /usr/local/include /opt/include \
+ /usr/include C:/Boost/include;; #(
+ *) set x "$with_boost/include" "$with_boost";;
+ esac
+ shift
+ for boost_dir
+ do
+ # Without --layout=system, Boost (or at least some versions) installs
+ # itself in <prefix>/include/boost-<version>. This inner loop helps to
+ # find headers in such directories.
+ #
+ # Any ${boost_dir}/boost-x_xx directories are searched in reverse version
+ # order followed by ${boost_dir}. The final '.' is a sentinel for
+ # searching $boost_dir" itself. Entries are whitespace separated.
+ #
+ # I didn't indent this loop on purpose (to avoid over-indented code)
+ boost_layout_system_search_list=`cd "$boost_dir" 2>/dev/null \
+ && ls -1 | "${GREP}" '^boost-' | sort -rn -t- -k2 \
+ && echo .`
+ for boost_inc in $boost_layout_system_search_list
+ do
+ if test x"$boost_inc" != x.; then
+ boost_inc="$boost_dir/$boost_inc"
+ else
+ boost_inc="$boost_dir" # Uses sentinel in boost_layout_system_search_list
+ fi
+ if test x"$boost_inc" != x; then
+ # We are going to check whether the version of Boost installed
+ # in $boost_inc is usable by running a compilation that
+ # #includes it. But if we pass a -I/some/path in which Boost
+ # is not installed, the compiler will just skip this -I and
+ # use other locations (either from CPPFLAGS, or from its list
+ # of system include directories). As a result we would use
+ # header installed on the machine instead of the /some/path
+ # specified by the user. So in that precise case (trying
+ # $boost_inc), make sure the version.hpp exists.
+ #
+ # Use test -e as there can be symlinks.
+ test -e "$boost_inc/boost/version.hpp" || continue
+ CPPFLAGS="$CPPFLAGS -I$boost_inc"
+ fi
+ if ac_fn_cxx_try_compile "$LINENO"; then :
+ boost_cv_inc_path=yes
+else
+ boost_cv_version=no
+fi
+rm -f core conftest.err conftest.$ac_objext
+ if test x"$boost_cv_inc_path" = xyes; then
+ if test x"$boost_inc" != x; then
+ boost_cv_inc_path=$boost_inc
+ fi
+ break 2
+ fi
+ done
+ done
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $boost_cv_inc_path" >&5
+$as_echo "$boost_cv_inc_path" >&6; }
+ case $boost_cv_inc_path in #(
+ no)
+ boost_errmsg="cannot find Boost headers version >= $boost_version_req_string"
+ as_fn_error $? "$boost_errmsg" "$LINENO" 5
+
+ ;;#(
+ yes)
+ BOOST_CPPFLAGS=
+ ;;#(
+ *)
+ BOOST_CPPFLAGS="-I$boost_cv_inc_path"
+ ;;
+ esac
+ if test x"$boost_cv_inc_path" != xno; then
+
+$as_echo "#define HAVE_BOOST 1" >>confdefs.h
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for Boost's header version" >&5
+$as_echo_n "checking for Boost's header version... " >&6; }
+if ${boost_cv_lib_version+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <boost/version.hpp>
+boost-lib-version = BOOST_LIB_VERSION
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ grep -v '#' |
+ tr -d '\r' |
+ tr -s '\n' ' ' |
+ $SED -n -e "/^boost-lib-version = /{s///;s/[\" ]//g;p;q;}" >conftest.i 2>&1; then :
+ boost_cv_lib_version=`cat conftest.i`
+fi
+rm -rf conftest*
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $boost_cv_lib_version" >&5
+$as_echo "$boost_cv_lib_version" >&6; }
+ # e.g. "134" for 1_34_1 or "135" for 1_35
+ boost_major_version=`echo "$boost_cv_lib_version" | sed 's/_//;s/_.*//'`
+ case $boost_major_version in #(
+ '' | *[!0-9]*)
+ as_fn_error $? "invalid value: boost_major_version='$boost_major_version'" "$LINENO" 5
+ ;;
+ esac
+fi
+CPPFLAGS=$boost_save_CPPFLAGS
+
+BOOST_CPPFLAGS="${BOOST_CPPFLAGS}"
+
+ac_config_files="$ac_config_files src/Makevars"
+
+cp confdefs.h src/config.h
+cat >confcache <<\_ACEOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs, see configure's option --config-cache.
+# It is not useful on other systems. If it contains results you don't
+# want to keep, you may remove or edit it.
+#
+# config.status only pays attention to the cache file if you give it
+# the --recheck option to rerun configure.
+#
+# `ac_cv_env_foo' variables (set or unset) will be overridden when
+# loading this file, other *unset* `ac_cv_foo' will be assigned the
+# following values.
+
+_ACEOF
+
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, we kill variables containing newlines.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+(
+ for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do
+ eval ac_val=\$$ac_var
+ case $ac_val in #(
+ *${as_nl}*)
+ case $ac_var in #(
+ *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5
+$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
+ esac
+ case $ac_var in #(
+ _ | IFS | as_nl) ;; #(
+ BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
+ *) { eval $ac_var=; unset $ac_var;} ;;
+ esac ;;
+ esac
+ done
+
+ (set) 2>&1 |
+ case $as_nl`(ac_space=' '; set) 2>&1` in #(
+ *${as_nl}ac_space=\ *)
+ # `set' does not quote correctly, so add quotes: double-quote
+ # substitution turns \\\\ into \\, and sed turns \\ into \.
+ sed -n \
+ "s/'/'\\\\''/g;
+ s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p"
+ ;; #(
+ *)
+ # `set' quotes correctly as required by POSIX, so do not add quotes.
+ sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
+ ;;
+ esac |
+ sort
+) |
+ sed '
+ /^ac_cv_env_/b end
+ t clear
+ :clear
+ s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/
+ t end
+ s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/
+ :end' >>confcache
+if diff "$cache_file" confcache >/dev/null 2>&1; then :; else
+ if test -w "$cache_file"; then
+ if test "x$cache_file" != "x/dev/null"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5
+$as_echo "$as_me: updating cache $cache_file" >&6;}
+ if test ! -f "$cache_file" || test -h "$cache_file"; then
+ cat confcache >"$cache_file"
+ else
+ case $cache_file in #(
+ */* | ?:*)
+ mv -f confcache "$cache_file"$$ &&
+ mv -f "$cache_file"$$ "$cache_file" ;; #(
+ *)
+ mv -f confcache "$cache_file" ;;
+ esac
+ fi
+ fi
+ else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5
+$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;}
+ fi
+fi
+rm -f confcache
+
+test "x$prefix" = xNONE && prefix=$ac_default_prefix
+# Let make expand exec_prefix.
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
+
+# Transform confdefs.h into DEFS.
+# Protect against shell expansion while executing Makefile rules.
+# Protect against Makefile macro expansion.
+#
+# If the first sed substitution is executed (which looks for macros that
+# take arguments), then branch to the quote section. Otherwise,
+# look for a macro that doesn't take arguments.
+ac_script='
+:mline
+/\\$/{
+ N
+ s,\\\n,,
+ b mline
+}
+t clear
+:clear
+s/^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*([^)]*)\)[ ]*\(.*\)/-D\1=\2/g
+t quote
+s/^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)/-D\1=\2/g
+t quote
+b any
+:quote
+s/[ `~#$^&*(){}\\|;'\''"<>?]/\\&/g
+s/\[/\\&/g
+s/\]/\\&/g
+s/\$/$$/g
+H
+:any
+${
+ g
+ s/^\n//
+ s/\n/ /g
+ p
+}
+'
+DEFS=`sed -n "$ac_script" confdefs.h`
+
+
+ac_libobjs=
+ac_ltlibobjs=
+U=
+for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue
+ # 1. Remove the extension, and $U if already installed.
+ ac_script='s/\$U\././;s/\.o$//;s/\.obj$//'
+ ac_i=`$as_echo "$ac_i" | sed "$ac_script"`
+ # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR
+ # will be set to the directory where LIBOBJS objects are built.
+ as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext"
+ as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo'
+done
+LIBOBJS=$ac_libobjs
+
+LTLIBOBJS=$ac_ltlibobjs
+
+
+
+: "${CONFIG_STATUS=./config.status}"
+ac_write_fail=0
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files $CONFIG_STATUS"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5
+$as_echo "$as_me: creating $CONFIG_STATUS" >&6;}
+as_write_fail=0
+cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1
+#! $SHELL
+# Generated by $as_me.
+# Run this file to recreate the current configuration.
+# Compiler output produced by configure, useful for debugging
+# configure, is in config.log if it exists.
+
+debug=false
+ac_cs_recheck=false
+ac_cs_silent=false
+
+SHELL=\${CONFIG_SHELL-$SHELL}
+export SHELL
+_ASEOF
+cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1
+## -------------------- ##
+## M4sh Initialization. ##
+## -------------------- ##
+
+# Be more Bourne compatible
+DUALCASE=1; export DUALCASE # for MKS sh
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
+ emulate sh
+ NULLCMD=:
+ # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '${1+"$@"}'='"$@"'
+ setopt NO_GLOB_SUBST
+else
+ case `(set -o) 2>/dev/null` in #(
+ *posix*) :
+ set -o posix ;; #(
+ *) :
+ ;;
+esac
+fi
+
+
+as_nl='
+'
+export as_nl
+# Printing a long string crashes Solaris 7 /usr/bin/printf.
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
+# Prefer a ksh shell builtin over an external printf program on Solaris,
+# but without wasting forks for bash or zsh.
+if test -z "$BASH_VERSION$ZSH_VERSION" \
+ && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then
+ as_echo='print -r --'
+ as_echo_n='print -rn --'
+elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
+ as_echo='printf %s\n'
+ as_echo_n='printf %s'
+else
+ if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then
+ as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"'
+ as_echo_n='/usr/ucb/echo -n'
+ else
+ as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
+ as_echo_n_body='eval
+ arg=$1;
+ case $arg in #(
+ *"$as_nl"*)
+ expr "X$arg" : "X\\(.*\\)$as_nl";
+ arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
+ esac;
+ expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl"
+ '
+ export as_echo_n_body
+ as_echo_n='sh -c $as_echo_n_body as_echo'
+ fi
+ export as_echo_body
+ as_echo='sh -c $as_echo_body as_echo'
+fi
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+ PATH_SEPARATOR=:
+ (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
+ (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
+ PATH_SEPARATOR=';'
+ }
+fi
+
+
+# IFS
+# We need space, tab and new line, in precisely that order. Quoting is
+# there to prevent editors from complaining about space-tab.
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word
+# splitting by setting IFS to empty value.)
+IFS=" "" $as_nl"
+
+# Find who we are. Look in the path if we contain no directory separator.
+as_myself=
+case $0 in #((
+ *[\\/]* ) as_myself=$0 ;;
+ *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+ done
+IFS=$as_save_IFS
+
+ ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+ as_myself=$0
+fi
+if test ! -f "$as_myself"; then
+ $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
+ exit 1
+fi
+
+# Unset variables that we do not need and which cause bugs (e.g. in
+# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1"
+# suppresses any "Segmentation fault" message there. '((' could
+# trigger a bug in pdksh 5.2.14.
+for as_var in BASH_ENV ENV MAIL MAILPATH
+do eval test x\${$as_var+set} = xset \
+ && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+done
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+LC_ALL=C
+export LC_ALL
+LANGUAGE=C
+export LANGUAGE
+
+# CDPATH.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
+# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
+# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
+# script with STATUS, using 1 if that was 0.
+as_fn_error ()
+{
+ as_status=$1; test $as_status -eq 0 && as_status=1
+ if test "$4"; then
+ as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
+ fi
+ $as_echo "$as_me: error: $2" >&2
+ as_fn_exit $as_status
+} # as_fn_error
+
+
+# as_fn_set_status STATUS
+# -----------------------
+# Set $? to STATUS, without forking.
+as_fn_set_status ()
+{
+ return $1
+} # as_fn_set_status
+
+# as_fn_exit STATUS
+# -----------------
+# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
+as_fn_exit ()
+{
+ set +e
+ as_fn_set_status $1
+ exit $1
+} # as_fn_exit
+
+# as_fn_unset VAR
+# ---------------
+# Portably unset VAR.
+as_fn_unset ()
+{
+ { eval $1=; unset $1;}
+}
+as_unset=as_fn_unset
+# as_fn_append VAR VALUE
+# ----------------------
+# Append the text in VALUE to the end of the definition contained in VAR. Take
+# advantage of any shell optimizations that allow amortized linear growth over
+# repeated appends, instead of the typical quadratic growth present in naive
+# implementations.
+if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :
+ eval 'as_fn_append ()
+ {
+ eval $1+=\$2
+ }'
+else
+ as_fn_append ()
+ {
+ eval $1=\$$1\$2
+ }
+fi # as_fn_append
+
+# as_fn_arith ARG...
+# ------------------
+# Perform arithmetic evaluation on the ARGs, and store the result in the
+# global $as_val. Take advantage of shells that can avoid forks. The arguments
+# must be portable across $(()) and expr.
+if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :
+ eval 'as_fn_arith ()
+ {
+ as_val=$(( $* ))
+ }'
+else
+ as_fn_arith ()
+ {
+ as_val=`expr "$@" || test $? -eq 1`
+ }
+fi # as_fn_arith
+
+
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+ test "X`expr 00001 : '.*\(...\)'`" = X001; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+ as_basename=basename
+else
+ as_basename=false
+fi
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+ as_dirname=dirname
+else
+ as_dirname=false
+fi
+
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$0" |
+ sed '/^.*\/\([^/][^/]*\)\/*$/{
+ s//\1/
+ q
+ }
+ /^X\/\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\/\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in #(((((
+-n*)
+ case `echo 'xy\c'` in
+ *c*) ECHO_T=' ';; # ECHO_T is single tab character.
+ xy) ECHO_C='\c';;
+ *) echo `echo ksh88 bug on AIX 6.1` > /dev/null
+ ECHO_T=' ';;
+ esac;;
+*)
+ ECHO_N='-n';;
+esac
+
+rm -f conf$$ conf$$.exe conf$$.file
+if test -d conf$$.dir; then
+ rm -f conf$$.dir/conf$$.file
+else
+ rm -f conf$$.dir
+ mkdir conf$$.dir 2>/dev/null
+fi
+if (echo >conf$$.file) 2>/dev/null; then
+ if ln -s conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s='ln -s'
+ # ... but there are two gotchas:
+ # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+ # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+ # In both cases, we have to default to `cp -p'.
+ ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+ as_ln_s='cp -p'
+ elif ln conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s=ln
+ else
+ as_ln_s='cp -p'
+ fi
+else
+ as_ln_s='cp -p'
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
+
+
+# as_fn_mkdir_p
+# -------------
+# Create "$as_dir" as a directory, including parents if necessary.
+as_fn_mkdir_p ()
+{
+
+ case $as_dir in #(
+ -*) as_dir=./$as_dir;;
+ esac
+ test -d "$as_dir" || eval $as_mkdir_p || {
+ as_dirs=
+ while :; do
+ case $as_dir in #(
+ *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
+ *) as_qdir=$as_dir;;
+ esac
+ as_dirs="'$as_qdir' $as_dirs"
+ as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$as_dir" : 'X\(//\)[^/]' \| \
+ X"$as_dir" : 'X\(//\)$' \| \
+ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_dir" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)[^/].*/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+ test -d "$as_dir" && break
+ done
+ test -z "$as_dirs" || eval "mkdir $as_dirs"
+ } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
+
+
+} # as_fn_mkdir_p
+if mkdir -p . 2>/dev/null; then
+ as_mkdir_p='mkdir -p "$as_dir"'
+else
+ test -d ./-p && rmdir ./-p
+ as_mkdir_p=false
+fi
+
+if test -x / >/dev/null 2>&1; then
+ as_test_x='test -x'
+else
+ if ls -dL / >/dev/null 2>&1; then
+ as_ls_L_option=L
+ else
+ as_ls_L_option=
+ fi
+ as_test_x='
+ eval sh -c '\''
+ if test -d "$1"; then
+ test -d "$1/.";
+ else
+ case $1 in #(
+ -*)set "./$1";;
+ esac;
+ case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #((
+ ???[sx]*):;;*)false;;esac;fi
+ '\'' sh
+ '
+fi
+as_executable_p=$as_test_x
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+exec 6>&1
+## ----------------------------------- ##
+## Main body of $CONFIG_STATUS script. ##
+## ----------------------------------- ##
+_ASEOF
+test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# Save the log message, to keep $0 and so on meaningful, and to
+# report actual input values of CONFIG_FILES etc. instead of their
+# values after options handling.
+ac_log="
+This file was extended by SPP $as_me 1.7, which was
+generated by GNU Autoconf 2.68. Invocation command line was
+
+ CONFIG_FILES = $CONFIG_FILES
+ CONFIG_HEADERS = $CONFIG_HEADERS
+ CONFIG_LINKS = $CONFIG_LINKS
+ CONFIG_COMMANDS = $CONFIG_COMMANDS
+ $ $0 $@
+
+on `(hostname || uname -n) 2>/dev/null | sed 1q`
+"
+
+_ACEOF
+
+case $ac_config_files in *"
+"*) set x $ac_config_files; shift; ac_config_files=$*;;
+esac
+
+
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+# Files that config.status was made for.
+config_files="$ac_config_files"
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+ac_cs_usage="\
+\`$as_me' instantiates files and other configuration actions
+from templates according to the current configuration. Unless the files
+and actions are specified as TAGs, all are instantiated by default.
+
+Usage: $0 [OPTION]... [TAG]...
+
+ -h, --help print this help, then exit
+ -V, --version print version number and configuration settings, then exit
+ --config print configuration, then exit
+ -q, --quiet, --silent
+ do not print progress messages
+ -d, --debug don't remove temporary files
+ --recheck update $as_me by reconfiguring in the same conditions
+ --file=FILE[:TEMPLATE]
+ instantiate the configuration file FILE
+
+Configuration files:
+$config_files
+
+Report bugs to the package provider."
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
+ac_cs_version="\\
+SPP config.status 1.7
+configured by $0, generated by GNU Autoconf 2.68,
+ with options \\"\$ac_cs_config\\"
+
+Copyright (C) 2010 Free Software Foundation, Inc.
+This config.status script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it."
+
+ac_pwd='$ac_pwd'
+srcdir='$srcdir'
+test -n "\$AWK" || AWK=awk
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# The default lists apply if the user does not specify any file.
+ac_need_defaults=:
+while test $# != 0
+do
+ case $1 in
+ --*=?*)
+ ac_option=`expr "X$1" : 'X\([^=]*\)='`
+ ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'`
+ ac_shift=:
+ ;;
+ --*=)
+ ac_option=`expr "X$1" : 'X\([^=]*\)='`
+ ac_optarg=
+ ac_shift=:
+ ;;
+ *)
+ ac_option=$1
+ ac_optarg=$2
+ ac_shift=shift
+ ;;
+ esac
+
+ case $ac_option in
+ # Handling of the options.
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+ ac_cs_recheck=: ;;
+ --version | --versio | --versi | --vers | --ver | --ve | --v | -V )
+ $as_echo "$ac_cs_version"; exit ;;
+ --config | --confi | --conf | --con | --co | --c )
+ $as_echo "$ac_cs_config"; exit ;;
+ --debug | --debu | --deb | --de | --d | -d )
+ debug=: ;;
+ --file | --fil | --fi | --f )
+ $ac_shift
+ case $ac_optarg in
+ *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
+ '') as_fn_error $? "missing file argument" ;;
+ esac
+ as_fn_append CONFIG_FILES " '$ac_optarg'"
+ ac_need_defaults=false;;
+ --he | --h | --help | --hel | -h )
+ $as_echo "$ac_cs_usage"; exit ;;
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil | --si | --s)
+ ac_cs_silent=: ;;
+
+ # This is an error.
+ -*) as_fn_error $? "unrecognized option: \`$1'
+Try \`$0 --help' for more information." ;;
+
+ *) as_fn_append ac_config_targets " $1"
+ ac_need_defaults=false ;;
+
+ esac
+ shift
+done
+
+ac_configure_extra_args=
+
+if $ac_cs_silent; then
+ exec 6>/dev/null
+ ac_configure_extra_args="$ac_configure_extra_args --silent"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+if \$ac_cs_recheck; then
+ set X '$SHELL' '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
+ shift
+ \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6
+ CONFIG_SHELL='$SHELL'
+ export CONFIG_SHELL
+ exec "\$@"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+exec 5>>config.log
+{
+ echo
+ sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
+## Running $as_me. ##
+_ASBOX
+ $as_echo "$ac_log"
+} >&5
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+
+# Handling of arguments.
+for ac_config_target in $ac_config_targets
+do
+ case $ac_config_target in
+ "src/Makevars") CONFIG_FILES="$CONFIG_FILES src/Makevars" ;;
+
+ *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
+ esac
+done
+
+
+# If the user did not use the arguments to specify the items to instantiate,
+# then the envvar interface is used. Set only those that are not.
+# We use the long form for the default assignment because of an extremely
+# bizarre bug on SunOS 4.1.3.
+if $ac_need_defaults; then
+ test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
+fi
+
+# Have a temporary directory for convenience. Make it in the build tree
+# simply because there is no reason against having it here, and in addition,
+# creating and moving files from /tmp can sometimes cause problems.
+# Hook for its removal unless debugging.
+# Note that there is a small window in which the directory will not be cleaned:
+# after its creation but before its name has been assigned to `$tmp'.
+$debug ||
+{
+ tmp= ac_tmp=
+ trap 'exit_status=$?
+ : "${ac_tmp:=$tmp}"
+ { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status
+' 0
+ trap 'as_fn_exit 1' 1 2 13 15
+}
+# Create a (secure) tmp directory for tmp files.
+
+{
+ tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` &&
+ test -d "$tmp"
+} ||
+{
+ tmp=./conf$$-$RANDOM
+ (umask 077 && mkdir "$tmp")
+} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5
+ac_tmp=$tmp
+
+# Set up the scripts for CONFIG_FILES section.
+# No need to generate them if there are no CONFIG_FILES.
+# This happens for instance with `./config.status config.h'.
+if test -n "$CONFIG_FILES"; then
+
+
+ac_cr=`echo X | tr X '\015'`
+# On cygwin, bash can eat \r inside `` if the user requested igncr.
+# But we know of no other shell where ac_cr would be empty at this
+# point, so we can use a bashism as a fallback.
+if test "x$ac_cr" = x; then
+ eval ac_cr=\$\'\\r\'
+fi
+ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' </dev/null 2>/dev/null`
+if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then
+ ac_cs_awk_cr='\\r'
+else
+ ac_cs_awk_cr=$ac_cr
+fi
+
+echo 'BEGIN {' >"$ac_tmp/subs1.awk" &&
+_ACEOF
+
+
+{
+ echo "cat >conf$$subs.awk <<_ACEOF" &&
+ echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' &&
+ echo "_ACEOF"
+} >conf$$subs.sh ||
+ as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'`
+ac_delim='%!_!# '
+for ac_last_try in false false false false false :; do
+ . ./conf$$subs.sh ||
+ as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+
+ ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X`
+ if test $ac_delim_n = $ac_delim_num; then
+ break
+ elif $ac_last_try; then
+ as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+ else
+ ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
+ fi
+done
+rm -f conf$$subs.sh
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK &&
+_ACEOF
+sed -n '
+h
+s/^/S["/; s/!.*/"]=/
+p
+g
+s/^[^!]*!//
+:repl
+t repl
+s/'"$ac_delim"'$//
+t delim
+:nl
+h
+s/\(.\{148\}\)..*/\1/
+t more1
+s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/
+p
+n
+b repl
+:more1
+s/["\\]/\\&/g; s/^/"/; s/$/"\\/
+p
+g
+s/.\{148\}//
+t nl
+:delim
+h
+s/\(.\{148\}\)..*/\1/
+t more2
+s/["\\]/\\&/g; s/^/"/; s/$/"/
+p
+b
+:more2
+s/["\\]/\\&/g; s/^/"/; s/$/"\\/
+p
+g
+s/.\{148\}//
+t delim
+' <conf$$subs.awk | sed '
+/^[^""]/{
+ N
+ s/\n//
+}
+' >>$CONFIG_STATUS || ac_write_fail=1
+rm -f conf$$subs.awk
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+_ACAWK
+cat >>"\$ac_tmp/subs1.awk" <<_ACAWK &&
+ for (key in S) S_is_set[key] = 1
+ FS = ""
+
+}
+{
+ line = $ 0
+ nfields = split(line, field, "@")
+ substed = 0
+ len = length(field[1])
+ for (i = 2; i < nfields; i++) {
+ key = field[i]
+ keylen = length(key)
+ if (S_is_set[key]) {
+ value = S[key]
+ line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3)
+ len += length(value) + length(field[++i])
+ substed = 1
+ } else
+ len += 1 + keylen
+ }
+
+ print line
+}
+
+_ACAWK
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then
+ sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g"
+else
+ cat
+fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \
+ || as_fn_error $? "could not setup config files machinery" "$LINENO" 5
+_ACEOF
+
+# VPATH may cause trouble with some makes, so we remove sole $(srcdir),
+# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and
+# trailing colons and then remove the whole line if VPATH becomes empty
+# (actually we leave an empty line to preserve line numbers).
+if test "x$srcdir" = x.; then
+ ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{
+h
+s///
+s/^/:/
+s/[ ]*$/:/
+s/:\$(srcdir):/:/g
+s/:\${srcdir}:/:/g
+s/:@srcdir@:/:/g
+s/^:*//
+s/:*$//
+x
+s/\(=[ ]*\).*/\1/
+G
+s/\n//
+s/^[^=]*=[ ]*$//
+}'
+fi
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+fi # test -n "$CONFIG_FILES"
+
+
+eval set X " :F $CONFIG_FILES "
+shift
+for ac_tag
+do
+ case $ac_tag in
+ :[FHLC]) ac_mode=$ac_tag; continue;;
+ esac
+ case $ac_mode$ac_tag in
+ :[FHL]*:*);;
+ :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;;
+ :[FH]-) ac_tag=-:-;;
+ :[FH]*) ac_tag=$ac_tag:$ac_tag.in;;
+ esac
+ ac_save_IFS=$IFS
+ IFS=:
+ set x $ac_tag
+ IFS=$ac_save_IFS
+ shift
+ ac_file=$1
+ shift
+
+ case $ac_mode in
+ :L) ac_source=$1;;
+ :[FH])
+ ac_file_inputs=
+ for ac_f
+ do
+ case $ac_f in
+ -) ac_f="$ac_tmp/stdin";;
+ *) # Look for the file first in the build tree, then in the source tree
+ # (if the path is not absolute). The absolute path cannot be DOS-style,
+ # because $ac_f cannot contain `:'.
+ test -f "$ac_f" ||
+ case $ac_f in
+ [\\/$]*) false;;
+ *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";;
+ esac ||
+ as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;;
+ esac
+ case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac
+ as_fn_append ac_file_inputs " '$ac_f'"
+ done
+
+ # Let's still pretend it is `configure' which instantiates (i.e., don't
+ # use $as_me), people would be surprised to read:
+ # /* config.h. Generated by config.status. */
+ configure_input='Generated from '`
+ $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g'
+ `' by configure.'
+ if test x"$ac_file" != x-; then
+ configure_input="$ac_file. $configure_input"
+ { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5
+$as_echo "$as_me: creating $ac_file" >&6;}
+ fi
+ # Neutralize special characters interpreted by sed in replacement strings.
+ case $configure_input in #(
+ *\&* | *\|* | *\\* )
+ ac_sed_conf_input=`$as_echo "$configure_input" |
+ sed 's/[\\\\&|]/\\\\&/g'`;; #(
+ *) ac_sed_conf_input=$configure_input;;
+ esac
+
+ case $ac_tag in
+ *:-:* | *:-) cat >"$ac_tmp/stdin" \
+ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;;
+ esac
+ ;;
+ esac
+
+ ac_dir=`$as_dirname -- "$ac_file" ||
+$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$ac_file" : 'X\(//\)[^/]' \| \
+ X"$ac_file" : 'X\(//\)$' \| \
+ X"$ac_file" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$ac_file" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)[^/].*/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+ as_dir="$ac_dir"; as_fn_mkdir_p
+ ac_builddir=.
+
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+ ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'`
+ # A ".." for each directory in $ac_dir_suffix.
+ ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'`
+ case $ac_top_builddir_sub in
+ "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+ *) ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+ esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+case $srcdir in
+ .) # We are building in place.
+ ac_srcdir=.
+ ac_top_srcdir=$ac_top_builddir_sub
+ ac_abs_top_srcdir=$ac_pwd ;;
+ [\\/]* | ?:[\\/]* ) # Absolute name.
+ ac_srcdir=$srcdir$ac_dir_suffix;
+ ac_top_srcdir=$srcdir
+ ac_abs_top_srcdir=$srcdir ;;
+ *) # Relative name.
+ ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+ ac_top_srcdir=$ac_top_build_prefix$srcdir
+ ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+
+ case $ac_mode in
+ :F)
+ #
+ # CONFIG_FILE
+ #
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# If the template does not know about datarootdir, expand it.
+# FIXME: This hack should be removed a few years after 2.60.
+ac_datarootdir_hack=; ac_datarootdir_seen=
+ac_sed_dataroot='
+/datarootdir/ {
+ p
+ q
+}
+/@datadir@/p
+/@docdir@/p
+/@infodir@/p
+/@localedir@/p
+/@mandir@/p'
+case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in
+*datarootdir*) ac_datarootdir_seen=yes;;
+*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5
+$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;}
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ ac_datarootdir_hack='
+ s&@datadir@&$datadir&g
+ s&@docdir@&$docdir&g
+ s&@infodir@&$infodir&g
+ s&@localedir@&$localedir&g
+ s&@mandir@&$mandir&g
+ s&\\\${datarootdir}&$datarootdir&g' ;;
+esac
+_ACEOF
+
+# Neutralize VPATH when `$srcdir' = `.'.
+# Shell code in configure.ac might set extrasub.
+# FIXME: do we really want to maintain this feature?
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ac_sed_extra="$ac_vpsub
+$extrasub
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+:t
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
+s|@configure_input@|$ac_sed_conf_input|;t t
+s&@top_builddir@&$ac_top_builddir_sub&;t t
+s&@top_build_prefix@&$ac_top_build_prefix&;t t
+s&@srcdir@&$ac_srcdir&;t t
+s&@abs_srcdir@&$ac_abs_srcdir&;t t
+s&@top_srcdir@&$ac_top_srcdir&;t t
+s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t
+s&@builddir@&$ac_builddir&;t t
+s&@abs_builddir@&$ac_abs_builddir&;t t
+s&@abs_top_builddir@&$ac_abs_top_builddir&;t t
+$ac_datarootdir_hack
+"
+eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \
+ >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+
+test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&
+ { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } &&
+ { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \
+ "$ac_tmp/out"`; test -z "$ac_out"; } &&
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined. Please make sure it is defined" >&5
+$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined. Please make sure it is defined" >&2;}
+
+ rm -f "$ac_tmp/stdin"
+ case $ac_file in
+ -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";;
+ *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";;
+ esac \
+ || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+ ;;
+
+
+
+ esac
+
+done # for ac_tag
+
+
+as_fn_exit 0
+_ACEOF
+ac_clean_files=$ac_clean_files_save
+
+test $ac_write_fail = 0 ||
+ as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5
+
+
+# configure is writing to config.log, and then calls config.status.
+# config.status does its own redirection, appending to config.log.
+# Unfortunately, on DOS this fails, as config.log is still kept open
+# by configure, so config.status won't be able to write to it; its
+# output is simply discarded. So we exec the FD to /dev/null,
+# effectively closing config.log, so it can be properly (re)opened and
+# appended to by config.status. When coming back to configure, we
+# need to make the FD available again.
+if test "$no_create" != yes; then
+ ac_cs_success=:
+ ac_config_status_args=
+ test "$silent" = yes &&
+ ac_config_status_args="$ac_config_status_args --quiet"
+ exec 5>/dev/null
+ $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false
+ exec 5>>config.log
+ # Use ||, not &&, to avoid exiting from the if with $? = 1, which
+ # would make configure fail if this is the last instruction.
+ $ac_cs_success || as_fn_exit 1
+fi
+if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5
+$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;}
+fi
+
diff --git a/configure.ac b/configure.ac
new file mode 100755
index 0000000..4dfe0cd
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,9 @@
+AC_INIT([SPP], 1.7)
+AC_CHECK_LIB(bz2, BZ2_bzDecompressInit)
+AC_SUBST(HAVE_LIBBZ2)
+BOOST_REQUIRE([1.41.0])
+BOOST_CPPFLAGS="${BOOST_CPPFLAGS}"
+AC_SUBST(BOOST_CPPFLAGS)
+AC_CONFIG_FILES([src/Makevars])
+cp confdefs.h src/config.h
+AC_OUTPUT
diff --git a/man/add.broad.peak.regions.Rd b/man/add.broad.peak.regions.Rd
new file mode 100644
index 0000000..24355db
--- /dev/null
+++ b/man/add.broad.peak.regions.Rd
@@ -0,0 +1,27 @@
+\name{add.broad.peak.regions}
+\alias{add.broad.peak.regions}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Calculate chromosome-wide profiles of smoothed tag density }
+\description{
+ Looks for broader regions of enrichment associated with the determined
+ peak positions, adds them to the $npl data as $rs, $re columns.
+}
+\usage{
+add.broad.peak.regions(signal.tags, control.tags, binding.postions,window.size=500,z.thr=2)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{signal.tags}{ signal chromosome tag coordinate vectors (e.g. output
+ of \code{\link{select.informative.tags}} }
+ \item{control.tags}{ optionall control (input) tags }
+ \item{binding.positions}{ output of find.binding.positions call }
+ \item{window.size}{ window size to be used in calculating enrichment }
+ \item{z.thr}{ Z-score corresponding to the Poisson ratio threshold
+ used to flag significantly enriched windows}
+}
+\value{
+ A structure identical to binding.postions with two additional columns
+ added (rs and re) corresponding to start and end of the associated
+ significantly enriched region. If no region was associated with a
+ particular peak, NAs values are reported.
+}
diff --git a/man/find.binding.positions.Rd b/man/find.binding.positions.Rd
new file mode 100644
index 0000000..5b67e88
--- /dev/null
+++ b/man/find.binding.positions.Rd
@@ -0,0 +1,128 @@
+\name{find.binding.positions}
+\alias{find.binding.positions}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Determine significant point protein binding positions (peaks) }
+\description{
+ Given the signal and optional control (input) data, determine location of the
+ statistically significant point binding positions. If the control data
+ is not provided, the statistical significance can be assessed based on
+ tag randomization. The method also provides options for masking
+ regions exhibiting strong signals within the control data.
+}
+\usage{
+find.binding.positions(signal.data, e.value = NULL, fdr = NULL, masked.data = NULL, control.data = NULL, min.dist = 200, window.size = 4e+07, cluster = NULL, debug = T, n.randomizations = 3, shuffle.window = 1, min.thr = 0, topN = NULL, tag.count.whs = 100, enrichment.z = 2, method = tag.wtd, tec.filter = T, tec.window.size = 10000, tec.masking.window.size=tec.window.size, tec.z = 5, tec.poisson.z=5,tec.poisson.ratio=5, n.control.samples = 1, enrichment.background.scales = c(1, 5, 10), b [...]
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ ~~ tag data ~~
+ \item{signal.data}{ signal tag vector list }
+ \item{control.data}{ optional control (input) tag vector list }
+
+ ~~ position stringency criteria ~~
+ \item{e.value}{ E-value defining the desired statistical significance
+ of binding positions. }
+ \item{fdr}{ FDR defining statistical significance of binding positions }
+ \item{topN}{ instead of determining statistical significance
+ thresholds, return the specified number of highest-scoring
+ positions}
+
+ ~~ other params ~~
+ \item{whs}{ window half-sized that should be used for binding
+ detection (e.g. determined from cross-correlation profiles)}
+ \item{masked.data}{ optional set of coordinates that should be masked
+ (e.g. known non-unique regions) }
+ \item{min.dist}{ minimal distance that must separate detected binding
+ positions. In case multiple binding positions are detected within
+ such distance, the position with the highest score is returned. }
+ \item{window.size}{ size of the window used to segment the chromosome
+ during calculations to reduce memory usage. }
+ \item{cluster}{ optional \code{snow} cluster to parallelize the
+ processing on }
+ \item{min.thr}{ minimal score requirement for a peak }
+ \item{background.density.scaling}{ If TRUE, regions of significant tag
+ enrichment will be masked out when calculating size ratio of the
+ signal to control datasets (to estimate ratio of the background tag
+ density). If FALSE, the dataset ratio will be equal to the ratio of
+ the number of tags in each dataset.}
+
+ ~~ randomized controls ~~
+ \item{n.randomizations}{ number of tag randomziations that should be
+ performed (when the control data is not provided) }
+ \item{use.randomized.controls}{ Use randomized tag control, even if
+ \code{control.data} is supplied. }
+ \item{shuffle.window}{ during tag randomizations, tags will be split
+ into groups of \code{shuffle.window} and will be maintained
+ together throughout the randomization. }
+
+ ~~ fold-enrichment confidence intervals
+ \item{tag.count.whs}{ half-size of a window used to assess fold
+ enrichment of a binding position}
+ \item{enrichment.z}{ Z-score used to define the significance level of
+ the fold-enrichment confidence intervals }
+ \item{enrichment.background.scales}{ In estimating the peak
+ fold-enrichment confidence intervals, the background tag density is
+ estimated based on windows with half-sizes of
+ \code{2*tag.count.whs*enrichment.background.scales}. }
+ \item{method}{ either \code{tag.wtd} for WTD method, or
+ \code{tag.lwcc} for MTC method}
+ \item{mle.filter}{ If turned on, will exclude predicted positions
+ whose MLE enrichment ratio (for any of the background scales) is
+ below a specified min.mle.threshold }
+ \item{min.mle.threshold}{ MLE enrichment ratio threshold that each
+ predicted position must exceed if mle.filter is turned on. }
+
+ ~~ masking regions of significant control enrichment ~~
+ \item{tec.filter}{ Whether to mask out the regions exhibiting
+ significant enrichment in the control data in doing other
+ calculations. The regions are identified using Poisson statistics
+ within sliding windows, either relative to the scaled signal (tec.z), or
+ relative to randomly-distributed expectation (tec.poisson.z).}
+ \item{tec.window.size}{ size of the window used to determine
+ significantly enrichent control regions }
+ \item{tec.masking.window.size}{ size of the window used to mask
+ the area around significantly enrichent control regions }
+ \item{tec.z}{ Z-score defining statistical stringency by which a given
+ window is determined to be significantly higher in the input than in
+ the signal, and masked if that is the case.}
+ \item{tec.poisson.z}{ Z-score defining statistical stringency by which a given
+ window is determined to be significantly higher than the
+ tec.poisson.ratio above the expected uniform input background. }
+ \item{tec.poisson.ratio}{ Fold ratio by which input must exceed the
+ level expected from the uniform distribution. }
+
+
+
+
+}
+\value{
+ \item{npl}{A per-chromosome list containing data frames describing
+ determined binding positions. Column description:
+ \item{x}{ position }
+ \item{y}{ score }
+ \item{evalue}{ E-value }
+ \item{fdr}{ FDR. For peaks higher than the maximum control peak,
+ the highest dataset FDR is reported }
+ \item{enr}{ lower bound of the fold-enrichment ratio confidence
+ interval. This is the estimate determined using scale of
+ 1. Estimates corresponding to higher scales are returned in other enr columns
+ with scale appearing in the name.}
+ \item{enr.mle}{ enrichment ratio maximum likely estimate }
+ }
+ \item{thr}{ info on the chosen statistical threshold of the peak scores}
+}
+
+\examples{
+ # find binding positions using WTD method, 200bp half-window size,
+control data, 1% FDR
+ bp <-
+find.binding.positions(signal.data=chip.data,control.data=input.data,fdr=0.01,method=tag.wtd,whs=200);
+
+ # find binding positions using MTC method, using 5 tag randomizations,
+ # keeping pairs of tag positions together (shuffle.window=2)
+ bp <- find.binding.positions(signal.data=chip.data,control.data=input.data,fdr=0.01,method=tag.lwcc,whs=200,use.randomized.controls=T,n.randomizations=5,shuffle.window=2)
+
+ # print out the number of determined positions
+ print(paste("detected",sum(unlist(lapply(bp$npl,function(d) length(d$x)))),"peaks"));
+
+
+}
\ No newline at end of file
diff --git a/man/get.binding.characteristics.Rd b/man/get.binding.characteristics.Rd
new file mode 100644
index 0000000..07f2ae4
--- /dev/null
+++ b/man/get.binding.characteristics.Rd
@@ -0,0 +1,55 @@
+\name{get.binding.characteristics}
+\alias{get.binding.characteristics}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Calculate characteristics of observed DNA-binding signal from
+ cross-correlation profiles }
+\description{
+ The methods calculates strand cross-correlation profile to determine binding
+ peak separation distance and approximate window size that should be used
+ for binding detection. If quality scores were given for the tags,
+ which quality bins improve the cross-correlation pattern.
+}
+\usage{
+get.binding.characteristics(data, srange = c(50, 500), bin = 5, cluster = NULL, debug = F, min.tag.count = 1000, acceptance.z.score = 3, remove.tag.anomalies = T, anomalies.z = 5,accept.all.tags=F)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{data}{ Tag/quality data: output of \code{read.eland.tags} or similar function }
+ \item{srange}{ A range within which the binding peak separation is
+ expected to fall. Should be larger than probe size to avoid artifacts. }
+ \item{bin}{ Resolution (in basepairs) at which cross-corrrelation
+ should be calculated. bin=1 is ideal, but takes longer to calculate. }
+ \item{cluster}{ optional snow cluster for parallel processing }
+ \item{debug}{ whether to print debug messages }
+ \item{min.tag.count}{ minimal number of tags on the chromosome to be
+ considered in the cross-correlation calculations }
+ \item{acceptance.z.score}{ A Z-score used to determine if a given tag
+ quality bin provides significant improvement to the strand cross-correlation }
+ \item{remove.tag.anomalies}{ Whether to remove singular tag count peaks prior to
+ calculation. This is recommended, since such positions may distort the
+ cross-correlation profile and increase the necessary computational time. }
+ \item{anomalies.z}{ Z-score for determining if the number of tags at a
+ given position is significantly higher about background, and should be
+ considered an anomaly.}
+ \item{accept.all.tags}{ Whether tag alignment quality calculations
+ should be skipped and all available tags should be accepted in the
+ downstream analysis.}
+}
+\value{
+ \item{cross.correlation }{ Cross-correlation profile as an $x/$y data.frame}
+ \item{peak }{Position ($x) and height ($y) of automatically detected
+ cross-correlation peak.}
+ \item{whs} { Optimized window half-size for binding detection (based
+ on the width of the cross-correlation peak) }
+ \item{quality.bin.acceptance} { A list structure, describing the
+ effect of inclusion of different tag quality bins on
+ cross-correlation, and a resolution on which bins should be
+ considered.
+ \item{informative.bins} { A boolean vector indicating whether the
+ inclusion of tags from the tag quality bin specified in the name
+ attribute significantly increases cross-correlation profile near
+ the peak.}
+ \item{quality.cc} { A list giving the cross-correlation profile
+ after the inclusion of the tags from different quality bins }
+ }
+}
diff --git a/man/get.broad.enrichment.clusters.Rd b/man/get.broad.enrichment.clusters.Rd
new file mode 100644
index 0000000..1a6cff0
--- /dev/null
+++ b/man/get.broad.enrichment.clusters.Rd
@@ -0,0 +1,27 @@
+\name{get.broad.enrichment.clusters}
+\alias{get.broad.enrichment.clusters}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Determine broad clusters of enrichment }
+\description{
+ Scan chromosomes with a pre-defined window size, comparing scaled ChIP
+ and input tag coutns to see if their ratio exceeds that expected from
+ a Poisson process (normalized for dataset size).
+}
+\usage{
+get.broad.enrichment.clusters(chip.tags, input.tags, window.size=1e3,z.thr=3,tag.shift=146/2)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{chip.tags}{ foreground tag vector list }
+ \item{input.tags}{ background tag vector list }
+ \item{window.size}{ window size to be used for tag counting }
+ \item{z.thr}{ Z-score to be used as a significance threshold }
+ \item{tag.shift}{ number of base pairs by which positive and negative
+ tag coordinates should be shifted towards eachother (half of binding
+ peak separation distance)}
+}
+\value{
+ A list of elements corresponding to chromosomes, with each element
+ being an $s/$e/$rv data.frame giving the starting, ending positions and the log2
+ enrichment estimate for that region.
+}
diff --git a/man/get.conservative.fold.enrichment.profile.Rd b/man/get.conservative.fold.enrichment.profile.Rd
new file mode 100644
index 0000000..0b20432
--- /dev/null
+++ b/man/get.conservative.fold.enrichment.profile.Rd
@@ -0,0 +1,59 @@
+\name{get.conservative.fold.enrichment.profile}
+\alias{get.conservative.fold.enrichment.profile}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Estimate minimal fold enrichment/depletion along the chromosomes }
+\description{
+ The method provides a statistical assessment of enrichment/depletion
+ along the chromosomes. To assess tag density enrichment/depletion, a
+ sliding window of a specified size (\code{fws}) is used to calculate
+ the density of the foreground tags (\code{ftl}). Multiple, typically
+ larger windows are used to estimate background tag (\code{btl}) density around the
+ same location. The densities are compared as ratios of two Poisson
+ processes to estimate lower bound of foreground enrichment, or upper
+ bound of foreground depletion. If multiple window sizes were used to
+ estimate the background tag density, the most conservative one is
+ chosen for each point.
+}
+\usage{
+get.conservative.fold.enrichment.profile(ftl, btl, fws, bwsl = c(1, 5, 25, 50) * fws, step = 50, tag.shift = 146/2, alpha = 0.05, use.most.informative.scale = F, quick.calculation = T)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{ftl}{ foreground tag vector list }
+ \item{btl}{ background tag vector list }
+ \item{fws}{ foreground window size }
+ \item{bwsl}{ background window scales. The size(s) of background windows
+ will be \code{fws*bwsl}. }
+ \item{step}{ spacing between positions at which the
+ enrichment/depletion is evaluated }
+ \item{tag.shift}{ number of basepairs by which positive and negative
+ tag coordinates should be shifted towards eachother (half of binding
+ peak separation distance)}
+ \item{alpha}{ desired level of statistical significance }
+ \item{use.most.informative.scale}{ for each position, instead of
+ evaluating enrichment ratio bounds for all background window scales,
+ choose the one with the highest observed density to speed up the calculations}
+ \item{quick.calculation}{ Use square root transformation method
+ instead of a Bayesian method. This speeds up the caclulation
+ considerably and is turned on by default. }
+ \item{background.density.scaling}{ If TRUE, regions of significant tag
+ enrichment will be masked out when calculating size ratio of the
+ signal to control datasets (to estimate ratio of the background tag
+ density). If FALSE, the dataset ratio will be equal to the ratio of
+ the number of tags in each dataset.}
+}
+\value{
+ A list of elements corresponding to chromosomes, with each element
+ being an $x/$y data.frame giving the position and the log2
+ conservative estimate of enrichment/depletion fold ratios around that
+ position.
+ Use \code{\link{writewig}} to output the structure to a WIG
+ file.
+}
+\references{ R.M.Price, D.G. Bonett "Estimating the ratio fo two Poisson
+ rates", Comp. Stat & Data Anal. 32(2000) 345}
+\seealso{ \code{\link{get.smoothed.tag.density}} }
+\examples{
+ enrichment.estimates <- get.conservative.fold.enrichment.profile(chip.data,input.data,fws=2*binding.characteristics$whs,step=100,alpha=0.01);
+ writewig(enrichment.estimates,"example.enrichment.estimates.wig","Example conservative fold-enrichment/depletion estimates shown on log2 scale");
+}
diff --git a/man/get.mser.Rd b/man/get.mser.Rd
new file mode 100644
index 0000000..cf60fe8
--- /dev/null
+++ b/man/get.mser.Rd
@@ -0,0 +1,46 @@
+\name{get.mser}
+\alias{get.mser}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Calculate minimal saturated enrichment fold ratio }
+\description{
+ Determine if the dataset has reached absolute saturation, or otherwise
+ find minimal fold enrichment ratio above which the detection of peaks
+ has stabilized enough to meet the saturation criteria.
+}
+\usage{
+get.mser(signal.data, control.data, n.chains = 5, step.size = 1e+05, chains = NULL, cluster = NULL, test.agreement = 0.99, return.chains = F, enrichment.background.scales = c(1), n.steps = 1, ...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{signal.data}{ signal tag vector list }
+ \item{control.data}{ control tag vector list }
+ \item{n.chains}{ number of dataset subsamples to use }
+ \item{step.size}{ subsampling step describing the saturation
+ criteria. The criteria requires the set of detected binding sites to
+ be stable (as described by the \code{test.agreement} param) when the
+ number of tags in the dataset is reduced by \code{step.size}. The
+ value can either be an integer above one, in which case it specifies a fixed
+ number of tags, or a real value below one, in which case it
+ specifies the fraction of tags that should be removed (e.g. 0.1 will
+ remove 10% of tags).
+ }
+ \item{test.agreement}{ Fraction of the detected peaks that should
+ agree between the full and subsampled datasets. }
+ \item{chains}{ optional parameter, giving pre-calculated chains }
+ \item{cluster}{ optional \code{snow} cluster to parallelize processing }
+
+ \item{return.chains}{ whether subsampled dataset results should be returned as
+ well }
+ \item{enrichment.background.scales}{ one or multiple window scales at
+ which the background tag density should be assessed. See
+ \code{enrichment.background.scales} in
+ \code{\link{find.binding.positions}}. If multiple scales are provided,
+ multiple MSER estimates will be returned.}
+ \item{\dots}{ additional parameters should be the same as those passed
+ to the \code{\link{find.binding.positions}}}
+}
+\value{
+ A single, or multple (if multiple \code{enrichment.background.scales} were
+ provided) MSER value. A value of 1 or very close to it implies that
+ the dataset has reached absolute saturation based on the given criteria.
+}
diff --git a/man/get.mser.interpolation.Rd b/man/get.mser.interpolation.Rd
new file mode 100644
index 0000000..e10b81e
--- /dev/null
+++ b/man/get.mser.interpolation.Rd
@@ -0,0 +1,56 @@
+\name{get.mser.interpolation}
+\alias{get.mser.interpolation}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Interpolate MSER dependency on the tag count }
+\description{
+ MSER generally decreases with increasing sequencing depth. This
+ function interpolates the dependency of MSER on tag counts as a
+ log-log linear function. The log-log fit is used to estimate the depth
+ of sequencing required to reach desired \code{target.fold.enrichment}.
+}
+\usage{
+get.mser.interpolation(signal.data, control.data, target.fold.enrichment = 5, n.chains = 10, n.steps = 6, step.size = 1e+05, chains = NULL, test.agreement = 0.99, return.chains = F, enrichment.background.scales = c(1), excluded.steps = c(seq(2, n.steps - 2)), ...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{signal.data}{ signal chromosome tag vector list }
+ \item{control.data}{ control chromosome tag vector list }
+ \item{target.fold.enrichment}{ target MSER for which the depth should
+ be estimated}
+ \item{n.steps}{ number of steps in each subset chain. }
+ \item{step.size}{ Either number of tags or fraction of the dataset
+ size, see \code{step.size} parameter for \code{\link{get.mser}}. }
+ \item{test.agreement}{ Fraction of the detected peaks that should
+ agree between the full and subsampled datasets. See \code{test.agreement} parameter for \code{\link{get.mser}}}
+ \item{n.chains}{ number of random subset chains }
+ \item{chains}{ optional structure of pre-calculated chains
+ (e.g. generated by an earlier call with \code{return.chains=T}.}
+
+ \item{return.chains}{ whether to return peak predictions calculated on
+ random chains. These can be passed back using \code{chains} argument
+ to skip subsampling/prediction steps, and just recalculate the depth
+ estimate for a different MSER.}
+ \item{enrichment.background.scales}{ see \code{enrichment.background.scales} parameter for \code{\link{get.mser}} }
+ \item{excluded.steps}{ Intermediate subsampling steps that should be excluded from
+ the chains to speed up the calculation. By default, all intermediate
+ steps except for first two and last two are skipped. Adding
+ intermediate steps improves interpolation at the expense of
+ computational time.}
+ \item{\dots}{ additional parameters are passed to \code{\link{get.mser}} }
+}
+\details{
+ To simulate sequencing growth, the method calculates peak predictions
+ on random chains. Each chain is produced by sequential random
+ subsampling of the original data. The number of steps in the chain
+ indicates how many times the random subsampling will be performed.
+}
+\value{
+ Normally reurns a list, specifying for each backgroundscale:
+ \item{prediction}{estimated sequencing depth required to reach
+ specified target MSER}
+ \item{log10.fit}{linear fit model, a result of \code{lm()} call}
+
+ If \code{return.chains=T}, the above structure is returned under
+ \code{interpolation} field, along with \code{chains} field containing
+ results of \code{\link{find.binding.positions}} calls on subsampled chains.
+}
diff --git a/man/get.smoothed.enrichment.mle.Rd b/man/get.smoothed.enrichment.mle.Rd
new file mode 100644
index 0000000..fe80329
--- /dev/null
+++ b/man/get.smoothed.enrichment.mle.Rd
@@ -0,0 +1,35 @@
+\name{get.smoothed.enrichment.mle}
+\alias{get.smoothed.enrichment.mle}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Calculate chromosome-wide profiles of smoothed enrichment estimate }
+\description{
+ Given signal and control tag positions, the method calculates log2
+ signal to control enrichment esimates (maximum likelihood) for each
+ chromosome, based on the smoothed tag density profile (see \link{get.smoothed.tag.density}).
+}
+\usage{
+get.smoothed.enrichment.mle(signal.tags, control.tags, bandwidth = 150,tag.shift = 146/2, step = 50)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{signal.tags}{ signal chromosome tag coordinate vectors (e.g. output
+ of \code{\link{select.informative.tags}} }
+ \item{control.tags}{ control (input) tags }
+ \item{pseudocount}{ pseudocount value to be added to tag density -
+ defaults to 1 }
+ other parameters (such as bandwidth, step.size and tag.shift) are
+ passed to \link{get.smoothed.tag.density} - see appropriate reference
+ for details.
+}
+\value{
+ A list of elements corresponding to chromosomes, with each element
+ being an $x/$y data.frame giving the position and associated
+ log2 signal/control enrichment estimate.
+}
+\seealso{ \code{\link{writewig}} }
+\examples{
+ # get smoothed enrichment estimate profile using 500bp bandwidth at
+ # 50bp steps
+ smoothed.M <- get.smoothed.enrichment.mle(chip.data,bandwidth=500,step=50);
+ writewig(smoothed.M,"example.smoothedM.wig","Example smoothed log2 intensity ratio estimate");
+}
\ No newline at end of file
diff --git a/man/get.smoothed.tag.density.Rd b/man/get.smoothed.tag.density.Rd
new file mode 100644
index 0000000..9807249
--- /dev/null
+++ b/man/get.smoothed.tag.density.Rd
@@ -0,0 +1,45 @@
+\name{get.smoothed.tag.density}
+\alias{get.smoothed.tag.density}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Calculate chromosome-wide profiles of smoothed tag density }
+\description{
+ Given tag positions, the method calculates for each chromosome a tag
+ density profile, smoothed by the Gaussian kernel. If the optional
+ control tags are provided, the difference between ChIP and control tag
+ density is returned.
+}
+\usage{
+get.smoothed.tag.density(signal.tags, control.tags = NULL, bandwidth = 150, bg.weight = NULL, tag.shift = 146/2, step = round(bandwidth/3))
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{signal.tags}{ signal chromosome tag coordinate vectors (e.g. output
+ of \code{\link{select.informative.tags}} }
+ \item{control.tags}{ optional control (input) tags }
+ \item{bandwidth}{ standard deviation of the Gaussian kernel }
+ \item{bg.weight}{ optional weight by which the background density
+ should be multipled for scaling. If not supplied, the weight is
+ calculated based on the ratio of the reduced ChIP to input dataset sizes. }
+ \item{tag.shift}{ Distance by which the positive and negative strand
+ tags should be shifted towards eachother. This
+ normally corresponds to the half of the cross-correlation peak
+ position (e.g. \code{get.binding.characteristics()}$peak$x/2) }
+ \item{step}{ The distance between the regularly spaced points for
+ which the values should be calculated. }
+ \item{background.density.scaling}{ If TRUE, regions of significant tag
+ enrichment will be masked out when calculating size ratio of the
+ signal to control datasets (to estimate ratio of the background tag
+ density). If FALSE, the dataset ratio will be equal to the ratio of
+ the number of tags in each dataset.}
+}
+\value{
+ A list of elements corresponding to chromosomes, with each element
+ being an $x/$y data.frame giving the position and associated tag
+ density. Use \code{\link{writewig}} to output the structure to a WIG
+ file.
+}
+\seealso{ \code{\link{writewig}} }
+\examples{
+ smoothed.density <- get.smoothed.tag.density(chip.data,control.tags=input.data,bandwidth=200,step=100,tag.shift=round(binding.characteristics$peak$x/2));
+ writewig(smoothed.density,"example.density.wig","Example smoothed, background-subtracted tag density");
+}
\ No newline at end of file
diff --git a/man/output.binding.results.Rd b/man/output.binding.results.Rd
new file mode 100644
index 0000000..eddfe35
--- /dev/null
+++ b/man/output.binding.results.Rd
@@ -0,0 +1,24 @@
+\name{output.binding.results}
+\alias{output.binding.results}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Write out determined binding peaks into a text file table }
+\description{
+ Writes out determined binding positions into a text file. The file
+ will contain a table with each row corresponding to a detected
+ position, with the following columns:
+ \item{chr}{ chromosome or target sequence }
+ \item{pos}{ position of detected binding site on the chromosome/sequence}
+ \item{score}{a score reflecting magnitude of the binding}
+ \item{Evalue}{E-value corresponding to the peak magnitude}
+ \item{FDR}{FDR corresponding to the peak magnitude}
+ \item{enrichment.lb}{lower bound of the fold-enrichment ratio}
+ \item{enrichment.mle}{maximum likelihood estimate of the fold-enrichment ratio}
+}
+\usage{
+output.binding.results(results, filename)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{results}{ output of the \code{\link{find.binding.positions}} }
+ \item{filename}{ file name }
+}
diff --git a/man/read.bam.tags.Rd b/man/read.bam.tags.Rd
new file mode 100644
index 0000000..c4a579e
--- /dev/null
+++ b/man/read.bam.tags.Rd
@@ -0,0 +1,24 @@
+\name{read.bam.tags}
+\alias{read.bam.tags}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Read BAM alignment file }
+\description{
+ Reads in aligned reads from BAM file. Note: no split (non-unique)
+ alignemnts should be reported in the BAM file.
+}
+\usage{
+read.bam.tags(filename, read.tag.names = F, fix.chromosome.names = F)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{filename}{ BAM file }
+ \item{read.tag.names}{ Whether the tag names should be read in }
+ \item{fix.chromosome.names}{ Whether to remove ".fa" from the end of
+ the sequence names }
+}
+\value{
+ \item{tags }{ A vector of 5' tag coordinates, with negative values
+ corresponding to tags mapped to the negative strand. }
+ \item{quality }{ Number of mismatches }
+ \item{names }{ Tag names, if \code{read.tag.names} was set }
+}
\ No newline at end of file
diff --git a/man/read.bin.maqmap.tags.Rd b/man/read.bin.maqmap.tags.Rd
new file mode 100644
index 0000000..8260d61
--- /dev/null
+++ b/man/read.bin.maqmap.tags.Rd
@@ -0,0 +1,23 @@
+\name{read.bin.maqmap.tags}
+\alias{read.bin.maqmap.tags}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Read MAQ binary alignment map file }
+\description{
+ Reads in MAQ binary map alignment result file
+}
+\usage{
+read.bin.maqmap.tags(filename, read.tag.names = F, fix.chromosome.names = T)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{filename}{ MAQ map output file (binary) }
+ \item{read.tag.names}{ Whether the tag names should be read in }
+ \item{fix.chromosome.names}{ Whether to remove ".fa" from the end of
+ the sequence names }
+}
+\value{
+ \item{tags }{ A vector of 5' tag coordinates, with negative values
+ corresponding to tags mapped to the negative strand. }
+ \item{quality }{ Number of mismatches }
+ \item{names }{ Tag names, if \code{read.tag.names} was set }
+}
\ No newline at end of file
diff --git a/man/read.bowtie.tags.Rd b/man/read.bowtie.tags.Rd
new file mode 100644
index 0000000..678e9fc
--- /dev/null
+++ b/man/read.bowtie.tags.Rd
@@ -0,0 +1,23 @@
+\name{read.bowtie.tags}
+\alias{read.bowtie.tags}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Read bowtie text alignment output file }
+\description{
+ Reads in bowtie alignment results in text format
+}
+\usage{
+read.bowtie.tags(filename, read.tag.names = F, fix.chromosome.names = F)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{filename}{ bowtie text output file }
+ \item{read.tag.names}{ Whether the tag names should be read in }
+ \item{fix.chromosome.names}{ Whether to remove ".fa" from the end of
+ the sequence names }
+}
+\value{
+ \item{tags }{ A vector of 5' tag coordinates, with negative values
+ corresponding to tags mapped to the negative strand. }
+ \item{quality }{ Number of mismatches }
+ \item{names }{ Tag names, if \code{read.tag.names} was set }
+}
\ No newline at end of file
diff --git a/man/read.eland.tags.Rd b/man/read.eland.tags.Rd
new file mode 100644
index 0000000..aa29d6b
--- /dev/null
+++ b/man/read.eland.tags.Rd
@@ -0,0 +1,30 @@
+\name{read.eland.tags}
+\alias{read.eland.tags}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Read eland output file }
+\description{
+ Reads in ELAND output file, returning 5'-end tag coordinates and
+ number of mismatches associated with each mapped tag.
+}
+\usage{
+read.eland.tags(filename, read.tag.names = F, fix.chromosome.names = T, max.eland.tag.length = -1,extended=F)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{filename}{ ELAND output file }
+ \item{read.tag.names}{ Whether the tag names should be read in }
+ \item{fix.chromosome.names}{ Whether to remove ".fa" from the end of
+ the sequence names }
+ \item{max.eland.tag.length}{ Specifies max length of the tag sequence
+ considered by ELAND. This needs to be specified if the tags are
+ longer than the sequences considred by ELAND during alignment. }
+ \item{extended}{ Whether the file is written out in "extended" format
+ provided in GA pipeline 1.0. }
+ \item{multi}{ Whether the file is written in "multi" format, showing multiple alignments of the reads }
+}
+\value{
+ \item{tags }{ A vector of 5' tag coordinates, with negative values
+ corresponding to tags mapped to the negative strand. }
+ \item{quality }{ Number of mismatches }
+ \item{names }{ Tag names, if \code{read.tag.names} was set }
+}
diff --git a/man/read.maqmap.tags.Rd b/man/read.maqmap.tags.Rd
new file mode 100644
index 0000000..31c5309
--- /dev/null
+++ b/man/read.maqmap.tags.Rd
@@ -0,0 +1,23 @@
+\name{read.maqmap.tags}
+\alias{read.maqmap.tags}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Read MAQ text alignment output file }
+\description{
+ Reads in MAQ alignment results in text format (that results from "maq mapview" command.)
+}
+\usage{
+read.maqmap.tags(filename, read.tag.names = F, fix.chromosome.names = T)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{filename}{ MAQ text output file }
+ \item{read.tag.names}{ Whether the tag names should be read in }
+ \item{fix.chromosome.names}{ Whether to remove ".fa" from the end of
+ the sequence names }
+}
+\value{
+ \item{tags }{ A vector of 5' tag coordinates, with negative values
+ corresponding to tags mapped to the negative strand. }
+ \item{quality }{ Number of mismatches }
+ \item{names }{ Tag names, if \code{read.tag.names} was set }
+}
\ No newline at end of file
diff --git a/man/read.meland.tags.Rd b/man/read.meland.tags.Rd
new file mode 100644
index 0000000..c21a815
--- /dev/null
+++ b/man/read.meland.tags.Rd
@@ -0,0 +1,29 @@
+\name{read.meland.tags}
+\alias{read.meland.tags}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Read modified BED tag alignment file that contains variable match
+ length information }
+\description{
+ Reads in an extended BED tag alignment file. An example line given below:
+ \code{49 . U1 . 1 . . 23 chr2 -234567}
+ The line above specifies a 23-bp portion of the tag tag with id 49 was
+ aligned with 1 mismatch to the negative strand of chr2 at position 234567.
+}
+\usage{
+read.meland.tags(filename, read.tag.names = F, fix.chromosome.names = T)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{filename}{ name of the extended BED file }
+ \item{read.tag.names}{ whether to read in tag names }
+ \item{fix.chromosome.names}{ whether to remove ".fa" from the sequence
+ name ends. }
+}
+\value{
+ \item{tags }{ A vector of 5' tag coordinates, with negative values
+ corresponding to tags mapped to the negative strand. }
+ \item{quality }{ Quality expressed as a float x.y, where x is
+ tag.length - aligned.tag.portion.length, and y is the number of
+ mismatches (must be less than 10). }
+ \item{names }{ Tag names, if \code{read.tag.names} was set }
+}
\ No newline at end of file
diff --git a/man/remove.local.tag.anomalies.Rd b/man/remove.local.tag.anomalies.Rd
new file mode 100644
index 0000000..36c7818
--- /dev/null
+++ b/man/remove.local.tag.anomalies.Rd
@@ -0,0 +1,50 @@
+\name{remove.local.tag.anomalies}
+\alias{remove.local.tag.anomalies}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Restrict or remove positions with too many tags relative to
+ local background. }
+\description{
+ In Solexa ChIP-seq experiments some anomalous positions contain
+ extremely high number of tags at the exact coordinates. The function
+ scans the chromosomes, determining local tag density based on a
+ provided \code{window.size}, doing two types of corrections:
+ 1. removing all tags from positions that exceed local density by
+ \code{eliminate.fold}; 2. reducing the tag count at positions
+ exceeding \code{cap.fold} to the maximal allowed count. The
+ statistical significance of counts exceeding either of these two
+ threshold densities is calculated based on Poisson model, with
+ confidence interval determined by the \code{z.threshold} Z-score parameter.
+}
+\usage{
+remove.local.tag.anomalies(tags, window.size = 200, eliminate.fold = 10, cap.fold = 4, z.threshold = 3)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{tags}{ Chromosome-list of tag vectors }
+ \item{window.size}{ Size of the window used to assess local
+ density. Increasing the window size considerably beyond the size of
+ the binding features will result in flattened profiles, with bound
+ positions exhibiting a difference of just 1 tag beyond the background. }
+ \item{eliminate.fold}{ Threshold definining fold-over background
+ density above which the position is considered anomalous and removed
+ completely.}
+ \item{cap.fold}{ Threshold fold-over background density above which
+ the position is capped to the maximum statistically likely given
+ local tag density }
+ \item{z.threshold}{ Z-score used to assess significance of a given
+ position exceeding either of the two density thresholds. }
+ \item{var.base}{ Minimal level of tag count variance. This is used to
+ guard against cases where variance of provided tags is near 0, as in
+ the case of datasets filtered to unclude only unique reads. Defaults
+ at 0.1}
+}
+\value{
+ A modified chromosome-wise tag vector list.
+}
+\references{ ~put references to the literature/web site here ~ }
+
+\note{ ~~further notes~~
+ Increasing window.size to very large values will result in flat
+ profiles similar to those described by Zhang et al. "Model-based
+ Analysis of ChIP-Seq (MACS)." Genome Biol. 2008 Sep 17;9(9):R137.
+}
diff --git a/man/select.informative.tags.Rd b/man/select.informative.tags.Rd
new file mode 100644
index 0000000..73a4155
--- /dev/null
+++ b/man/select.informative.tags.Rd
@@ -0,0 +1,29 @@
+\name{select.informative.tags}
+\alias{select.informative.tags}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Choose informative tags }
+\description{
+ For datasets with tag alignment quality information (e.g. number of
+ mismatches for Eland alignments),
+ \code{\link{get.binding.characteristics}} determines whether inclusion
+ of tags from each specific quality bin improves the cross-correlation
+ profile. The present function is then used to actually select these
+ informative tags, discarding all other information, including quality
+ scores that are not used in further processing.
+}
+\usage{
+select.informative.tags(data, binding.characteristics)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{data}{ Full alignment data (a list with $tags and $quality elements) }
+ \item{binding.characteristics}{ result of a
+ \code{\link{get.binding.characteristics}} call. If NULL value is
+ supplied,all tags will be accepted. }
+}
+\value{
+ A chromosome-wise tag list. Each element of the list corresponds to a
+ chromosome and is a numeric vector of 5' tag coordinates, with sign
+ designating DNA strand.
+ This form of tag data is used for most of the other processing.
+}
diff --git a/man/spp-package.Rd b/man/spp-package.Rd
new file mode 100644
index 0000000..abd3177
--- /dev/null
+++ b/man/spp-package.Rd
@@ -0,0 +1,144 @@
+\name{spp-package}
+\alias{spp-package}
+\alias{spp}
+\docType{package}
+\title{
+ChIP-seq (Solexa) Processing Pipeline
+}
+\description{
+A set of routines for reading short sequence alignments, calculating tag
+density, estimates of statistically significant enrichment/depletion
+along the chromosome, identifying point binding positions (peaks), and
+characterizing saturation properties related to sequencing depth.
+}
+\details{
+\tabular{ll}{
+Package: \tab spp\cr
+Type: \tab Package\cr
+Version: \tab 1.11\cr
+Date: \tab 2012-06-20\cr
+License: \tab What license is it under?\cr
+LazyLoad: \tab yes\cr
+}
+See example below for typical processing sequence.y
+}
+\author{Peter Kharchenko <peter.kharchenko at post.harvard.edu>}
+\references{
+Kharchenko P., Tolstorukov M., Park P. "Design and analysis of ChIP-seq
+experiments for DNA-binding proteins." Nature Biotech. doi:10.1038/nbt.1508
+}
+
+\examples{
+
+ # load the library
+ library(spp);
+
+ ## The following section shows how to initialize a cluster of 8 nodes for parallel processing
+ ## To enable parallel processing, uncomment the next three lines, and comment out "cluster<-NULL";
+ ## see "snow" package manual for details.
+ #library(snow)
+ #cluster <- makeCluster(2);
+ #invisible(clusterCall(cluster,source,"routines.r"));
+ cluster <- NULL;
+
+
+
+ # read in tag alignments
+ chip.data <- read.eland.tags("chip.eland.alignment");
+ input.data <- read.eland.tags("input.eland.alignment");
+
+ # get binding info from cross-correlation profile
+ # srange gives the possible range for the size of the protected region;
+ # srange should be higher than tag length; making the upper boundary too high will increase calculation time
+ #
+ # bin - bin tags within the specified number of basepairs to speed up calculation;
+ # increasing bin size decreases the accuracy of the determined parameters
+ binding.characteristics <- get.binding.characteristics(chip.data,srange=c(50,500),bin=5,cluster=cluster);
+
+
+ # plot cross-correlation profile
+ pdf(file="example.crosscorrelation.pdf",width=5,height=5)
+ par(mar = c(3.5,3.5,1.0,0.5), mgp = c(2,0.65,0), cex = 0.8);
+ plot(binding.characteristics$cross.correlation,type='l',xlab="strand shift",ylab="cross-correlation");
+ abline(v=binding.characteristics$peak$x,lty=2,col=2)
+ dev.off();
+
+ # select informative tags based on the binding characteristics
+ chip.data <- select.informative.tags(chip.data,binding.characteristics);
+ input.data <- select.informative.tags(input.data,binding.characteristics);
+
+ # restrict or remove positions with anomalous number of tags relative
+ # to the local density
+ chip.data <- remove.local.tag.anomalies(chip.data);
+ input.data <- remove.local.tag.anomalies(input.data);
+
+
+ # output smoothed tag density (subtracting re-scaled input) into a WIG file
+ # note that the tags are shifted by half of the peak separation distance
+ smoothed.density <- get.smoothed.tag.density(chip.data,control.tags=input.data,bandwidth=200,step=100,tag.shift=round(binding.characteristics$peak$x/2));
+ writewig(smoothed.density,"example.density.wig","Example smoothed, background-subtracted tag density");
+ rm(smoothed.density);
+
+ # output conservative enrichment estimates
+ # alpha specifies significance level at which confidence intervals will be estimated
+ enrichment.estimates <- get.conservative.fold.enrichment.profile(chip.data,input.data,fws=2*binding.characteristics$whs,step=100,alpha=0.01);
+ writewig(enrichment.estimates,"example.enrichment.estimates.wig","Example conservative fold-enrichment/depletion estimates shown on log2 scale");
+ rm(enrichment.estimates);
+
+
+ # binding detection parameters
+ # desired FDR. Alternatively, an E-value can be supplied to the method calls below instead of the fdr parameter
+ fdr <- 1e-2;
+ # the binding.characteristics contains the optimized half-size for binding detection window
+ detection.window.halfsize <- binding.characteristics$whs;
+
+ # determine binding positions using wtd method
+ bp <- find.binding.positions(signal.data=chip.data,control.data=input.data,fdr=fdr,method=tag.wtd,whs=detection.window.halfsize,cluster=cluster)
+
+ # alternatively determined binding positions using lwcc method (note: this takes longer than wtd)
+ # bp <- find.binding.positions(signal.data=chip.data,control.data=input.data,fdr=fdr,method=tag.lwcc,whs=detection.window.halfsize,cluster=cluster)
+
+ print(paste("detected",sum(unlist(lapply(bp$npl,function(d) length(d$x)))),"peaks"));
+
+ # output detected binding positions
+ output.binding.results(bp,"example.binding.positions.txt");
+
+
+ # -------------------------------------------------------------------------------------------
+ # the set of commands in the following section illustrates methods for saturation analysis
+ # these are separated from the previous section, since they are highly CPU intensive
+ # -------------------------------------------------------------------------------------------
+
+ # determine MSER
+ # note: this will take approximately 10-15x the amount of time the initial binding detection did
+ # The saturation criteria here is 0.99 consistency in the set of binding positions when adding 1e5 tags.
+ # To ensure convergence the number of subsampled chains (n.chains) should be higher (80)
+ mser <- get.mser(chip.data,input.data,step.size=1e5,test.agreement=0.99,n.chains=8,cluster=cluster,fdr=fdr,method=tag.wtd,whs=detection.window.halfsize)
+
+ print(paste("MSER at a current depth is",mser));
+
+ # note: an MSER value of 1 or very near one implies that the set of detected binding positions satisfies saturation criteria without
+ # additional selection by fold-enrichment ratios. In other words, the dataset has reached saturation in a traditional sense (absolute saturation).
+
+ # interpolate MSER dependency on tag count
+ # note: this requires considerably more calculations than the previous steps (~ 3x more than the first MSER calculation)
+ # Here we interpolate MSER dependency to determine a point at which MSER of 2 is reached
+ # The interpolation will be based on the difference in MSER at the current depth, and a depth at 5e5 fewer tags (n.steps=6);
+ # evaluation of the intermediate points is omitted here to speed up the calculation (excluded.steps parameter)
+ # A total of 7 chains is used here to speed up calculation, whereas a higher number of chains (50) would give good convergence
+ msers <- get.mser.interpolation(chip.data,input.data,step.size=1e5,test.agreement=0.99, target.fold.enrichment=2, n.chains=7,n.steps=6,excluded.steps=c(2:4),cluster=cluster,fdr=fdr,method=tag.wtd,whs=detection.window.halfsize)
+
+ print(paste("predicted sequencing depth =",round(unlist(lapply(msers,function(x) x$prediction))/1e6,5)," million tags"))
+
+
+ # note: the interpolation will return NA prediction if the dataset has reached absolute saturation at the current depth.
+ # note: use return.chains=T to also calculated random chains (returned under msers$chains field) - these can be passed back as
+ # "get.mser.interpolation( ..., chains=msers$chains)" to calculate predictions for another target.fold.enrichment value
+ # without having to recalculate the random chain predictions.
+
+ ## stop cluster if it was initialized
+ #stopCluster(cluster);
+
+
+
+}
diff --git a/man/write.broadpeak.info.Rd b/man/write.broadpeak.info.Rd
new file mode 100644
index 0000000..0ed5f66
--- /dev/null
+++ b/man/write.broadpeak.info.Rd
@@ -0,0 +1,16 @@
+\name{write.broadpeak.info}
+\alias{write.broadpeak.info}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Write out determined broad enrichment regions using broadPeak format }
+\description{
+ Writes out broad regions of enrichment determined by the
+ get.broad.enrichment.clusters method in a broadPeak format.
+}
+\usage{
+write.broadpeak.info(broadpeak.results, filename)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{broadpeak.results}{ output of the \code{\link{get.broad.enrichment.clusters}} }
+ \item{filename}{ file name }
+}
diff --git a/man/write.narrowpeak.binding.Rd b/man/write.narrowpeak.binding.Rd
new file mode 100644
index 0000000..df3243c
--- /dev/null
+++ b/man/write.narrowpeak.binding.Rd
@@ -0,0 +1,25 @@
+\name{write.narrowpeak.binding}
+\alias{write.narrowpeak.binding}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Write out determined binding peaks using narrowPeak format }
+\description{
+ Writes out determined binding positions into a narrowPeak file.
+ The region will correspond to associated broad enrichment region, if
+ such were added using add.broad.peak.regions method. Otherwise the
+ region size will be determined using margin (which defaults to the
+ window half size that was used to determine binding positions)
+ Note: since v1.13, FDR is written out in -log10() scale.
+}
+\usage{
+write.narrowpeak.binding(results, filename,margin=results$whs)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{results}{ output of the \code{\link{find.binding.positions}} }
+ \item{filename}{ file name }
+ \item{margin}{ explicit value of the margin to be used if the borad
+ region information is absent (defaults to peak detection window
+ half-size}
+ \item{npeaks}{ optionally, limit the output to the specified number of
+ top peaks }
+}
diff --git a/man/writewig.Rd b/man/writewig.Rd
new file mode 100644
index 0000000..f7e23d9
--- /dev/null
+++ b/man/writewig.Rd
@@ -0,0 +1,31 @@
+\name{writewig}
+\alias{writewig}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ A function to save a list of chromosome-wise x/y data frames
+ into a WIG file format. }
+\description{
+ Takes a list that contains an $x and $y data.frame for a number of
+ chromosomes and writes it out to a WIG BED style format.
+}
+\usage{
+writewig(dat, fname, feature, threshold = 5, zip = F)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{dat}{ Chromosome coordinate-value data. \code{dat} is a list,
+ each member of a list is a data frame with $x and $y columns
+ containing chromosome positions and associated values. The names of
+ the list elements correspond to the chromosomes. }
+ \item{fname}{ Filename to which the output should be written }
+ \item{feature}{ Data description to be incorporated into the WIG header }
+ \item{threshold}{ Optional threshold to be saved in the WIG file}
+ \item{zip}{ Wheter to invoke a zip program to compress the file }
+}
+
+\seealso{ ~~objects to See Also as \code{\link{help}}, ~~~ }
+\examples{
+
+data <- list("chr1"=data.frame(x=c(100,130,200),y=c(1.2,4.0,2.3)));
+writewig(data,"filename");
+
+}
diff --git a/src/Makevars.in b/src/Makevars.in
new file mode 100644
index 0000000..61631ba
--- /dev/null
+++ b/src/Makevars.in
@@ -0,0 +1,4 @@
+PKG_LIBS=@LIBS@ -lz
+PKG_CFLAGS=-I./ -D_FASTMAP -DMAQ_LONGREADS @HAVE_LIBBZ2@
+PKG_CXXFLAGS=-I./ -D_FASTMAP -DMAQ_LONGREADS @HAVE_LIBBZ2@ @BOOST_CPPFLAGS@
+
diff --git a/src/api_global.h b/src/api_global.h
new file mode 100644
index 0000000..24f72f2
--- /dev/null
+++ b/src/api_global.h
@@ -0,0 +1,22 @@
+// ***************************************************************************
+// api_global.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides macros for exporting & importing BamTools API library symbols
+// ***************************************************************************
+
+#ifndef API_GLOBAL_H
+#define API_GLOBAL_H
+
+#include "bamtools_global.h"
+
+#ifdef BAMTOOLS_API_LIBRARY
+# define API_EXPORT BAMTOOLS_LIBRARY_EXPORT
+#else
+# define API_EXPORT BAMTOOLS_LIBRARY_IMPORT
+#endif
+
+#endif // API_GLOBAL_H
diff --git a/src/bed2vector.cpp b/src/bed2vector.cpp
new file mode 100644
index 0000000..63e9499
--- /dev/null
+++ b/src/bed2vector.cpp
@@ -0,0 +1,2705 @@
+#include "pc.h"
+#include "config.h"
+#include <vector>
+#include <string.h>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <strstream>
+#include <algorithm>
+#include <string>
+#include <functional>
+#include <utility>
+#include <boost/tokenizer.hpp>
+
+#ifdef HAVE_LIBBZ2
+#include <bzlib.h>
+#endif
+
+extern "C" {
+//#include "R.h"
+//#include "Rmath.h"
+#include "Rinternals.h"
+#include "Rdefines.h"
+ // for getline
+#include <stdio.h> /* flockfile, getc_unlocked, funlockfile */
+#include <stdlib.h> /* malloc, realloc */
+
+#include <errno.h> /* errno */
+#include <unistd.h> /* ssize_t */
+ssize_t getline_local(char **lineptr, size_t *n, FILE *stream);
+}
+
+using namespace std;
+using namespace __gnu_cxx;
+
+
+class lessAbsoluteValue {
+public:
+ bool operator()(int a, int b) const {
+ return abs(a) < abs(b);
+ }
+};
+
+
+
+#ifdef HAVE_LIBBZ2
+int get_bzline(BZFILE* b,string& line) {
+ char c;
+ int nBuf;
+ int bzerror=BZ_OK;
+
+ while(bzerror == BZ_OK) {
+ nBuf=BZ2_bzRead(&bzerror, b, &c, 1);
+ if(bzerror==BZ_OK) {
+ if(c=='\n') {
+ return bzerror;
+ } else {
+ line+=c;
+ }
+ }
+ }
+ return bzerror;
+}
+
+int get_a_line(FILE *f,BZFILE *b,int bz2file,string& line) {
+ line="";
+ if(bz2file) {
+ int bzerror=get_bzline(b,line);
+ if(bzerror==BZ_OK) {
+ return(1);
+ } else {
+ if(bzerror!=BZ_STREAM_END) {
+ cerr<<"encountered BZERROR="<<bzerror<<endl;
+ }
+ return(0);
+ }
+ } else {
+ char *cline=NULL;
+ size_t n;
+ if(getline_local(&cline,&n,f) != -1) {
+ if(cline) {
+ cline[strlen(cline)-1]='\0';
+ line+=cline;
+ free(cline);
+ }
+ return(1);
+ } else {
+ return(0);
+ }
+ }
+}
+#endif
+
+
+/**
+ * Read in .bed data into a list chromosome of vectors representing 5' positions, with sign
+ * corresponding to the strand.
+ */
+
+//#define DEBUG 1
+
+extern "C" {
+SEXP read_bed_ends(SEXP filename) {
+
+#ifdef DEBUG
+ Rprintf("start\n");
+#endif
+ const char* fname=CHAR(asChar(filename));
+#ifdef DEBUG
+ Rprintf("fname=%s\n",fname);
+#endif
+
+ // main data vector
+ // chr - pos
+ vector< vector<int> > pos;
+
+ // chromosome map
+ hash_map<string, int, hash<string>,equal_to<string> > cind_map;
+ vector<string> cnames;
+
+ typedef boost::tokenizer<boost::char_separator<char> > tokType;
+ boost::char_separator<char> sep(" \t");
+
+
+ ifstream bed_file(fname);
+
+#ifdef DEBUG
+ Rprintf("opened %s\n",fname);
+#endif
+
+ Rprintf("opened %s\n",fname);
+
+ // read in bed line
+ string line;
+
+ int fcount=0;
+ while(getline(bed_file,line)) {
+
+#ifdef DEBUG
+ Rprintf("line: %s\n",line.c_str());
+#endif
+
+
+ tokType tok(line, sep);
+ tokType::iterator sit=tok.begin();
+ if(sit!=tok.end()) {
+ string chr=*sit++; //chr=chr.substr(3,strlen(chr.c_str()));
+ string str_start=*sit++;
+ int fstart=atoi(str_start.c_str());
+ string str_end=*sit++;
+ int fend=atoi(str_end.c_str());
+ int fpos=fstart;
+ if(sit!=tok.end()) {
+ string u0=*sit++;
+ string nfield=*sit++;
+ string strand=*sit++;
+ if(strand=="-") {
+ fpos=-1*fend;
+ }
+ }
+
+ // determine the chromosome index
+ hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
+ int cind=-1;
+ if(li==cind_map.end()) {
+ // register new chromosome
+ cind=cnames.size();
+ cnames.push_back(chr);
+ cind_map[chr]=cind;
+ // allocate new pos vector
+ pos.push_back(vector<int>());
+#ifdef DEBUG
+ Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size());
+#endif
+ } else {
+ cind=li->second;
+ }
+ fcount++;
+ (pos[cind]).push_back(fpos);
+#ifdef DEBUG
+ Rprintf("read in position chr=%s cind=%d fpos=%d\n",chr.c_str(),cind,fpos);
+ if(fcount>30) {
+ break;
+ }
+#endif
+
+ }
+ }
+ bed_file.close();
+
+
+#ifdef DEBUG
+ Rprintf("done. read %d fragments\n",fcount);
+#endif
+
+ Rprintf("done. read %d fragments\n",fcount);
+
+ // construct output structures
+ SEXP chnames;
+ int np=0; // number of protections
+ PROTECT(chnames = allocVector(STRSXP, cnames.size()));
+ for(vector<string>::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) {
+ SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str()));
+ }
+ np++;
+
+ // sort
+ for(vector<vector<int> >::iterator csi=pos.begin();csi!=pos.end();++csi) {
+ sort(csi->begin(), csi->end(), lessAbsoluteValue());
+ }
+
+ SEXP ans;
+ PROTECT(ans = allocVector(VECSXP, cnames.size())); np++;
+ for(vector<vector<int> >::const_iterator csi=pos.begin();csi!=pos.end();++csi) {
+ SEXP nv;
+ PROTECT(nv=allocVector(INTSXP,csi->size())); np++;
+ int* i_nv=INTEGER(nv);
+ int i=0;
+ for(vector<int> ::const_iterator pi=csi->begin();pi!=csi->end();++pi) {
+ i_nv[i++]=*pi;
+ }
+ SET_VECTOR_ELT(ans, csi-pos.begin(), nv);
+ }
+
+ setAttrib(ans,R_NamesSymbol,chnames);
+
+#ifdef DEBUG
+ Rprintf("unprotecting %d elements\n",np);
+#endif
+
+ UNPROTECT(np);
+ return(ans);
+}
+
+
+
+SEXP read_meland_old(SEXP filename) {
+
+#ifdef DEBUG
+ Rprintf("start\n");
+#endif
+ const char* fname=CHAR(asChar(filename));
+#ifdef DEBUG
+ Rprintf("fname=%s\n",fname);
+#endif
+
+ // main data vector
+ // chr - pos
+ vector< vector<int> > pos;
+ vector< vector<int> > posnm; // number of mismatches
+ vector< vector<int> > poslen; // length
+
+ // chromosome map
+ hash_map<string, int, hash<string>,equal_to<string> > cind_map;
+ vector<string> cnames;
+
+
+ typedef boost::tokenizer<boost::char_separator<char> > tokType;
+ boost::char_separator<char> sep(" \t");
+
+
+ ifstream bed_file(fname);
+
+ Rprintf("opened %s\n",fname);
+
+ // read in bed line
+ string line;
+
+ int fcount=0;
+ while(getline(bed_file,line)) {
+
+#ifdef DEBUG
+ Rprintf("line: %s\n",line.c_str());
+#endif
+
+
+ tokType tok(line, sep);
+ tokType::iterator sit=tok.begin();
+ if(sit!=tok.end()) {
+ sit++; sit++;
+ string str_nm=*sit++;
+ int nm=0;
+ if(str_nm[0]=='U') {
+ nm=atoi((str_nm.c_str()+1));
+ } else {
+ continue;
+ }
+ sit++; sit++; sit++;
+ string str_len=*sit++;
+ int len=atoi(str_len.c_str());
+ string chr=*sit++; chr=chr.substr(3,strlen(chr.c_str()));
+ string str_pos=*sit++;
+ int fpos=atoi(str_pos.c_str());
+
+ // determine the chromosome index
+ hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
+ int cind=-1;
+ if(li==cind_map.end()) {
+ // register new chromosome
+ cind=cnames.size();
+ cnames.push_back(chr);
+ cind_map[chr]=cind;
+ // allocate new pos vector
+ pos.push_back(vector<int>());
+ posnm.push_back(vector<int>());
+ poslen.push_back(vector<int>());
+#ifdef DEBUG
+ Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size());
+#endif
+ } else {
+ cind=li->second;
+ }
+ fcount++;
+ (pos[cind]).push_back(fpos);
+ (posnm[cind]).push_back(nm);
+ (poslen[cind]).push_back(len);
+#ifdef DEBUG
+ Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len);
+ if(fcount>30) {
+ break;
+ }
+#endif
+
+ }
+ }
+ bed_file.close();
+
+
+#ifdef DEBUG
+ Rprintf("done. read %d fragments\n",fcount);
+#endif
+
+ Rprintf("done. read %d fragments\n",fcount);
+
+ // construct output structures
+ SEXP chnames;
+ int np=0; // number of protections
+ PROTECT(chnames = allocVector(STRSXP, cnames.size()));
+ for(vector<string>::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) {
+ SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str()));
+ }
+ np++;
+
+ // sort
+ //for(vector<vector<int> >::iterator csi=pos.begin();csi!=pos.end();++csi) {
+ // sort(csi->begin(), csi->end(), lessAbsoluteValue());
+ //}
+
+ SEXP ans;
+ PROTECT(ans = allocVector(VECSXP, cnames.size())); np++;
+ vector<vector<int> >::const_iterator nsi,lsi;
+ for(vector<vector<int> >::const_iterator csi=pos.begin();csi!=pos.end();++csi) {
+ nsi=posnm.begin()+(csi-pos.begin());
+ lsi=poslen.begin()+(csi-pos.begin());
+
+ SEXP dv,dnames_R;
+ PROTECT(dnames_R = allocVector(STRSXP, 3)); np++;
+ SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+ SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+ SET_STRING_ELT(dnames_R, 2, mkChar("l"));
+
+
+
+ SEXP tv,nv,lv;
+ PROTECT(tv=allocVector(INTSXP,csi->size())); np++;
+ PROTECT(nv=allocVector(INTSXP,csi->size())); np++;
+ PROTECT(lv=allocVector(INTSXP,csi->size())); np++;
+ int* i_tv=INTEGER(tv);
+ int* i_nv=INTEGER(nv);
+ int* i_lv=INTEGER(lv);
+
+ int i=0;
+ vector<int>::const_iterator ini=nsi->begin();
+ vector<int>::const_iterator ili=lsi->begin();
+ for(vector<int> ::const_iterator pi=csi->begin();pi!=csi->end();++pi) {
+ i_tv[i]=*pi;
+ i_nv[i]=*ini++;
+ i_lv[i]=*ili++;
+ i++;
+ }
+ PROTECT(dv = allocVector(VECSXP, 3)); np++;
+ SET_VECTOR_ELT(dv, 0, tv);
+ SET_VECTOR_ELT(dv, 1, nv);
+ SET_VECTOR_ELT(dv, 2, lv);
+ setAttrib(dv, R_NamesSymbol, dnames_R);
+
+ SET_VECTOR_ELT(ans, csi-pos.begin(), dv);
+ }
+
+ setAttrib(ans,R_NamesSymbol,chnames);
+
+#ifdef DEBUG
+ Rprintf("unprotecting %d elements\n",np);
+#endif
+
+ UNPROTECT(np);
+ return(ans);
+}
+
+
+ int get_a_line(FILE *f,string& line) {
+ line="";
+ char cline[1024];
+ if(fgets(cline,1024,f)) {
+ line+=cline;
+ return(1);
+ } else {
+ return(0);
+ }
+ }
+
+
+ SEXP read_meland(SEXP filename,SEXP read_tag_names_R) {
+
+#ifdef DEBUG
+ Rprintf("start\n");
+#endif
+ const char* fname=CHAR(asChar(filename));
+ int read_names=*(INTEGER(read_tag_names_R));
+#ifdef DEBUG
+ Rprintf("fname=%s\n",fname);
+#endif
+
+ // main data vector
+ // chr - pos
+ vector< vector<int> > pos;
+ vector< vector<int> > posnm; // number of mismatches
+ vector< vector<int> > poslen; // length
+ vector< vector<string> > tagnames;
+
+ // chromosome map
+ hash_map<string, int, hash<string>,equal_to<string> > cind_map;
+ vector<string> cnames;
+
+
+ typedef boost::tokenizer<boost::char_separator<char> > tokType;
+ boost::char_separator<char> sep(" \t");
+
+
+ FILE *f=fopen(fname,"rb");
+ if (!f) { cout<<"can't open input file \""<<fname<<"\"\n"; }
+
+ Rprintf("opened %s\n",fname);
+
+
+ // read in bed line
+ string line;
+ int fcount=0;
+ while(get_a_line(f,line)) {
+
+#ifdef DEBUG
+ Rprintf("line: %s\n",line.c_str());
+#endif
+
+
+ tokType tok(line, sep);
+ tokType::iterator sit=tok.begin();
+ if(sit!=tok.end()) {
+ string tagname=*sit++;
+ sit++;
+ string str_nm=*sit++;
+ int nm=0;
+ if(str_nm[0]=='U') {
+ nm=atoi((str_nm.c_str()+1));
+ } else {
+ continue;
+ }
+ sit++; sit++; sit++;
+ string str_len=*sit++;
+ int len=atoi(str_len.c_str());
+ string chr=*sit++; chr=chr.substr(3,strlen(chr.c_str()));
+ string str_pos=*sit++;
+ int fpos=atoi(str_pos.c_str());
+
+ // determine the chromosome index
+ hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
+ int cind=-1;
+ if(li==cind_map.end()) {
+ // register new chromosome
+ cind=cnames.size();
+ cnames.push_back(chr);
+ cind_map[chr]=cind;
+ // allocate new pos vector
+ pos.push_back(vector<int>());
+ posnm.push_back(vector<int>());
+ poslen.push_back(vector<int>());
+ if(read_names) {
+ tagnames.push_back(vector<string>());
+ }
+#ifdef DEBUG
+ Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size());
+#endif
+ } else {
+ cind=li->second;
+ }
+ fcount++;
+ (pos[cind]).push_back(fpos);
+ (posnm[cind]).push_back(nm);
+ (poslen[cind]).push_back(len);
+ if(read_names) {
+ (tagnames[cind]).push_back(tagname);
+ }
+#ifdef DEBUG
+ Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len);
+ if(fcount>30) {
+ break;
+ }
+#endif
+
+ }
+ }
+ fclose(f);
+
+
+#ifdef DEBUG
+ Rprintf("done. read %d fragments\n",fcount);
+#endif
+
+ Rprintf("done. read %d fragments\n",fcount);
+
+ // construct output structures
+ SEXP chnames;
+ int np=0; // number of protections
+ PROTECT(chnames = allocVector(STRSXP, cnames.size()));
+ for(vector<string>::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) {
+ SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str()));
+ }
+ np++;
+
+ // sort
+ //for(vector<vector<int> >::iterator csi=pos.begin();csi!=pos.end();++csi) {
+ // sort(csi->begin(), csi->end(), lessAbsoluteValue());
+ //}
+
+ SEXP ans;
+ PROTECT(ans = allocVector(VECSXP, cnames.size())); np++;
+ vector<vector<int> >::const_iterator nsi,lsi;
+ vector<vector<string> >::const_iterator ssi;
+ for(vector<vector<int> >::const_iterator csi=pos.begin();csi!=pos.end();++csi) {
+ nsi=posnm.begin()+(csi-pos.begin());
+ lsi=poslen.begin()+(csi-pos.begin());
+
+ SEXP dv,dnames_R;
+ PROTECT(dnames_R = allocVector(STRSXP, 3+read_names)); np++;
+ SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+ SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+ SET_STRING_ELT(dnames_R, 2, mkChar("l"));
+ if(read_names) {
+ SET_STRING_ELT(dnames_R, 3, mkChar("s"));
+ }
+
+
+
+ SEXP tv,nv,lv,sv;
+ PROTECT(tv=allocVector(INTSXP,csi->size())); np++;
+ PROTECT(nv=allocVector(INTSXP,csi->size())); np++;
+ PROTECT(lv=allocVector(INTSXP,csi->size())); np++;
+ if(read_names) {
+ PROTECT(sv=allocVector(STRSXP,csi->size())); np++;
+ }
+ int* i_tv=INTEGER(tv);
+ int* i_nv=INTEGER(nv);
+ int* i_lv=INTEGER(lv);
+
+ int i=0;
+ vector<int>::const_iterator ini=nsi->begin();
+ vector<int>::const_iterator ili=lsi->begin();
+ for(vector<int> ::const_iterator pi=csi->begin();pi!=csi->end();++pi) {
+ i_tv[i]=*pi;
+ i_nv[i]=*ini++;
+ i_lv[i]=*ili++;
+ i++;
+ }
+ if(read_names) {
+ int i=0;
+ ssi=tagnames.begin()+(csi-pos.begin());
+ for(vector<string>::const_iterator si=ssi->begin();si!=ssi->end();++si) {
+ SET_STRING_ELT(sv,i,mkChar(si->c_str()));
+ i++;
+ }
+ }
+ PROTECT(dv = allocVector(VECSXP, 3+read_names)); np++;
+ SET_VECTOR_ELT(dv, 0, tv);
+ SET_VECTOR_ELT(dv, 1, nv);
+ SET_VECTOR_ELT(dv, 2, lv);
+ if(read_names) {
+ SET_VECTOR_ELT(dv, 3, sv);
+ }
+ setAttrib(dv, R_NamesSymbol, dnames_R);
+
+ SET_VECTOR_ELT(ans, csi-pos.begin(), dv);
+ }
+
+ setAttrib(ans,R_NamesSymbol,chnames);
+
+#ifdef DEBUG
+ Rprintf("unprotecting %d elements\n",np);
+#endif
+
+ UNPROTECT(np);
+ return(ans);
+}
+
+
+
+// reads regular eland files, recording mismatch positions
+SEXP read_eland_mismatches(SEXP filename) {
+
+#ifdef DEBUG
+ Rprintf("start\n");
+#endif
+ const char* fname=CHAR(asChar(filename));
+#ifdef DEBUG
+ Rprintf("fname=%s\n",fname);
+#endif
+
+ // main data vector
+ // chr - pos
+ vector< vector<int> > pos;
+ vector< vector<int> > mm1; // position of the first mismatch (or 0 for none)
+ vector< vector<int> > mm2; // position of the second mismatch
+
+ // chromosome map
+ hash_map<string, int, hash<string>,equal_to<string> > cind_map;
+ vector<string> cnames;
+
+
+ typedef boost::tokenizer<boost::char_separator<char> > tokType;
+ boost::char_separator<char> sep("\t","",boost::keep_empty_tokens);
+
+
+ FILE *f=fopen(fname,"rb");
+ if (!f) { cout<<"can't open input file \""<<fname<<"\"\n"; }
+
+ Rprintf("opened %s\n",fname);
+
+ // read in bed line
+ string line;
+ int fcount=0;
+ while(get_a_line(f,line)) {
+
+#ifdef DEBUG
+ Rprintf("line: %s\n",line.c_str());
+#endif
+
+
+ tokType tok(line, sep);
+ tokType::iterator sit=tok.begin();
+ if(sit!=tok.end()) {
+ sit++;
+ string seq=*sit++;
+ string str_nm=*sit++;
+ int nm=0;
+ if(str_nm[0]=='U') {
+ nm=atoi((str_nm.c_str()+1));
+ } else {
+ continue;
+ }
+ sit++; sit++; sit++;
+ string chr=*sit++;
+ // extract chromosome name from this
+ int chrp=chr.find("chr");
+ int pp=chr.find('.');
+ chr=chr.substr(chrp+3,pp-chrp-3);
+
+ string str_pos=*sit++;
+ int fpos=atoi(str_pos.c_str());
+
+
+ string strand=*sit++;
+ int nstrand=0;
+ if(strand=="R") {
+ fpos=-1*(fpos+seq.size()-1);
+ nstrand=1;
+ }
+
+ sit++;
+
+ int nm1=0; int nm2=0;
+ if(sit!=tok.end()) {
+ string nms=*sit++;
+ nm1=atoi(nms.substr(0,nms.size()-1).c_str());
+ if(nstrand) { nm1=seq.size()-nm1+1; }
+ }
+ if(sit!=tok.end()) {
+ string nms=*sit++;
+ nm2=atoi(nms.substr(0,nms.size()-1).c_str());
+ if(nstrand) { nm2=seq.size()-nm2+1; }
+ }
+
+ // determine the chromosome index
+ hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
+ int cind=-1;
+ if(li==cind_map.end()) {
+ // register new chromosome
+ cind=cnames.size();
+ cnames.push_back(chr);
+ cind_map[chr]=cind;
+ // allocate new pos vector
+ pos.push_back(vector<int>());
+ mm1.push_back(vector<int>());
+ mm2.push_back(vector<int>());
+#ifdef DEBUG
+ Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size());
+#endif
+ } else {
+ cind=li->second;
+ }
+ fcount++;
+ (pos[cind]).push_back(fpos);
+ (mm1[cind]).push_back(nm1);
+ (mm2[cind]).push_back(nm2);
+#ifdef DEBUG
+ Rprintf("read in position chr=%s cind=%d fpos=%d, nm1=%d, nm2=%d\n",chr.c_str(),cind,fpos,nm1,nm2);
+ if(fcount>30) {
+ break;
+ }
+#endif
+
+ }
+ }
+ fclose(f);
+
+
+#ifdef DEBUG
+ Rprintf("done. read %d fragments\n",fcount);
+#endif
+
+ Rprintf("done. read %d fragments\n",fcount);
+
+ // construct output structures
+ SEXP chnames;
+ int np=0; // number of protections
+ PROTECT(chnames = allocVector(STRSXP, cnames.size()));
+ for(vector<string>::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) {
+ SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str()));
+ }
+ np++;
+
+ // sort
+ //for(vector<vector<int> >::iterator csi=pos.begin();csi!=pos.end();++csi) {
+ // sort(csi->begin(), csi->end(), lessAbsoluteValue());
+ //}
+
+ SEXP ans;
+ PROTECT(ans = allocVector(VECSXP, cnames.size())); np++;
+ vector<vector<int> >::const_iterator nsi,lsi;
+ for(vector<vector<int> >::const_iterator csi=pos.begin();csi!=pos.end();++csi) {
+ nsi=mm1.begin()+(csi-pos.begin());
+ lsi=mm2.begin()+(csi-pos.begin());
+
+ SEXP dv,dnames_R;
+ PROTECT(dnames_R = allocVector(STRSXP, 3)); np++;
+ SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+ SET_STRING_ELT(dnames_R, 1, mkChar("f"));
+ SET_STRING_ELT(dnames_R, 2, mkChar("s"));
+
+
+
+ SEXP tv,nv,lv;
+ PROTECT(tv=allocVector(INTSXP,csi->size())); np++;
+ PROTECT(nv=allocVector(INTSXP,csi->size())); np++;
+ PROTECT(lv=allocVector(INTSXP,csi->size())); np++;
+ int* i_tv=INTEGER(tv);
+ int* i_nv=INTEGER(nv);
+ int* i_lv=INTEGER(lv);
+
+ int i=0;
+ vector<int>::const_iterator ini=nsi->begin();
+ vector<int>::const_iterator ili=lsi->begin();
+ for(vector<int> ::const_iterator pi=csi->begin();pi!=csi->end();++pi) {
+ i_tv[i]=*pi;
+ i_nv[i]=*ini++;
+ i_lv[i]=*ili++;
+ i++;
+ }
+ PROTECT(dv = allocVector(VECSXP, 3)); np++;
+ SET_VECTOR_ELT(dv, 0, tv);
+ SET_VECTOR_ELT(dv, 1, nv);
+ SET_VECTOR_ELT(dv, 2, lv);
+ setAttrib(dv, R_NamesSymbol, dnames_R);
+
+ SET_VECTOR_ELT(ans, csi-pos.begin(), dv);
+ }
+
+ setAttrib(ans,R_NamesSymbol,chnames);
+
+#ifdef DEBUG
+ Rprintf("unprotecting %d elements\n",np);
+#endif
+
+ UNPROTECT(np);
+ return(ans);
+}
+
+
+ // read in regular eland files, adjusting the negative strand coordinate by sequence length
+ SEXP read_eland(SEXP filename,SEXP read_tag_names_R,SEXP eland_tag_length_R) {
+
+#ifdef DEBUG
+ Rprintf("start\n");
+#endif
+ const char* fname=CHAR(asChar(filename));
+ int read_names=*(INTEGER(read_tag_names_R));
+ int eland_tag_length=*(INTEGER(eland_tag_length_R));
+#ifdef DEBUG
+ Rprintf("fname=%s\n",fname);
+#endif
+
+ // main data vector
+ // chr - pos
+ vector< vector<int> > pos;
+ vector< vector<int> > posnm; // number of mismatches
+ vector< vector<string> > tagnames;
+
+ // chromosome map
+ hash_map<string, int, hash<string>,equal_to<string> > cind_map;
+ vector<string> cnames;
+
+
+ typedef boost::tokenizer<boost::char_separator<char> > tokType;
+ boost::char_separator<char> sep("\t","",boost::keep_empty_tokens);
+
+
+ FILE *f=fopen(fname,"rb");
+ if (!f) { cout<<"can't open input file \""<<fname<<"\"\n"; }
+ else {
+ Rprintf("opened %s\n",fname);
+
+ // read in bed line
+ string line;
+ int fcount=0;
+ while(get_a_line(f,line)) {
+
+#ifdef DEBUG
+ Rprintf("line: %s\n",line.c_str());
+#endif
+
+
+ tokType tok(line, sep);
+ tokType::iterator sit=tok.begin();
+ if(sit!=tok.end()) {
+ string tagname=*sit++;
+ string sequence=*sit++;
+ int len=sequence.size();
+ // adjust probe length if eland length limit was specified
+ if(eland_tag_length>0 && len>eland_tag_length) {
+ len=eland_tag_length;
+ }
+ string str_nm=*sit++;
+ int nm=0;
+ if(str_nm[0]=='U') {
+ nm=atoi((str_nm.c_str()+1));
+ } else {
+ continue;
+ }
+ sit++; sit++; sit++;
+ string chr=*sit++;
+ string str_pos=*sit++;
+ int fpos=atoi(str_pos.c_str());
+ string str_strand=*sit++;
+
+ if(str_strand[0]=='R') {
+ fpos=-1*(fpos+len-1);
+ }
+
+ // determine the chromosome index
+ hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
+ int cind=-1;
+ if(li==cind_map.end()) {
+ // register new chromosome
+ cind=cnames.size();
+ cnames.push_back(chr);
+ cind_map[chr]=cind;
+ // allocate new pos vector
+ pos.push_back(vector<int>());
+ posnm.push_back(vector<int>());
+ if(read_names) {
+ tagnames.push_back(vector<string>());
+ }
+#ifdef DEBUG
+ Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size());
+#endif
+ } else {
+ cind=li->second;
+ }
+ fcount++;
+ (pos[cind]).push_back(fpos);
+ (posnm[cind]).push_back(nm);
+ if(read_names) {
+ (tagnames[cind]).push_back(tagname);
+ }
+#ifdef DEBUG
+ Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len);
+ if(fcount>30) {
+ break;
+ }
+#endif
+
+ }
+ }
+ fclose(f);
+
+ Rprintf("done. read %d fragments\n",fcount);
+ }
+ // construct output structures
+ SEXP chnames;
+ int np=0; // number of protections
+ PROTECT(chnames = allocVector(STRSXP, cnames.size()));
+ for(vector<string>::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) {
+ SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str()));
+ }
+ np++;
+
+ // sort
+ //for(vector<vector<int> >::iterator csi=pos.begin();csi!=pos.end();++csi) {
+ // sort(csi->begin(), csi->end(), lessAbsoluteValue());
+ //}
+
+ SEXP ans;
+ PROTECT(ans = allocVector(VECSXP, cnames.size())); np++;
+ vector<vector<int> >::const_iterator nsi;
+ vector<vector<string> >::const_iterator ssi;
+ for(vector<vector<int> >::const_iterator csi=pos.begin();csi!=pos.end();++csi) {
+ nsi=posnm.begin()+(csi-pos.begin());
+
+ SEXP dv,dnames_R;
+ PROTECT(dnames_R = allocVector(STRSXP, 2+read_names)); np++;
+ SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+ SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+ if(read_names) {
+ SET_STRING_ELT(dnames_R, 2, mkChar("s"));
+ }
+
+
+
+ SEXP tv,nv,sv;
+ PROTECT(tv=allocVector(INTSXP,csi->size())); np++;
+ PROTECT(nv=allocVector(INTSXP,csi->size())); np++;
+ if(read_names) {
+ PROTECT(sv=allocVector(STRSXP,csi->size())); np++;
+ }
+ int* i_tv=INTEGER(tv);
+ int* i_nv=INTEGER(nv);
+
+ int i=0;
+ vector<int>::const_iterator ini=nsi->begin();
+ for(vector<int> ::const_iterator pi=csi->begin();pi!=csi->end();++pi) {
+ i_tv[i]=*pi;
+ i_nv[i]=*ini++;
+ i++;
+ }
+ if(read_names) {
+ int i=0;
+ ssi=tagnames.begin()+(csi-pos.begin());
+ for(vector<string>::const_iterator si=ssi->begin();si!=ssi->end();++si) {
+ SET_STRING_ELT(sv,i,mkChar(si->c_str()));
+ i++;
+ }
+ }
+ PROTECT(dv = allocVector(VECSXP, 2+read_names)); np++;
+ SET_VECTOR_ELT(dv, 0, tv);
+ SET_VECTOR_ELT(dv, 1, nv);
+ if(read_names) {
+ SET_VECTOR_ELT(dv, 2, sv);
+ }
+ setAttrib(dv, R_NamesSymbol, dnames_R);
+
+ SET_VECTOR_ELT(ans, csi-pos.begin(), dv);
+ }
+
+ setAttrib(ans,R_NamesSymbol,chnames);
+
+#ifdef DEBUG
+ Rprintf("unprotecting %d elements\n",np);
+#endif
+
+ UNPROTECT(np);
+ return(ans);
+}
+
+
+
+ // read in extended eland files, adjusting the negative strand coordinate by sequence length
+ SEXP read_eland_extended(SEXP filename,SEXP read_tag_names_R,SEXP eland_tag_length_R) {
+
+#ifdef DEBUG
+ Rprintf("start\n");
+#endif
+ const char* fname=CHAR(asChar(filename));
+ int read_names=*(INTEGER(read_tag_names_R));
+ int eland_tag_length=*(INTEGER(eland_tag_length_R));
+#ifdef DEBUG
+ Rprintf("fname=%s\n",fname);
+#endif
+
+ // main data vector
+ // chr - pos
+ vector< vector<int> > pos;
+ vector< vector<int> > posnm; // number of mismatches
+ vector< vector<string> > tagnames;
+
+ // chromosome map
+ hash_map<string, int, hash<string>,equal_to<string> > cind_map;
+ vector<string> cnames;
+
+
+ typedef boost::tokenizer<boost::char_separator<char> > tokType;
+ boost::char_separator<char> sep("\t","",boost::keep_empty_tokens);
+
+
+ FILE *f=fopen(fname,"rb");
+ if (!f) { cout<<"can't open input file \""<<fname<<"\"\n"; }
+ else {
+ Rprintf("opened %s\n",fname);
+
+ // read in bed line
+ string line;
+ int fcount=0;
+ while(get_a_line(f,line)) {
+
+#ifdef DEBUG
+ Rprintf("line: %s\n",line.c_str());
+#endif
+
+
+ tokType tok(line, sep);
+ tokType::iterator sit=tok.begin();
+ if(sit!=tok.end()) {
+ string machinename=*sit++;
+ string runnumber=*sit++;
+ string lanenumber=*sit++;
+ *sit++;
+
+ string str_x=*sit++;
+ string str_y=*sit++;
+
+ string tagname=machinename+"."+runnumber+"."+lanenumber+"."+str_x+"."+str_y;
+
+
+
+ *sit++;
+ *sit++;
+
+
+ string sequence=*sit++;
+ *sit++;
+
+ string chr=*sit++;
+ string contig=*sit++;
+ chr=chr+contig;
+
+ int len=sequence.size();
+ // adjust probe length if eland length limit was specified
+ if(eland_tag_length>0 && len>eland_tag_length) {
+ len=eland_tag_length;
+ }
+
+
+
+ string str_pos=*sit++;
+ if(str_pos.size()<1) { continue; }
+ int fpos=atoi(str_pos.c_str());
+ string str_strand=*sit++;
+
+ if(str_strand[0]=='R') {
+ fpos=-1*(fpos+len-1);
+ }
+
+ string str_nm=*sit++;
+ // count non-digit characters
+ int nm=0;
+ for(int i=0;i<str_nm.size();i++) {
+ if(!isdigit(str_nm[i])) { nm++; }
+ }
+
+ // determine the chromosome index
+ hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
+ int cind=-1;
+ if(li==cind_map.end()) {
+ // register new chromosome
+ cind=cnames.size();
+ cnames.push_back(chr);
+ cind_map[chr]=cind;
+ // allocate new pos vector
+ pos.push_back(vector<int>());
+ posnm.push_back(vector<int>());
+ if(read_names) {
+ tagnames.push_back(vector<string>());
+ }
+#ifdef DEBUG
+ Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size());
+#endif
+ } else {
+ cind=li->second;
+ }
+ fcount++;
+ (pos[cind]).push_back(fpos);
+ (posnm[cind]).push_back(nm);
+ if(read_names) {
+ (tagnames[cind]).push_back(tagname);
+ }
+#ifdef DEBUG
+ Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len);
+ if(fcount>30) {
+ break;
+ }
+#endif
+
+ }
+ }
+ fclose(f);
+
+ Rprintf("done. read %d fragments\n",fcount);
+ }
+ // construct output structures
+ SEXP chnames;
+ int np=0; // number of protections
+ PROTECT(chnames = allocVector(STRSXP, cnames.size()));
+ for(vector<string>::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) {
+ SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str()));
+ }
+ np++;
+
+ // sort
+ //for(vector<vector<int> >::iterator csi=pos.begin();csi!=pos.end();++csi) {
+ // sort(csi->begin(), csi->end(), lessAbsoluteValue());
+ //}
+
+ SEXP ans;
+ PROTECT(ans = allocVector(VECSXP, cnames.size())); np++;
+ vector<vector<int> >::const_iterator nsi;
+ vector<vector<string> >::const_iterator ssi;
+ for(vector<vector<int> >::const_iterator csi=pos.begin();csi!=pos.end();++csi) {
+ nsi=posnm.begin()+(csi-pos.begin());
+
+ SEXP dv,dnames_R;
+ PROTECT(dnames_R = allocVector(STRSXP, 2+read_names)); np++;
+ SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+ SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+ if(read_names) {
+ SET_STRING_ELT(dnames_R, 2, mkChar("s"));
+ }
+
+
+
+ SEXP tv,nv,sv;
+ PROTECT(tv=allocVector(INTSXP,csi->size())); np++;
+ PROTECT(nv=allocVector(INTSXP,csi->size())); np++;
+ if(read_names) {
+ PROTECT(sv=allocVector(STRSXP,csi->size())); np++;
+ }
+ int* i_tv=INTEGER(tv);
+ int* i_nv=INTEGER(nv);
+
+ int i=0;
+ vector<int>::const_iterator ini=nsi->begin();
+ for(vector<int> ::const_iterator pi=csi->begin();pi!=csi->end();++pi) {
+ i_tv[i]=*pi;
+ i_nv[i]=*ini++;
+ i++;
+ }
+ if(read_names) {
+ int i=0;
+ ssi=tagnames.begin()+(csi-pos.begin());
+ for(vector<string>::const_iterator si=ssi->begin();si!=ssi->end();++si) {
+ SET_STRING_ELT(sv,i,mkChar(si->c_str()));
+ i++;
+ }
+ }
+ PROTECT(dv = allocVector(VECSXP, 2+read_names)); np++;
+ SET_VECTOR_ELT(dv, 0, tv);
+ SET_VECTOR_ELT(dv, 1, nv);
+ if(read_names) {
+ SET_VECTOR_ELT(dv, 2, sv);
+ }
+ setAttrib(dv, R_NamesSymbol, dnames_R);
+
+ SET_VECTOR_ELT(ans, csi-pos.begin(), dv);
+ }
+
+ setAttrib(ans,R_NamesSymbol,chnames);
+
+#ifdef DEBUG
+ Rprintf("unprotecting %d elements\n",np);
+#endif
+
+ UNPROTECT(np);
+ return(ans);
+}
+
+
+ // read in eland multi files, adjusting the negative strand coordinate by sequence length
+SEXP read_eland_multi(SEXP filename,SEXP read_tag_names_R,SEXP eland_tag_length_R) {
+
+#ifdef DEBUG
+ Rprintf("read_eland_muti() : start\n");
+#endif
+ const char* fname=CHAR(asChar(filename));
+ int read_names=*(INTEGER(read_tag_names_R));
+ int eland_tag_length=*(INTEGER(eland_tag_length_R));
+#ifdef DEBUG
+ Rprintf("fname=%s\n",fname);
+#endif
+
+ // main data vector
+ // chr - pos
+ vector< vector<int> > pos;
+ vector< vector<int> > posnm; // number of mismatches
+ vector< vector<string> > tagnames;
+
+ // chromosome map
+ hash_map<string, int, hash<string>,equal_to<string> > cind_map;
+ vector<string> cnames;
+
+
+ typedef boost::tokenizer<boost::char_separator<char> > tokType;
+ boost::char_separator<char> sep(" \t","");
+ boost::char_separator<char> comsep(",","",boost::keep_empty_tokens);
+ boost::char_separator<char> colsep(":","",boost::keep_empty_tokens);
+
+
+ FILE *f=fopen(fname,"rb");
+ if (!f) { cout<<"can't open input file \""<<fname<<"\"\n"; }
+ else {
+ Rprintf("opened %s\n",fname);
+
+ // read in bed line
+ string line;
+ int nline=0;
+ int fcount=0;
+ while(get_a_line(f,line)) {
+ nline++;
+ // chomp
+ size_t elpos = line.find_last_not_of("\n");
+ if(elpos != string::npos) {
+ line = line.substr(0, elpos+1);
+ }
+#ifdef DEBUG
+ Rprintf("line %d: %s\n",nline,line.c_str());
+#endif
+
+ tokType tok(line, sep);
+ tokType::iterator sit=tok.begin();
+ if(sit!=tok.end()) {
+ string tagname=*sit++;
+ string sequence=*sit++;
+ string mspec=*sit++;
+ // parse out match spec
+
+ if(mspec=="NM" || mspec=="QC") { continue; }
+#ifdef DEBUG
+ Rprintf("parsing out spec \"%s\" : ",mspec.c_str());
+#endif
+
+ tokType stok(mspec, colsep);
+ tokType::iterator ssit=stok.begin();
+ string str_nm0=*ssit++;
+
+ int nm=0;
+ int nm0=atoi(str_nm0.c_str());
+ if(nm0>1) {
+#ifdef DEBUG
+ Rprintf("rejected for nm0\n");
+#endif
+ continue;
+ }
+ if(nm0==0) {
+ string str_nm1=*ssit++;
+ int nm1=atoi(str_nm1.c_str());
+ if(nm1>1) {
+#ifdef DEBUG
+ Rprintf("rejected for nm1\n");
+#endif
+ continue;
+ }
+ if(nm1==0) {
+ string str_nm2=*ssit++;
+ int nm2=atoi(str_nm2.c_str());
+ if(nm2>1) {
+#ifdef DEBUG
+ Rprintf("rejected for nm2\n");
+#endif
+ continue;
+ }
+ nm=2;
+ } else {
+ nm=1;
+ }
+ }
+
+#ifdef DEBUG
+ Rprintf("accepted (nm=%d)\n",nm);
+#endif
+ int npos=0;
+ string mpos=*sit++;
+ vector<string> mposc;
+ vector<int> mposp;
+ tokType ptok(mpos, comsep);
+ string prevchr;
+ for(tokType::iterator psit=ptok.begin();psit!=ptok.end();psit++) {
+ string cpos=*psit;
+ npos++;
+ int strand=1;
+ if(cpos.size()<5) {
+ Rprintf("ERROR: line=%d, match %d is too short: \"%s\"; ",nline,npos,cpos.c_str());
+ }
+ char lc=cpos.at(cpos.size()-1);
+
+ if(atoi(&lc)==nm) {
+ switch(cpos.at(cpos.size()-2)) {
+ case 'R': strand=-1; break;
+ case 'F': strand=1; break;
+ default:
+ Rprintf("ERROR: line=%d, match %d specifies an invalid strand %c\n",nline,npos,cpos.at(cpos.size()-2)); break;
+ continue;
+ }
+ string chr,str_pos;
+ size_t colpos=cpos.find(":");
+ if(colpos==string::npos) {
+ if(npos>1) {
+ chr=prevchr;
+ str_pos=cpos.substr(0,cpos.size()-2);
+ } else {
+ Rprintf("ERROR: line=%d, match %d does not contain chromosome separator: \"%s\"\n",nline,npos,cpos.c_str());
+ continue;
+ }
+ } else {
+ chr=cpos.substr(0,colpos);
+ str_pos=cpos.substr(colpos+1,cpos.size()-3-colpos);
+ }
+#ifdef DEBUG
+ Rprintf("\"%s\" : chr=%s, pos=%s, strand=%d\n",cpos.c_str(),chr.c_str(),str_pos.c_str(),strand);
+#endif
+ int pos=strand*atoi(str_pos.c_str());
+ mposc.push_back(chr);
+ mposp.push_back(pos);
+ }
+ }
+
+ string chr;
+ int fpos;
+ if(mposc.size()!=1) {
+ if(mposc.size()==0) {
+ Rprintf("ERROR: line=%d: no %d-mismatch matches were found in \"%s\"\n",nline,nm,mpos.c_str());
+ } else {
+ Rprintf("ERROR: line=%d: more than one (%d) %d-mismatch matches were found in \"%s\"\n",nline,mposc.size(),nm,mpos.c_str());
+ }
+ continue;
+ } else {
+ chr=*mposc.begin();
+ fpos=*mposp.begin();
+ }
+
+ int len=sequence.size();
+ // adjust probe length if eland length limit was specified
+ if(eland_tag_length>0 && len>eland_tag_length) {
+ len=eland_tag_length;
+ }
+
+ if(fpos<0) {
+ fpos=-1*(-1*fpos+len-1);
+ }
+
+ // determine the chromosome index
+ hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
+ int cind=-1;
+ if(li==cind_map.end()) {
+ // register new chromosome
+ cind=cnames.size();
+ cnames.push_back(chr);
+ cind_map[chr]=cind;
+ // allocate new pos vector
+ pos.push_back(vector<int>());
+ posnm.push_back(vector<int>());
+ if(read_names) {
+ tagnames.push_back(vector<string>());
+ }
+#ifdef DEBUG
+ Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size());
+#endif
+ } else {
+ cind=li->second;
+ }
+ fcount++;
+ (pos[cind]).push_back(fpos);
+ (posnm[cind]).push_back(nm);
+ if(read_names) {
+ (tagnames[cind]).push_back(tagname);
+ }
+#ifdef DEBUG
+ Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len);
+ if(fcount>30) {
+ break;
+ }
+#endif
+
+ }
+ }
+ fclose(f);
+
+ Rprintf("done. read %d fragments\n",fcount);
+ }
+ // construct output structures
+ SEXP chnames;
+ int np=0; // number of protections
+ PROTECT(chnames = allocVector(STRSXP, cnames.size()));
+ for(vector<string>::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) {
+ SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str()));
+ }
+ np++;
+
+ // sort
+ //for(vector<vector<int> >::iterator csi=pos.begin();csi!=pos.end();++csi) {
+ // sort(csi->begin(), csi->end(), lessAbsoluteValue());
+ //}
+
+ SEXP ans;
+ PROTECT(ans = allocVector(VECSXP, cnames.size())); np++;
+ vector<vector<int> >::const_iterator nsi;
+ vector<vector<string> >::const_iterator ssi;
+ for(vector<vector<int> >::const_iterator csi=pos.begin();csi!=pos.end();++csi) {
+ nsi=posnm.begin()+(csi-pos.begin());
+
+ SEXP dv,dnames_R;
+ PROTECT(dnames_R = allocVector(STRSXP, 2+read_names)); np++;
+ SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+ SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+ if(read_names) {
+ SET_STRING_ELT(dnames_R, 2, mkChar("s"));
+ }
+
+
+
+ SEXP tv,nv,sv;
+ PROTECT(tv=allocVector(INTSXP,csi->size())); np++;
+ PROTECT(nv=allocVector(INTSXP,csi->size())); np++;
+ if(read_names) {
+ PROTECT(sv=allocVector(STRSXP,csi->size())); np++;
+ }
+ int* i_tv=INTEGER(tv);
+ int* i_nv=INTEGER(nv);
+
+ int i=0;
+ vector<int>::const_iterator ini=nsi->begin();
+ for(vector<int> ::const_iterator pi=csi->begin();pi!=csi->end();++pi) {
+ i_tv[i]=*pi;
+ i_nv[i]=*ini++;
+ i++;
+ }
+ if(read_names) {
+ int i=0;
+ ssi=tagnames.begin()+(csi-pos.begin());
+ for(vector<string>::const_iterator si=ssi->begin();si!=ssi->end();++si) {
+ SET_STRING_ELT(sv,i,mkChar(si->c_str()));
+ i++;
+ }
+ }
+ PROTECT(dv = allocVector(VECSXP, 2+read_names)); np++;
+ SET_VECTOR_ELT(dv, 0, tv);
+ SET_VECTOR_ELT(dv, 1, nv);
+ if(read_names) {
+ SET_VECTOR_ELT(dv, 2, sv);
+ }
+ setAttrib(dv, R_NamesSymbol, dnames_R);
+
+ SET_VECTOR_ELT(ans, csi-pos.begin(), dv);
+ }
+
+ setAttrib(ans,R_NamesSymbol,chnames);
+
+#ifdef DEBUG
+ Rprintf("unprotecting %d elements\n",np);
+#endif
+
+ UNPROTECT(np);
+ return(ans);
+}
+
+
+ // read in regular eland files, adjusting the negative strand coordinate by sequence length
+ SEXP read_bowtie(SEXP filename,SEXP read_tag_names_R) {
+
+#ifdef DEBUG
+ Rprintf("start\n");
+#endif
+ const char* fname=CHAR(asChar(filename));
+ int read_names=*(INTEGER(read_tag_names_R));
+#ifdef DEBUG
+ Rprintf("fname=%s\n",fname);
+#endif
+
+ // main data vector
+ // chr - pos
+ vector< vector<int> > pos;
+ vector< vector<int> > posnm; // number of mismatches
+ vector< vector<string> > tagnames;
+
+ // chromosome map
+ hash_map<string, int, hash<string>,equal_to<string> > cind_map;
+ vector<string> cnames;
+
+
+ typedef boost::tokenizer<boost::char_separator<char> > tokType;
+ boost::char_separator<char> sep("\t","",boost::keep_empty_tokens);
+ boost::char_separator<char> sep2(",");
+
+
+ FILE *f=fopen(fname,"rb");
+ if (!f) { cout<<"can't open input file \""<<fname<<"\"\n";
+ } else {
+#ifdef HAVE_LIBBZ2
+ BZFILE* b=0;
+ int bzerror;
+
+ int bz2file=0;
+ if(strstr(fname,".bz2")) {
+ bz2file=1;
+ b=BZ2_bzReadOpen (&bzerror, f, 0, 0, NULL, 0);
+ if (bzerror != BZ_OK) { cout<<"bzerror="<<bzerror<<endl; }
+ }
+#endif
+
+ Rprintf("opened %s\n",fname);
+
+ // read in bed line
+ string line;
+ int fcount=0;
+#ifdef HAVE_LIBBZ2
+ while(get_a_line(f,b,bz2file,line)) {
+#else
+ while(get_a_line(f,line)) {
+#endif
+
+#ifdef DEBUG
+ Rprintf("line: %s\n",line.c_str());
+#endif
+
+
+ tokType tok(line, sep);
+ tokType::iterator sit=tok.begin();
+ if(sit!=tok.end()) {
+ string tagname=*sit++;
+ string str_strand=*sit++;
+ string chr=*sit++;
+
+ string str_pos=*sit++;
+ int fpos=atoi(str_pos.c_str());
+
+ string sequence=*sit++;
+ sit++; sit++;
+ string mm=*sit++;
+
+ int len=sequence.size();
+ if(str_strand[0]=='-') {
+ fpos=-1*(fpos+len-1);
+ }
+ // determine number of mismatches
+ int nm=0;
+ if(mm.size()>0) {
+ nm++;
+ string::size_type tp(0);
+ while(tp!=string::npos) {
+ tp = mm.find(",",tp);
+ if(tp!=string::npos) {
+ tp++;
+ ++nm;
+ }
+ }
+ }
+
+
+
+ // determine the chromosome index
+ hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
+ int cind=-1;
+ if(li==cind_map.end()) {
+ // register new chromosome
+ cind=cnames.size();
+ cnames.push_back(chr);
+ cind_map[chr]=cind;
+ // allocate new pos vector
+ pos.push_back(vector<int>());
+ posnm.push_back(vector<int>());
+ if(read_names) {
+ tagnames.push_back(vector<string>());
+ }
+#ifdef DEBUG
+ Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size());
+#endif
+ } else {
+ cind=li->second;
+ }
+ fcount++;
+ (pos[cind]).push_back(fpos);
+ (posnm[cind]).push_back(nm);
+ if(read_names) {
+ (tagnames[cind]).push_back(tagname);
+ }
+#ifdef DEBUG
+ Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len);
+ if(fcount>30) {
+ break;
+ }
+#endif
+
+ }
+ }
+
+#ifdef HAVE_LIBBZ2
+ BZ2_bzReadClose( &bzerror, b);
+#endif
+ fclose(f);
+
+ Rprintf("done. read %d fragments\n",fcount);
+ }
+ // construct output structures
+ SEXP chnames;
+ int np=0; // number of protections
+ PROTECT(chnames = allocVector(STRSXP, cnames.size()));
+ for(vector<string>::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) {
+ SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str()));
+ }
+ np++;
+
+ // sort
+ //for(vector<vector<int> >::iterator csi=pos.begin();csi!=pos.end();++csi) {
+ // sort(csi->begin(), csi->end(), lessAbsoluteValue());
+ //}
+
+ SEXP ans;
+ PROTECT(ans = allocVector(VECSXP, cnames.size())); np++;
+ vector<vector<int> >::const_iterator nsi;
+ vector<vector<string> >::const_iterator ssi;
+ for(vector<vector<int> >::const_iterator csi=pos.begin();csi!=pos.end();++csi) {
+ nsi=posnm.begin()+(csi-pos.begin());
+
+ SEXP dv,dnames_R;
+ PROTECT(dnames_R = allocVector(STRSXP, 2+read_names)); np++;
+ SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+ SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+ if(read_names) {
+ SET_STRING_ELT(dnames_R, 2, mkChar("s"));
+ }
+
+
+
+ SEXP tv,nv,sv;
+ PROTECT(tv=allocVector(INTSXP,csi->size())); np++;
+ PROTECT(nv=allocVector(INTSXP,csi->size())); np++;
+ if(read_names) {
+ PROTECT(sv=allocVector(STRSXP,csi->size())); np++;
+ }
+ int* i_tv=INTEGER(tv);
+ int* i_nv=INTEGER(nv);
+
+ int i=0;
+ vector<int>::const_iterator ini=nsi->begin();
+ for(vector<int> ::const_iterator pi=csi->begin();pi!=csi->end();++pi) {
+ i_tv[i]=*pi;
+ i_nv[i]=*ini++;
+ i++;
+ }
+ if(read_names) {
+ int i=0;
+ ssi=tagnames.begin()+(csi-pos.begin());
+ for(vector<string>::const_iterator si=ssi->begin();si!=ssi->end();++si) {
+ SET_STRING_ELT(sv,i,mkChar(si->c_str()));
+ i++;
+ }
+ }
+ PROTECT(dv = allocVector(VECSXP, 2+read_names)); np++;
+ SET_VECTOR_ELT(dv, 0, tv);
+ SET_VECTOR_ELT(dv, 1, nv);
+ if(read_names) {
+ SET_VECTOR_ELT(dv, 2, sv);
+ }
+ setAttrib(dv, R_NamesSymbol, dnames_R);
+
+ SET_VECTOR_ELT(ans, csi-pos.begin(), dv);
+ }
+
+ setAttrib(ans,R_NamesSymbol,chnames);
+
+#ifdef DEBUG
+ Rprintf("unprotecting %d elements\n",np);
+#endif
+
+ UNPROTECT(np);
+ return(ans);
+}
+
+
+ // read in helicos tab-separated alignment output (regular or bz2)
+ SEXP read_helicostabf(SEXP filename,SEXP read_tag_names_R) {
+
+#ifdef DEBUG
+ Rprintf("start\n");
+#endif
+ const char* fname=CHAR(asChar(filename));
+ int read_names=*(INTEGER(read_tag_names_R));
+#ifdef DEBUG
+ Rprintf("fname=%s\n",fname);
+#endif
+
+ // main data vector
+ // chr - pos
+ vector< vector<int> > pos;
+ vector< vector<int> > posnm; // number of mismatches
+ vector< vector<int> > poslen; // length of the match
+ vector< vector<string> > tagnames;
+
+ // chromosome map
+ hash_map<string, int, hash<string>,equal_to<string> > cind_map;
+ vector<string> cnames;
+
+
+ typedef boost::tokenizer<boost::char_separator<char> > tokType;
+ boost::char_separator<char> sep("\t","",boost::keep_empty_tokens);
+ boost::char_separator<char> sep2(",");
+
+
+ FILE *f=fopen(fname,"rb");
+ if (!f) { cout<<"can't open input file \""<<fname<<"\"\n";
+ } else {
+#ifdef HAVE_LIBBZ2
+ BZFILE* b=0;
+ int bzerror;
+
+ int bz2file=0;
+ if(strstr(fname,".bz2")) {
+ bz2file=1;
+ b=BZ2_bzReadOpen (&bzerror, f, 0, 0, NULL, 0);
+ if (bzerror != BZ_OK) { cout<<"bzerror="<<bzerror<<endl; }
+ }
+#endif
+
+ Rprintf("opened %s\n",fname);
+
+ // read in bed line
+ string line;
+ int fcount=0;
+ int nlines=0;
+#ifdef HAVE_LIBBZ2
+ while(get_a_line(f,b,bz2file,line)) {
+#else
+ while(get_a_line(f,line)) {
+#endif
+
+#ifdef DEBUG
+ Rprintf("line: %s\n",line.c_str());
+#endif
+ nlines++;
+ // skip comments
+ if(line[0]=='#') { continue; }
+ if(line.compare(0,12,"Reference_ID")==0) {
+#ifdef DEBUG
+ Rprintf("matched header on line %d\n",nlines);
+#endif
+ continue;
+ }
+
+ tokType tok(line, sep);
+ tokType::iterator sit=tok.begin();
+ if(sit!=tok.end()) {
+ string chr=*sit++;
+ string tagname=*sit++;
+ string str_startpos=*sit++;
+ string str_endpos=*sit++;
+
+ string str_tstart=*sit++;
+ string str_tend=*sit++;
+ int len=atoi(str_tend.c_str())-atoi(str_tstart.c_str());
+
+ sit++; sit++;
+ string str_ndel=*sit++;
+ string str_nins=*sit++;
+ string str_nsub=*sit++;
+
+ string str_strand=*sit++;
+ int fpos;
+ if(str_strand[0]=='-') {
+ fpos=-1*atoi(str_endpos.c_str());
+ } else {
+ fpos=atoi(str_startpos.c_str());
+ }
+
+ // determine number of mismatches
+ int nm=atoi(str_ndel.c_str())+atoi(str_nins.c_str())+atoi(str_nsub.c_str());
+
+ // determine the chromosome index
+ hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
+ int cind=-1;
+ if(li==cind_map.end()) {
+ // register new chromosome
+ cind=cnames.size();
+ cnames.push_back(chr);
+ cind_map[chr]=cind;
+ // allocate new pos vector
+ pos.push_back(vector<int>());
+ posnm.push_back(vector<int>());
+ poslen.push_back(vector<int>());
+ if(read_names) {
+ tagnames.push_back(vector<string>());
+ }
+#ifdef DEBUG
+ Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size());
+#endif
+ } else {
+ cind=li->second;
+ }
+ fcount++;
+ (pos[cind]).push_back(fpos);
+ (posnm[cind]).push_back(nm);
+ (poslen[cind]).push_back(len);
+ if(read_names) {
+ (tagnames[cind]).push_back(tagname);
+ }
+#ifdef DEBUG
+ Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d\n",chr.c_str(),cind,fpos,nm);
+ if(fcount>30) {
+ break;
+ }
+#endif
+
+ }
+ }
+
+#ifdef HAVE_LIBBZ2
+ BZ2_bzReadClose( &bzerror, b);
+#endif
+ fclose(f);
+
+ Rprintf("done. read %d fragments\n",fcount);
+ }
+ // construct output structures
+ SEXP chnames;
+ int np=0; // number of protections
+ PROTECT(chnames = allocVector(STRSXP, cnames.size()));
+ for(vector<string>::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) {
+ SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str()));
+ }
+ np++;
+
+ // sort
+ //for(vector<vector<int> >::iterator csi=pos.begin();csi!=pos.end();++csi) {
+ // sort(csi->begin(), csi->end(), lessAbsoluteValue());
+ //}
+
+ SEXP ans;
+ PROTECT(ans = allocVector(VECSXP, cnames.size())); np++;
+ vector<vector<int> >::const_iterator nsi;
+ vector<vector<int> >::const_iterator lsi;
+ vector<vector<string> >::const_iterator ssi;
+ for(vector<vector<int> >::const_iterator csi=pos.begin();csi!=pos.end();++csi) {
+ nsi=posnm.begin()+(csi-pos.begin());
+ lsi=poslen.begin()+(csi-pos.begin());
+
+ SEXP dv,dnames_R;
+ PROTECT(dnames_R = allocVector(STRSXP, 3+read_names)); np++;
+ SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+ SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+ SET_STRING_ELT(dnames_R, 2, mkChar("l"));
+ if(read_names) {
+ SET_STRING_ELT(dnames_R, 3, mkChar("s"));
+ }
+
+
+
+ SEXP tv,nv,lv,sv;
+ PROTECT(tv=allocVector(INTSXP,csi->size())); np++;
+ PROTECT(nv=allocVector(INTSXP,csi->size())); np++;
+ PROTECT(lv=allocVector(INTSXP,csi->size())); np++;
+ if(read_names) {
+ PROTECT(sv=allocVector(STRSXP,csi->size())); np++;
+ }
+ int* i_tv=INTEGER(tv);
+ int* i_nv=INTEGER(nv);
+ int* i_lv=INTEGER(lv);
+
+ int i=0;
+ vector<int>::const_iterator ini=nsi->begin();
+ vector<int>::const_iterator lni=lsi->begin();
+ for(vector<int> ::const_iterator pi=csi->begin();pi!=csi->end();++pi) {
+ i_tv[i]=*pi;
+ i_nv[i]=*ini++;
+ i_lv[i]=*lni++;
+ i++;
+ }
+ if(read_names) {
+ int i=0;
+ ssi=tagnames.begin()+(csi-pos.begin());
+ for(vector<string>::const_iterator si=ssi->begin();si!=ssi->end();++si) {
+ SET_STRING_ELT(sv,i,mkChar(si->c_str()));
+ i++;
+ }
+ }
+ PROTECT(dv = allocVector(VECSXP, 3+read_names)); np++;
+ SET_VECTOR_ELT(dv, 0, tv);
+ SET_VECTOR_ELT(dv, 1, nv);
+ SET_VECTOR_ELT(dv, 2, lv);
+ if(read_names) {
+ SET_VECTOR_ELT(dv, 3, sv);
+ }
+ setAttrib(dv, R_NamesSymbol, dnames_R);
+
+ SET_VECTOR_ELT(ans, csi-pos.begin(), dv);
+ }
+
+ setAttrib(ans,R_NamesSymbol,chnames);
+
+#ifdef DEBUG
+ Rprintf("unprotecting %d elements\n",np);
+#endif
+
+ UNPROTECT(np);
+ return(ans);
+}
+
+
+
+ // read in text version of maq map
+ SEXP read_maqmap(SEXP filename,SEXP read_tag_names_R) {
+
+#ifdef DEBUG
+ Rprintf("start\n");
+#endif
+ const char* fname=CHAR(asChar(filename));
+ int read_names=*(INTEGER(read_tag_names_R));
+#ifdef DEBUG
+ Rprintf("fname=%s\n",fname);
+#endif
+
+ // main data vector
+ // chr - pos
+ vector< vector<int> > pos;
+ vector< vector<int> > posnm; // number of mismatches
+ vector< vector<string> > tagnames;
+
+ // chromosome map
+ hash_map<string, int, hash<string>,equal_to<string> > cind_map;
+ vector<string> cnames;
+
+
+ typedef boost::tokenizer<boost::char_separator<char> > tokType;
+ boost::char_separator<char> sep("\t","",boost::keep_empty_tokens);
+
+
+ FILE *f=fopen(fname,"rb");
+ if (!f) { cout<<"can't open input file \""<<fname<<"\"\n"; }
+ else {
+ Rprintf("opened %s\n",fname);
+
+ // read in bed line
+ string line;
+ int fcount=0;
+ while(get_a_line(f,line)) {
+
+#ifdef DEBUG
+ Rprintf("line: %s\n",line.c_str());
+#endif
+
+
+ tokType tok(line, sep);
+ tokType::iterator sit=tok.begin();
+ if(sit!=tok.end()) {
+ string tagname=*sit++;
+ string chr=*sit++;
+ string str_pos=*sit++;
+ int fpos=atoi(str_pos.c_str());
+ string str_strand=*sit++;
+ sit++; sit++; sit++; sit++; sit++;
+ string str_nm=*sit++;
+ sit++; sit++; sit++;
+ string str_len=*sit++;
+ int nm=atoi(str_nm.c_str());
+ int len=atoi(str_len.c_str());
+
+ if(str_strand[0]=='-') {
+ fpos=-1*(fpos+len-1);
+ }
+
+ // determine the chromosome index
+ hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
+ int cind=-1;
+ if(li==cind_map.end()) {
+ // register new chromosome
+ cind=cnames.size();
+ cnames.push_back(chr);
+ cind_map[chr]=cind;
+ // allocate new pos vector
+ pos.push_back(vector<int>());
+ posnm.push_back(vector<int>());
+ if(read_names) {
+ tagnames.push_back(vector<string>());
+ }
+#ifdef DEBUG
+ Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size());
+#endif
+ } else {
+ cind=li->second;
+ }
+ fcount++;
+ (pos[cind]).push_back(fpos);
+ (posnm[cind]).push_back(nm);
+ if(read_names) {
+ (tagnames[cind]).push_back(tagname);
+ }
+#ifdef DEBUG
+ Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len);
+ if(fcount>30) {
+ break;
+ }
+#endif
+
+ }
+ }
+ fclose(f);
+
+ Rprintf("done. read %d fragments\n",fcount);
+ }
+ // construct output structures
+ SEXP chnames;
+ int np=0; // number of protections
+ PROTECT(chnames = allocVector(STRSXP, cnames.size()));
+ for(vector<string>::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) {
+ SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str()));
+ }
+ np++;
+
+ // sort
+ //for(vector<vector<int> >::iterator csi=pos.begin();csi!=pos.end();++csi) {
+ // sort(csi->begin(), csi->end(), lessAbsoluteValue());
+ //}
+
+ SEXP ans;
+ PROTECT(ans = allocVector(VECSXP, cnames.size())); np++;
+ vector<vector<int> >::const_iterator nsi;
+ vector<vector<string> >::const_iterator ssi;
+ for(vector<vector<int> >::const_iterator csi=pos.begin();csi!=pos.end();++csi) {
+ nsi=posnm.begin()+(csi-pos.begin());
+
+ SEXP dv,dnames_R;
+ PROTECT(dnames_R = allocVector(STRSXP, 2+read_names)); np++;
+ SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+ SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+ if(read_names) {
+ SET_STRING_ELT(dnames_R, 2, mkChar("s"));
+ }
+
+
+
+ SEXP tv,nv,sv;
+ PROTECT(tv=allocVector(INTSXP,csi->size())); np++;
+ PROTECT(nv=allocVector(INTSXP,csi->size())); np++;
+ if(read_names) {
+ PROTECT(sv=allocVector(STRSXP,csi->size())); np++;
+ }
+ int* i_tv=INTEGER(tv);
+ int* i_nv=INTEGER(nv);
+
+ int i=0;
+ vector<int>::const_iterator ini=nsi->begin();
+ for(vector<int> ::const_iterator pi=csi->begin();pi!=csi->end();++pi) {
+ i_tv[i]=*pi;
+ i_nv[i]=*ini++;
+ i++;
+ }
+ if(read_names) {
+ int i=0;
+ ssi=tagnames.begin()+(csi-pos.begin());
+ for(vector<string>::const_iterator si=ssi->begin();si!=ssi->end();++si) {
+ SET_STRING_ELT(sv,i,mkChar(si->c_str()));
+ i++;
+ }
+ }
+ PROTECT(dv = allocVector(VECSXP, 2+read_names)); np++;
+ SET_VECTOR_ELT(dv, 0, tv);
+ SET_VECTOR_ELT(dv, 1, nv);
+ if(read_names) {
+ SET_VECTOR_ELT(dv, 2, sv);
+ }
+ setAttrib(dv, R_NamesSymbol, dnames_R);
+
+ SET_VECTOR_ELT(ans, csi-pos.begin(), dv);
+ }
+
+ setAttrib(ans,R_NamesSymbol,chnames);
+
+#ifdef DEBUG
+ Rprintf("unprotecting %d elements\n",np);
+#endif
+
+ UNPROTECT(np);
+ return(ans);
+}
+
+
+
+
+
+ // read in tagalign file
+ SEXP read_tagalign(SEXP filename) {
+
+#ifdef DEBUG
+ Rprintf("start\n");
+#endif
+ const char* fname=CHAR(asChar(filename));
+#ifdef DEBUG
+ Rprintf("fname=%s\n",fname);
+#endif
+
+ // main data vector
+ // chr - pos
+ vector< vector<int> > pos;
+ vector< vector<int> > posnm; // number of mismatches
+
+ // chromosome map
+ hash_map<string, int, hash<string>,equal_to<string> > cind_map;
+ vector<string> cnames;
+
+
+ typedef boost::tokenizer<boost::char_separator<char> > tokType;
+ boost::char_separator<char> sep(" \t");
+
+
+ FILE *f=fopen(fname,"rb");
+ if (!f) { cout<<"can't open input file \""<<fname<<"\"\n"; }
+ else {
+ Rprintf("opened %s\n",fname);
+
+ // read in bed line
+ string line;
+ int fcount=0;
+ while(get_a_line(f,line)) {
+
+#ifdef DEBUG
+ Rprintf("line: %s\n",line.c_str());
+#endif
+
+
+ tokType tok(line, sep);
+ tokType::iterator sit=tok.begin();
+ if(sit!=tok.end()) {
+ string chr=*sit++;
+ string str_spos=*sit++;
+ string str_epos=*sit++;
+ sit++;
+ string str_qual=*sit++;
+ string str_strand=*sit;
+
+ int fpos;
+ if(str_strand[0]=='+') {
+ fpos=atoi(str_spos.c_str());
+ } else {
+ fpos=-1*atoi(str_epos.c_str());
+ }
+ int nm=atoi(str_qual.c_str());
+
+ // determine the chromosome index
+ hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
+ int cind=-1;
+ if(li==cind_map.end()) {
+ // register new chromosome
+ cind=cnames.size();
+ cnames.push_back(chr);
+ cind_map[chr]=cind;
+ // allocate new pos vector
+ pos.push_back(vector<int>());
+ posnm.push_back(vector<int>());
+#ifdef DEBUG
+ Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size());
+#endif
+ } else {
+ cind=li->second;
+ }
+ fcount++;
+ (pos[cind]).push_back(fpos);
+ (posnm[cind]).push_back(nm);
+#ifdef DEBUG
+ Rprintf("read in position chr=%s cind=%d fpos=%d nm=%d\n",chr.c_str(),cind,fpos,nm);
+ if(fcount>30) {
+ break;
+ }
+#endif
+
+ }
+ }
+ fclose(f);
+
+ Rprintf("done. read %d fragments\n",fcount);
+ }
+ // construct output structures
+ SEXP chnames;
+ int np=0; // number of protections
+ PROTECT(chnames = allocVector(STRSXP, cnames.size()));
+ for(vector<string>::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) {
+ SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str()));
+ }
+ np++;
+
+ // sort
+ //for(vector<vector<int> >::iterator csi=pos.begin();csi!=pos.end();++csi) {
+ // sort(csi->begin(), csi->end(), lessAbsoluteValue());
+ //}
+
+ SEXP ans;
+ PROTECT(ans = allocVector(VECSXP, cnames.size())); np++;
+ vector<vector<int> >::const_iterator nsi;
+ for(vector<vector<int> >::const_iterator csi=pos.begin();csi!=pos.end();++csi) {
+ nsi=posnm.begin()+(csi-pos.begin());
+
+ SEXP dv,dnames_R;
+ PROTECT(dnames_R = allocVector(STRSXP, 2)); np++;
+ SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+ SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+
+
+ SEXP tv,nv;
+ PROTECT(tv=allocVector(INTSXP,csi->size())); np++;
+ PROTECT(nv=allocVector(INTSXP,csi->size())); np++;
+ int* i_tv=INTEGER(tv);
+ int* i_nv=INTEGER(nv);
+
+ int i=0;
+ vector<int>::const_iterator ini=nsi->begin();
+ for(vector<int> ::const_iterator pi=csi->begin();pi!=csi->end();++pi) {
+ i_tv[i]=*pi;
+ i_nv[i]=*ini++;
+ i++;
+ }
+ PROTECT(dv = allocVector(VECSXP, 2)); np++;
+ SET_VECTOR_ELT(dv, 0, tv);
+ SET_VECTOR_ELT(dv, 1, nv);
+ setAttrib(dv, R_NamesSymbol, dnames_R);
+
+ SET_VECTOR_ELT(ans, csi-pos.begin(), dv);
+ }
+
+ setAttrib(ans,R_NamesSymbol,chnames);
+
+#ifdef DEBUG
+ Rprintf("unprotecting %d elements\n",np);
+#endif
+
+ UNPROTECT(np);
+ return(ans);
+}
+
+
+
+
+ // arachne madness
+ SEXP read_arachne(SEXP filename) {
+
+#ifdef DEBUG
+ Rprintf("start\n");
+#endif
+ const char* fname=CHAR(asChar(filename));
+#ifdef DEBUG
+ Rprintf("fname=%s\n",fname);
+#endif
+
+ // main data vector
+ // chr - pos
+ vector< vector<int> > pos;
+ vector< vector<int> > posnm; // number of mismatches
+
+ // chromosome map
+ hash_map<string, int, hash<string>,equal_to<string> > cind_map;
+ vector<string> cnames;
+
+
+ typedef boost::tokenizer<boost::char_separator<char> > tokType;
+ boost::char_separator<char> sep(" \t");
+
+
+
+
+
+ FILE *f=fopen(fname,"rb");
+ if (!f) { cout<<"can't open input file \""<<fname<<"\"\n"; }
+ else {
+
+#ifdef HAVE_LIBBZ2
+ BZFILE* b=0;
+ int bzerror;
+
+ int bz2file=0;
+ if(strstr(fname,".bz2")) {
+ bz2file=1;
+ b=BZ2_bzReadOpen (&bzerror, f, 0, 0, NULL, 0);
+ if (bzerror != BZ_OK) { cout<<"bzerror="<<bzerror<<endl; }
+ }
+#endif
+
+
+ Rprintf("opened %s\n",fname);
+
+ // read in bed line
+ string line;
+ int fcount=0;
+#ifdef HAVE_LIBBZ2
+ while(get_a_line(f,b,bz2file,line)) {
+#else
+ while(get_a_line(f,line)) {
+#endif
+
+#ifdef DEBUG
+ Rprintf("line: %s\n",line.c_str());
+#endif
+
+
+ tokType tok(line, sep);
+ tokType::iterator sit=tok.begin();
+ if(sit!=tok.end()) {
+ string chr=*sit++;
+ string str_spos=*sit++;
+ int nm=0;
+ if(sit!=tok.end()) {
+ string str_mm=*sit;
+ nm=atoi(str_mm.c_str());
+ }
+
+ int fpos=atoi(str_spos.c_str());;
+
+
+ // determine the chromosome index
+ hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
+ int cind=-1;
+ if(li==cind_map.end()) {
+ // register new chromosome
+ cind=cnames.size();
+ cnames.push_back(chr);
+ cind_map[chr]=cind;
+ // allocate new pos vector
+ pos.push_back(vector<int>());
+ posnm.push_back(vector<int>());
+#ifdef DEBUG
+ Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size());
+#endif
+ } else {
+ cind=li->second;
+ }
+ fcount++;
+ (pos[cind]).push_back(fpos);
+ (posnm[cind]).push_back(nm);
+#ifdef DEBUG
+ Rprintf("read in position chr=%s cind=%d fpos=%d nm=%d\n",chr.c_str(),cind,fpos,nm);
+ if(fcount>30) {
+ break;
+ }
+#endif
+
+ }
+ }
+#ifdef HAVE_LIBBZ2
+ BZ2_bzReadClose( &bzerror, b);
+#endif
+
+ fclose(f);
+
+ Rprintf("done. read %d fragments\n",fcount);
+ }
+ // construct output structures
+ SEXP chnames;
+ int np=0; // number of protections
+ PROTECT(chnames = allocVector(STRSXP, cnames.size()));
+ for(vector<string>::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) {
+ SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str()));
+ }
+ np++;
+
+ // sort
+ //for(vector<vector<int> >::iterator csi=pos.begin();csi!=pos.end();++csi) {
+ // sort(csi->begin(), csi->end(), lessAbsoluteValue());
+ //}
+
+ SEXP ans;
+ PROTECT(ans = allocVector(VECSXP, cnames.size())); np++;
+ vector<vector<int> >::const_iterator nsi;
+ for(vector<vector<int> >::const_iterator csi=pos.begin();csi!=pos.end();++csi) {
+ nsi=posnm.begin()+(csi-pos.begin());
+
+ SEXP dv,dnames_R;
+ PROTECT(dnames_R = allocVector(STRSXP, 2)); np++;
+ SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+ SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+
+
+ SEXP tv,nv;
+ PROTECT(tv=allocVector(INTSXP,csi->size())); np++;
+ PROTECT(nv=allocVector(INTSXP,csi->size())); np++;
+ int* i_tv=INTEGER(tv);
+ int* i_nv=INTEGER(nv);
+
+ int i=0;
+ vector<int>::const_iterator ini=nsi->begin();
+ for(vector<int> ::const_iterator pi=csi->begin();pi!=csi->end();++pi) {
+ i_tv[i]=*pi;
+ i_nv[i]=*ini++;
+ i++;
+ }
+ PROTECT(dv = allocVector(VECSXP, 2)); np++;
+ SET_VECTOR_ELT(dv, 0, tv);
+ SET_VECTOR_ELT(dv, 1, nv);
+ setAttrib(dv, R_NamesSymbol, dnames_R);
+
+ SET_VECTOR_ELT(ans, csi-pos.begin(), dv);
+ }
+
+ setAttrib(ans,R_NamesSymbol,chnames);
+
+#ifdef DEBUG
+ Rprintf("unprotecting %d elements\n",np);
+#endif
+
+ UNPROTECT(np);
+ return(ans);
+}
+
+
+ // arachne madness
+ SEXP read_arachne_long(SEXP filename) {
+
+#ifdef DEBUG
+ Rprintf("start\n");
+#endif
+ const char* fname=CHAR(asChar(filename));
+#ifdef DEBUG
+ Rprintf("fname=%s\n",fname);
+#endif
+
+ // main data vector
+ // chr - pos
+ vector< vector<int> > pos;
+ vector< vector<int> > posnm; // number of mismatches
+ vector< vector<int> > poslen; // length of the match
+
+ // chromosome map
+ hash_map<string, int, hash<string>,equal_to<string> > cind_map;
+ vector<string> cnames;
+
+
+ typedef boost::tokenizer<boost::char_separator<char> > tokType;
+ boost::char_separator<char> sep(" \t");
+
+
+
+
+
+ FILE *f=fopen(fname,"rb");
+ if (!f) { cout<<"can't open input file \""<<fname<<"\"\n"; }
+ else {
+
+#ifdef HAVE_LIBBZ2
+ BZFILE* b=0;
+ int bzerror;
+
+ int bz2file=0;
+ if(strstr(fname,".bz2")) {
+ bz2file=1;
+ b=BZ2_bzReadOpen (&bzerror, f, 0, 0, NULL, 0);
+ if (bzerror != BZ_OK) { cout<<"bzerror="<<bzerror<<endl; }
+ }
+#endif
+
+
+ Rprintf("opened %s\n",fname);
+
+ // read in bed line
+ string line;
+ int fcount=0;
+#ifdef HAVE_LIBBZ2
+ while(get_a_line(f,b,bz2file,line)) {
+#else
+ while(get_a_line(f,line)) {
+#endif
+
+#ifdef DEBUG
+ Rprintf("line: %s\n",line.c_str());
+#endif
+
+
+ tokType tok(line, sep);
+ tokType::iterator sit=tok.begin();
+ if(sit!=tok.end()) {
+ string query=*sit++;
+ if(query!="QUERY") { continue; }
+ *sit++; *sit++; *sit++; *sit++;
+ string str_strand=*sit++;
+ string chr=*sit++;
+ string str_startpos=*sit++;
+ string str_endpos=*sit++;
+
+ int fpos;
+ if(str_strand[0]=='1') {
+ fpos=-1*atoi(str_endpos.c_str());
+ } else {
+ fpos=atoi(str_startpos.c_str());
+ }
+#ifdef DEBUG
+ Rprintf("chr=%s, fpos=%d\n",chr.c_str(),fpos);
+#endif
+ *sit++;
+ string str_nblocks=*sit++;
+ int nblocks=atoi(str_nblocks.c_str());
+#ifdef DEBUG
+ Rprintf("nblocks=%d\n",nblocks);
+#endif
+ // tally up the read length and the number of mismatches for all blocks
+ int len=0; int nm=0;
+ for(int i=0;i<nblocks;i++) {
+ string str_sgs=*sit++;
+ int sgs=atoi(str_sgs.c_str());
+ string str_slen=*sit++;
+ int slen=atoi(str_slen.c_str());
+ string str_snm=*sit++;
+ int snm=atoi(str_snm.c_str());
+#ifdef DEBUG
+ Rprintf("sgs=%d, slen=%d, snm=%d\n",sgs,slen,snm);
+#endif
+ len+=slen;
+ nm+=abs(sgs)+snm;
+ }
+ nm+=nblocks-1;
+
+
+ // determine the chromosome index
+ hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
+ int cind=-1;
+ if(li==cind_map.end()) {
+ // register new chromosome
+ cind=cnames.size();
+ cnames.push_back(chr);
+ cind_map[chr]=cind;
+ // allocate new pos vector
+ pos.push_back(vector<int>());
+ posnm.push_back(vector<int>());
+ poslen.push_back(vector<int>());
+#ifdef DEBUG
+ Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size());
+#endif
+ } else {
+ cind=li->second;
+ }
+ fcount++;
+ (pos[cind]).push_back(fpos);
+ (posnm[cind]).push_back(nm);
+ (poslen[cind]).push_back(len);
+#ifdef DEBUG
+ Rprintf("read in position chr=%s cind=%d fpos=%d nm=%d len=%d\n",chr.c_str(),cind,fpos,nm,len);
+ if(fcount>30) {
+ break;
+ }
+#endif
+
+ }
+ }
+#ifdef HAVE_LIBBZ2
+ BZ2_bzReadClose( &bzerror, b);
+#endif
+
+ fclose(f);
+
+ Rprintf("done. read %d fragments\n",fcount);
+ }
+ // construct output structures
+ SEXP chnames;
+ int np=0; // number of protections
+ PROTECT(chnames = allocVector(STRSXP, cnames.size()));
+ for(vector<string>::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) {
+ SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str()));
+ }
+ np++;
+
+ // sort
+ //for(vector<vector<int> >::iterator csi=pos.begin();csi!=pos.end();++csi) {
+ // sort(csi->begin(), csi->end(), lessAbsoluteValue());
+ //}
+
+ SEXP ans;
+ PROTECT(ans = allocVector(VECSXP, cnames.size())); np++;
+ vector<vector<int> >::const_iterator nsi;
+ vector<vector<int> >::const_iterator lsi;
+ for(vector<vector<int> >::const_iterator csi=pos.begin();csi!=pos.end();++csi) {
+ nsi=posnm.begin()+(csi-pos.begin());
+ lsi=poslen.begin()+(csi-pos.begin());
+
+ SEXP dv,dnames_R;
+ PROTECT(dnames_R = allocVector(STRSXP, 3)); np++;
+ SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+ SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+ SET_STRING_ELT(dnames_R, 2, mkChar("l"));
+
+
+ SEXP tv,nv,lv;
+ PROTECT(tv=allocVector(INTSXP,csi->size())); np++;
+ PROTECT(nv=allocVector(INTSXP,csi->size())); np++;
+ PROTECT(lv=allocVector(INTSXP,csi->size())); np++;
+ int* i_tv=INTEGER(tv);
+ int* i_nv=INTEGER(nv);
+ int* i_lv=INTEGER(lv);
+
+ int i=0;
+ vector<int>::const_iterator ini=nsi->begin();
+ vector<int>::const_iterator lni=lsi->begin();
+ for(vector<int> ::const_iterator pi=csi->begin();pi!=csi->end();++pi) {
+ i_tv[i]=*pi;
+ i_nv[i]=*ini++;
+ i_lv[i]=*lni++;
+ i++;
+ }
+ PROTECT(dv = allocVector(VECSXP, 3)); np++;
+ SET_VECTOR_ELT(dv, 0, tv);
+ SET_VECTOR_ELT(dv, 1, nv);
+ SET_VECTOR_ELT(dv, 2, lv);
+ setAttrib(dv, R_NamesSymbol, dnames_R);
+
+ SET_VECTOR_ELT(ans, csi-pos.begin(), dv);
+ }
+
+ setAttrib(ans,R_NamesSymbol,chnames);
+
+#ifdef DEBUG
+ Rprintf("unprotecting %d elements\n",np);
+#endif
+
+ UNPROTECT(np);
+ return(ans);
+}
+
+
+}
+
+
+
+
+/* PASTE REMAINDER AT BOTTOM OF FILE */
+ssize_t getline_local(char **linep, size_t *np, FILE *stream)
+{
+ char *p = NULL;
+ size_t i = 0;
+
+ if (!linep || !np) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (!(*linep) || !(*np)) {
+ *np = 120;
+ *linep = (char *)malloc(*np);
+ if (!(*linep)) {
+ return -1;
+ }
+ }
+
+ flockfile(stream);
+
+ p = *linep;
+ for (int ch = 0; (ch = getc_unlocked(stream)) != EOF;) {
+ if (i >= *np) {
+ /* Grow *linep. */
+ size_t m = *np * 2;
+ char *s = (char *)realloc(*linep, m);
+
+ if (!s) {
+ int error = errno;
+ funlockfile(stream);
+ errno = error;
+ return -1;
+ }
+
+ *linep = s;
+ *np = m;
+
+ p = *linep;
+
+ }
+
+ p[i] = ch;
+ if ('\n' == ch) break;
+ i += 1;
+ }
+ funlockfile(stream);
+
+ /* Null-terminate the string. */
+ if (i >= *np) {
+ /* Grow *linep. */
+ size_t m = *np * 2;
+ char *s = (char *)realloc(*linep, m);
+
+ if (!s) {
+ return -1;
+ }
+
+ *linep = s;
+ *np = m;
+ p = *linep;
+
+ }
+
+ p[i + 1] = '\0';
+ return ((i > 0)? i : -1);
+}
diff --git a/src/cdensum.c b/src/cdensum.c
new file mode 100644
index 0000000..d764727
--- /dev/null
+++ b/src/cdensum.c
@@ -0,0 +1,185 @@
+#include <math.h>
+#include "R.h"
+#include "Rmath.h"
+#include "Rinternals.h"
+
+
+#undef DEBUG 1
+
+// dout is npos-length output array.
+// n - number of positions in pos (and length of tc count array)
+// spos - starting position
+void cdensum(int *n, double *pos, double *tc, double *spos, int *bw,int *dw, int *npos, int *step,double *dout)
+{
+ int i,j;
+
+ double epos= *spos + ((double) *npos);
+ double dbw=(double) *bw;
+ for(i = 0; i< *n; i++) {
+ // size of the window to which the contributions should be added
+ int in=(int) (pos[i]- *spos);
+ int ic=tc[i];
+ int whs=(*dw)*(*bw)*ic;
+ int ws=(int) floor((in-whs)/(*step));
+ int we=(int) ceil((in+whs)/(*step));
+ if(ws<0) { ws=0; }
+ if(we>= *npos) { we= *npos -1; }
+
+ for(j=ws;j<we;j++) {
+ double beta=((double)(j*(*step)-in))/dbw;
+ dout[j]+=((double)ic)*exp(-0.5*beta*beta);
+ }
+ }
+}
+// .Call version of the above
+SEXP ccdensum(SEXP pos_R, SEXP tc_R, SEXP spos_R, SEXP bw_R, SEXP dw_R, SEXP npos_R, SEXP step_R) {
+ double* pos=REAL(pos_R);
+ double* tc=REAL(tc_R);
+ int n=LENGTH(pos_R);
+ int bw=*INTEGER(bw_R);
+ int dw=*INTEGER(dw_R);
+ double spos=*REAL(spos_R);
+ int npos=*INTEGER(npos_R);
+ int step=*INTEGER(step_R);
+ int i,j;
+
+ // allocate return array
+ SEXP out_R;
+ PROTECT(out_R=allocVector(REALSXP,npos));
+ double* dout=REAL(out_R);
+ for(i = 0; i< npos; i++) {
+ dout[i]=0;
+ }
+
+
+
+ double dbw=(double) bw;
+ for(i = 0; i< n; i++) {
+ // size of the window to which the contributions should be added
+ int in=(int) (pos[i]- spos);
+ int ic=tc[i];
+ int whs=dw*bw*ic;
+ int ws=(int) floor((in-whs)/(step));
+ int we=(int) ceil((in+whs)/(step));
+ if(ws<0) { ws=0; }
+ if(we>= npos) { we= npos -1; }
+
+ for(j=ws;j<we;j++) {
+ double beta=((double)(j*(step)-in))/dbw;
+ dout[j]+=((double)ic)*exp(-0.5*beta*beta);
+ }
+ }
+ UNPROTECT(1);
+ return(out_R);
+}
+
+
+// window tag counts
+// dout is npos-length output array that will contain window tag counts
+// windows are of a specified size, moved at a specified step
+// n - number of positions in sorted tag array (positive only)
+// spos - starting position
+void window_n_tags(int *n, double *pos, double *spos, int *window_size, int *window_step, int *npos, int *dout)
+{
+ int i;
+ int cs=0; int ce=0; // current array start/end indecies
+ int ctc=0; // current tag count
+ double wpos=*spos-(*window_size)/2; // left-edge position
+ //Rprintf("n=%d; window_size=%d, window_step=%d, npos=%d, spos=%f\n",*n,*window_size,*window_step,*npos,*spos);
+ for(i=0;i<*npos;i++) {
+ // advance end if needed
+ double ep=wpos+(*window_size);
+ while(ce<(*n) && pos[ce]<=ep) {
+ ctc++; ce++;
+ }
+ // advance start
+ while(cs<*n && pos[cs]<wpos) {
+ ctc--; cs++;
+ }
+ dout[i]=ctc;
+ // advance window position
+ wpos+=*window_step;
+ }
+}
+
+// window tag counts
+// windows are of a specified size, moved at a specified step
+// pos - tag positions (positive, pre-shifted)y
+// spos - starting position
+// returns nsteps-length output array that will contain window tag counts
+SEXP cwindow_n_tags(SEXP pos_R, SEXP spos_R, SEXP window_size_R, SEXP window_step_R, SEXP nsteps_R) {
+ double* pos=REAL(pos_R);
+ int n=LENGTH(pos_R);
+ int window_size=*INTEGER(window_size_R);
+ int window_step=*INTEGER(window_step_R);
+ int nsteps=*INTEGER(nsteps_R);
+ double spos=*REAL(spos_R);
+
+ // allocate return array
+ SEXP tc_R;
+ PROTECT(tc_R=allocVector(INTSXP,nsteps));
+ int* dout=INTEGER(tc_R);
+
+ int i;
+ int cs=0; int ce=0; // current array start/end indecies
+ int ctc=0; // current tag count
+ double wpos=spos-window_size/2; // left-edge position
+ //Rprintf("n=%d; window_size=%d, window_step=%d, npos=%d, spos=%f\n",n,window_size,window_step,nsteps,spos);
+ for(i=0;i<nsteps;i++) {
+ // advance end if needed
+ double ep=wpos+window_size;
+ while(ce<n && pos[ce]<=ep) {
+ ctc++; ce++;
+ }
+ // advance start
+ while(cs<n && pos[cs]<wpos) {
+ ctc--; cs++;
+ }
+ dout[i]=ctc;
+ // advance window position
+ wpos+=window_step;
+ }
+ UNPROTECT(1);
+ return(tc_R);
+}
+
+// tag counts in windows around specified positions
+// pos - tag positions
+// ntags - number of tags in each position
+// wpos - window positions
+// returns a pos-length vector giving number of tags that fall within window_half_size from the provided positions
+SEXP cwindow_n_tags_around(SEXP pos_R, SEXP ntags_R, SEXP wpos_R, SEXP window_half_size_R) {
+ double* pos=REAL(pos_R);
+ int* ntags=INTEGER(ntags_R);
+ int n=LENGTH(pos_R);
+ double* wpos=REAL(wpos_R);
+ int nw=LENGTH(wpos_R); // number of windows
+ double whs=(double) *INTEGER(window_half_size_R);
+
+ // allocate return array
+ SEXP tc_R;
+ PROTECT(tc_R=allocVector(INTSXP,nw));
+ int* dout=INTEGER(tc_R);
+
+ int i;
+ int cs=0; int ce=0; // current array start/end indecies
+ int ctc=0; // current tag count
+ for(i=0;i<nw;i++) {
+ //if(i>(nw-2)) { Rprintf("-i=%d; cs=%d, ce=%d; ctc=%d\n",i,cs,ce,ctc); }
+ // advance end if needed
+ double ep=wpos[i]+whs;
+ while(ce<n && pos[ce]<=ep) {
+ ctc+=ntags[ce]; ce++;
+ }
+ // advance start
+ double sp=wpos[i]-whs;
+ while(cs<n && pos[cs]<sp) {
+ ctc-=ntags[cs]; cs++;
+ }
+ dout[i]=ctc;
+ // if(i>(nw-2)) { Rprintf("+i=%d; cs=%d, ce=%d; ctc=%d\n",i,cs,ce,ctc); }
+ }
+ UNPROTECT(1);
+ return(tc_R);
+}
+
diff --git a/src/const.h b/src/const.h
new file mode 100644
index 0000000..2a06313
--- /dev/null
+++ b/src/const.h
@@ -0,0 +1,18 @@
+#ifndef NST_CONST_H
+#define NST_CONST_H
+
+#define MAX_ULL 0xffffffffffffffffull
+
+typedef unsigned long long bit64_t;
+typedef unsigned bit32_t;
+typedef unsigned short bit16_t;
+typedef unsigned char bit8_t;
+
+extern bit8_t nst_nt4_table[];
+extern bit8_t nst_nt16_table[];
+extern char *nst_nt4_rev_table;
+extern char *nst_nt16_rev_table;
+extern bit8_t nst_nt16_nt4_table[];
+extern int nst_nt16_count_table[];
+
+#endif
diff --git a/src/maqmap.c b/src/maqmap.c
new file mode 100644
index 0000000..96b4fff
--- /dev/null
+++ b/src/maqmap.c
@@ -0,0 +1,164 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <unistd.h>
+#include "const.h"
+#include "maqmap.h"
+
+maqmap_t *maq_new_maqmap()
+{
+ maqmap_t *mm = (maqmap_t*)calloc(1, sizeof(maqmap_t));
+ mm->format = MAQMAP_FORMAT_NEW;
+ return mm;
+}
+void maq_delete_maqmap(maqmap_t *mm)
+{
+ int i;
+ if (mm == 0) return;
+ for (i = 0; i < mm->n_ref; ++i)
+ free(mm->ref_name[i]);
+ free(mm->ref_name);
+ free(mm->mapped_reads);
+ free(mm);
+}
+void maqmap_write_header(gzFile fp, const maqmap_t *mm)
+{
+ int i, len;
+ gzwrite(fp, &mm->format, sizeof(int));
+ gzwrite(fp, &mm->n_ref, sizeof(int));
+ for (i = 0; i != mm->n_ref; ++i) {
+ len = strlen(mm->ref_name[i]) + 1;
+ gzwrite(fp, &len, sizeof(int));
+ gzwrite(fp, mm->ref_name[i], len);
+ }
+ gzwrite(fp, &mm->n_mapped_reads, sizeof(bit64_t));
+}
+maqmap_t *maqmap_read_header(gzFile fp)
+{
+ maqmap_t *mm;
+ int k, len;
+ mm = maq_new_maqmap();
+ gzread(fp, &mm->format, sizeof(int));
+ if (mm->format != MAQMAP_FORMAT_NEW) {
+ if (mm->format > 0) {
+ fprintf(stderr, "** Obsolete map format is detected. Please use 'mapass2maq' command to convert the format.\n");
+ exit(3);
+ }
+ assert(mm->format == MAQMAP_FORMAT_NEW);
+ }
+ gzread(fp, &mm->n_ref, sizeof(int));
+ mm->ref_name = (char**)calloc(mm->n_ref, sizeof(char*));
+ for (k = 0; k != mm->n_ref; ++k) {
+ gzread(fp, &len, sizeof(int));
+ mm->ref_name[k] = (char*)malloc(len * sizeof(char));
+ gzread(fp, mm->ref_name[k], len);
+ }
+ /* read number of mapped reads */
+ gzread(fp, &mm->n_mapped_reads, sizeof(bit64_t));
+ return mm;
+}
+
+/* mapvalidate */
+
+static void mapvalidate_core(gzFile fpin)
+{
+ maqmap_t *m = maqmap_read_header(fpin);
+ maqmap1_t *m1, mm1;
+ bit64_t n = 0;
+ int i, l;
+ bit64_t *cnt;
+ m1 = &mm1;
+ cnt = (bit64_t*)calloc(m->n_ref, 8);
+ printf("[message] number of reference sequences: %d\n", m->n_ref);
+ while ((l = maqmap_read1(fpin, m1)) != 0) {
+ if (l != sizeof(maqmap1_t)) {
+ printf("[fatal error] truncated map file.\n");
+ break;
+ }
+ ++n;
+ if ((int)m1->seqid >= m->n_ref) {
+ printf("[fatal error] maqmap1_t::seqid is invalid (%d >= %d).\n", m1->seqid, m->n_ref);
+ break;
+ }
+ ++cnt[m1->seqid];
+ if (m1->size >= MAX_READLEN - 1) {
+ printf("[faltal error] maqmap1_t::size is invalid (%d >= %d).\n", m1->size, MAX_READLEN - 1);
+ break;
+ }
+ }
+ if (m->n_mapped_reads != 0) {
+ if (m->n_mapped_reads != n) {
+ printf("[warning] maqmap1_t::n_mapped_reads is set, but not equals the real number (%llu != %llu).\n",
+ m->n_mapped_reads, n);
+ }
+ }
+ for (i = 0; i != m->n_ref; ++i)
+ printf("[message] %s : %llu\n", m->ref_name[i], cnt[i]);
+ free(cnt);
+ maq_delete_maqmap(m);
+}
+
+/* mapview */
+
+static void mapview_core(FILE *fpout, gzFile fpin, int is_verbose, int is_mm)
+{
+ bit32_t j;
+ maqmap_t *m = maqmap_read_header(fpin);
+ maqmap1_t *m1, mm1;
+ m1 = &mm1;
+ while (maqmap_read1(fpin, m1)) {
+ fprintf(fpout, "%s\t%s\t%d\t%c\t%d\t%u\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",
+ m1->name, m->ref_name[m1->seqid], (m1->pos>>1) + 1,
+ (m1->pos&1)? '-' : '+', m1->dist, m1->flag, m1->map_qual, (signed char)m1->seq[MAX_READLEN-1],
+ m1->alt_qual, m1->info1&0xf, m1->info2, m1->c[0], m1->c[1], m1->size);
+ if (is_verbose) {
+ fputc('\t', fpout);
+ for (j = 0; j != m1->size; ++j) {
+ if (m1->seq[j] == 0) fputc('n', fpout);
+ else if ((m1->seq[j]&0x3f) < 27) fputc("acgt"[m1->seq[j]>>6&3], fpout);
+ else fputc("ACGT"[m1->seq[j]>>6&3], fpout);
+ }
+ fputc('\t', fpout);
+ for (j = 0; j != m1->size; ++j)
+ fputc((m1->seq[j]&0x3f) + 33, fpout);
+ }
+ if (is_mm) {
+ bit64_t *p = (bit64_t*)(m1->seq + 55);
+ fprintf(fpout, "\t%llx", *p);
+ }
+ fputc('\n', fpout);
+ }
+ maq_delete_maqmap(m);
+}
+
+int ma_mapview(int argc, char *argv[])
+{
+ int c, is_verbose = 1, is_mm = 0;
+ while ((c = getopt(argc, argv, "bN")) >= 0) {
+ switch (c) {
+ case 'b': is_verbose = 0; break;
+ case 'N': is_mm = 1; break;
+ }
+ }
+ if (argc == optind) {
+ fprintf(stderr, "Usage: maq mapview [-bN] <in.map>\n");
+ return 1;
+ }
+ gzFile fp = (strcmp(argv[optind], "-") == 0)? gzdopen(STDIN_FILENO, "r") : gzopen(argv[optind], "r");
+ mapview_core(stdout, fp, is_verbose, is_mm);
+ gzclose(fp);
+ return 0;
+}
+
+int ma_mapvalidate(int argc, char *argv[])
+{
+ gzFile fp;
+ if (argc < 2) {
+ fprintf(stderr, "Usage: maq mapvalidate <in.map>\n");
+ return 1;
+ }
+ fp = (strcmp(argv[optind], "-") == 0)? gzdopen(STDIN_FILENO, "r") : gzopen(argv[1], "r");
+ mapvalidate_core(fp);
+ gzclose(fp);
+ return 0;
+}
diff --git a/src/maqmap.h b/src/maqmap.h
new file mode 100644
index 0000000..9beba0c
--- /dev/null
+++ b/src/maqmap.h
@@ -0,0 +1,70 @@
+#ifndef MAQMAP_H_
+#define MAQMAP_H_
+
+#ifdef MAQ_LONGREADS
+# define MAX_READLEN 128
+#else
+# define MAX_READLEN 64
+#endif
+
+#define MAX_NAMELEN 36
+#define MAQMAP_FORMAT_OLD 0
+#define MAQMAP_FORMAT_NEW -1
+
+#define PAIRFLAG_FF 0x01
+#define PAIRFLAG_FR 0x02
+#define PAIRFLAG_RF 0x04
+#define PAIRFLAG_RR 0x08
+#define PAIRFLAG_PAIRED 0x10
+#define PAIRFLAG_DIFFCHR 0x20
+#define PAIRFLAG_NOMATCH 0x40
+#define PAIRFLAG_SW 0x80
+
+#include <string.h>
+#include <zlib.h>
+#include "const.h"
+
+/*
+ name: read name
+ size: the length of the read
+ seq: read sequence (see also below)
+ seq[MAX_READLEN-1]: single end mapping quality (equals to map_qual if not paired)
+ map_qual: the final mapping quality
+ alt_qual: the lower quality of the two ends (equals to map_qual if not paired)
+ flag: status of the pair
+ dist: offset of the mate (zero if not paired)
+ info1: mismatches in the 24bp (higher 4 bits) and mismatches (lower 4 bits)
+ info2: sum of errors of the best hit
+ c[2]: count of all 0- and 1-mismatch hits on the reference
+ */
+typedef struct
+{
+ bit8_t seq[MAX_READLEN]; /* the last base is the single-end mapping quality. */
+ bit8_t size, map_qual, info1, info2, c[2], flag, alt_qual;
+ bit32_t seqid, pos;
+ int dist;
+ char name[MAX_NAMELEN];
+} maqmap1_t;
+
+typedef struct
+{
+ int format, n_ref;
+ char **ref_name;
+ bit64_t n_mapped_reads;
+ maqmap1_t *mapped_reads;
+} maqmap_t;
+
+#define maqmap_read1(fp, m1) gzread((fp), (m1), sizeof(maqmap1_t))
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+ maqmap_t *maq_new_maqmap();
+ void maq_delete_maqmap(maqmap_t *mm);
+ void maqmap_write_header(gzFile fp, const maqmap_t *mm);
+ maqmap_t *maqmap_read_header(gzFile fp);
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/maqread.cpp b/src/maqread.cpp
new file mode 100644
index 0000000..896068f
--- /dev/null
+++ b/src/maqread.cpp
@@ -0,0 +1,207 @@
+#include "pc.h"
+#include <vector>
+#include <string.h>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <strstream>
+#include <algorithm>
+#include <string>
+#include <functional>
+#include <utility>
+#include <zlib.h>
+
+extern "C" {
+//#include "R.h"
+//#include "Rmath.h"
+#include "Rinternals.h"
+#include "Rdefines.h"
+#include "maqmap.h"
+}
+
+using namespace std;
+using namespace __gnu_cxx;
+
+
+class lessAbsoluteValue {
+public:
+ bool operator()(int a, int b) const {
+ return abs(a) < abs(b);
+ }
+};
+
+
+
+//#define DEBUG 1
+
+extern "C" {
+
+ // read in text version of maq map
+ SEXP read_binmaqmap(SEXP filename,SEXP read_tag_names_R) {
+
+#ifdef DEBUG
+ Rprintf("start\n");
+#endif
+ const char* fname=CHAR(asChar(filename));
+ int read_names=*(INTEGER(read_tag_names_R));
+#ifdef DEBUG
+ Rprintf("fname=%s\n",fname);
+#endif
+
+ // main data vector
+ // chr - pos
+ vector< vector<int> > pos;
+ vector< vector<int> > posnm; // number of mismatches
+ vector< vector<string> > tagnames;
+
+ // chromosome map
+ hash_map<string, int, hash<string>,equal_to<string> > cind_map;
+ vector<string> cnames;
+
+
+ gzFile f=gzopen(fname,"r");
+
+ maqmap_t *m = maqmap_read_header(f);
+ maqmap1_t *m1, mm1;
+ m1 = &mm1;
+
+ if (!f) {
+ cout<<"can't open input file \""<<fname<<"\"\n";
+ } else {
+ Rprintf("opened %s\n",fname);
+
+ // read in bed line
+ string line;
+ int fcount=0;
+ while(maqmap_read1(f, m1)) {
+ string tagname=string(m1->name);
+ string chr=string(m->ref_name[m1->seqid]);
+ int len=m1->size;
+ int fpos=(m1->pos>>1) + 1;
+ if(m1->pos&1) {
+ fpos=-1*(fpos+len-1);
+ }
+ int nm=m1->info1&0xf;
+
+#ifdef DEBUG
+ Rprintf("read in map line chr=%s tagname=%s fpos=%d, nm=%d, len=%d\n",chr.c_str(),tagname.c_str(),fpos,nm,len);
+#endif
+
+
+ // determine the chromosome index
+ hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
+ int cind=-1;
+ if(li==cind_map.end()) {
+ // register new chromosome
+ cind=cnames.size();
+ cnames.push_back(chr);
+ cind_map[chr]=cind;
+ // allocate new pos vector
+ pos.push_back(vector<int>());
+ posnm.push_back(vector<int>());
+ if(read_names) {
+ tagnames.push_back(vector<string>());
+ }
+#ifdef DEBUG
+ Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size());
+#endif
+ } else {
+ cind=li->second;
+ }
+ fcount++;
+ (pos[cind]).push_back(fpos);
+ (posnm[cind]).push_back(nm);
+ if(read_names) {
+ (tagnames[cind]).push_back(tagname);
+ }
+#ifdef DEBUG
+ Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len);
+ if(fcount>30) {
+ break;
+ }
+#endif
+
+ }
+ gzclose(f);
+ Rprintf("done. read %d fragments\n",fcount);
+ }
+
+
+ // construct output structures
+ SEXP chnames;
+ int np=0; // number of protections
+ PROTECT(chnames = allocVector(STRSXP, cnames.size()));
+ for(vector<string>::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) {
+ SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str()));
+ }
+ np++;
+
+ // sort
+ //for(vector<vector<int> >::iterator csi=pos.begin();csi!=pos.end();++csi) {
+ // sort(csi->begin(), csi->end(), lessAbsoluteValue());
+ //}
+
+ SEXP ans;
+ PROTECT(ans = allocVector(VECSXP, cnames.size())); np++;
+ vector<vector<int> >::const_iterator nsi;
+ vector<vector<string> >::const_iterator ssi;
+ for(vector<vector<int> >::const_iterator csi=pos.begin();csi!=pos.end();++csi) {
+ nsi=posnm.begin()+(csi-pos.begin());
+
+ SEXP dv,dnames_R;
+ PROTECT(dnames_R = allocVector(STRSXP, 2+read_names)); np++;
+ SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+ SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+ if(read_names) {
+ SET_STRING_ELT(dnames_R, 2, mkChar("s"));
+ }
+
+
+
+ SEXP tv,nv,sv;
+ PROTECT(tv=allocVector(INTSXP,csi->size())); np++;
+ PROTECT(nv=allocVector(INTSXP,csi->size())); np++;
+ if(read_names) {
+ PROTECT(sv=allocVector(STRSXP,csi->size())); np++;
+ }
+ int* i_tv=INTEGER(tv);
+ int* i_nv=INTEGER(nv);
+
+ int i=0;
+ vector<int>::const_iterator ini=nsi->begin();
+ for(vector<int> ::const_iterator pi=csi->begin();pi!=csi->end();++pi) {
+ i_tv[i]=*pi;
+ i_nv[i]=*ini++;
+ i++;
+ }
+ if(read_names) {
+ int i=0;
+ ssi=tagnames.begin()+(csi-pos.begin());
+ for(vector<string>::const_iterator si=ssi->begin();si!=ssi->end();++si) {
+ SET_STRING_ELT(sv,i,mkChar(si->c_str()));
+ i++;
+ }
+ }
+ PROTECT(dv = allocVector(VECSXP, 2+read_names)); np++;
+ SET_VECTOR_ELT(dv, 0, tv);
+ SET_VECTOR_ELT(dv, 1, nv);
+ if(read_names) {
+ SET_VECTOR_ELT(dv, 2, sv);
+ }
+ setAttrib(dv, R_NamesSymbol, dnames_R);
+
+ SET_VECTOR_ELT(ans, csi-pos.begin(), dv);
+ }
+
+ setAttrib(ans,R_NamesSymbol,chnames);
+
+#ifdef DEBUG
+ Rprintf("unprotecting %d elements\n",np);
+#endif
+
+ UNPROTECT(np);
+ return(ans);
+}
+
+
+}
diff --git a/src/pc.h b/src/pc.h
new file mode 100644
index 0000000..3611c97
--- /dev/null
+++ b/src/pc.h
@@ -0,0 +1,17 @@
+#ifndef PC_H
+#define PC_H 1
+#include <functional>
+#include <ext/hash_map>
+
+namespace __gnu_cxx
+{
+ template<> struct hash< std::string >
+ {
+ size_t operator()( const std::string& x ) const
+ {
+ return hash< const char* >()( x.c_str() );
+ }
+ };
+}
+
+#endif
diff --git a/src/peaks.cpp b/src/peaks.cpp
new file mode 100644
index 0000000..37770d5
--- /dev/null
+++ b/src/peaks.cpp
@@ -0,0 +1,806 @@
+#include <vector>
+#include <string.h>
+#include <iostream>
+#include <string>
+#include <set>
+#include <cmath>
+#include <cstdlib>
+
+extern "C" {
+//#include "R.h"
+//#include "Rmath.h"
+#include "Rinternals.h"
+#include "Rdefines.h"
+}
+
+using namespace std;
+using namespace __gnu_cxx;
+
+/**
+ * Calculate all local peaks
+ */
+
+//#define DEBUG 1
+
+extern "C" {
+ SEXP find_peaks(SEXP x_R,SEXP thr_R,SEXP max_span_R) {
+
+#ifdef DEBUG
+ Rprintf("start\n");
+#endif
+ double* x=REAL(x_R);
+ int nx=LENGTH(x_R);
+ int max_span=*INTEGER(max_span_R);
+ double thr=REAL(thr_R)[0];
+#ifdef DEBUG
+ Rprintf("n=%d; thr=%f; max_span=%d\n",nx,thr,max_span);
+#endif
+
+ vector<int> pos;
+
+ double pv=x[0];
+ double ppv=0; // previous peak value
+ int ppp=-max_span-1; // previous peak position
+
+ for(int i=1;i<(nx-1);i++) {
+ if(x[i]>pv && x[i]>=thr && x[i]>x[i+1]) {
+ if(max_span>2) {
+ //Rprintf("i=%d; ppp=%d\n",i,ppp);
+ if(i-ppp > max_span) {
+ if(ppp>=0) {
+ pos.push_back(ppp);
+ }
+ //Rprintf("recorded %d; now %d\n",ppp,i);
+ ppp=i; ppv=x[i];
+ } else {
+ if(x[i]>ppv) {
+ //Rprintf("reset from %d to %d\n",ppp,i);
+ ppp=i; ppv=x[i];
+ }
+ }
+ } else {
+ pos.push_back(i);
+ }
+ }
+ if(x[i]!=x[i+1]) { pv=x[i]; }
+ }
+
+ // add remaining peak
+ if(max_span>2 && ppp>=0) {
+ pos.push_back(ppp);
+ }
+
+ SEXP nv;
+ PROTECT(nv=allocVector(INTSXP,pos.size()));
+ int* i_nv=INTEGER(nv);
+ int i=0;
+ for(vector<int> ::const_iterator pi=pos.begin();pi!=pos.end();++pi) {
+ i_nv[i++]=1+(*pi);
+ }
+
+ UNPROTECT(1);
+ return(nv);
+ }
+
+
+
+
+ /************************************************************************/
+ // given a data vector d (positive values) and a set of signed center coordinates pos,
+ // returns coordinates of data points relative to the centers
+ // size is the size of the region around the centers
+ // return: vector of relative coordinates (x) and indecies of centers relative the coordinate
+ // was calculated (i).
+ SEXP get_relative_coordinates(SEXP d_R,
+ SEXP pos_R,
+ SEXP size_R)
+ {
+ int *d, *pos;
+ int npos,nd,size;
+
+ d = INTEGER(d_R); pos = INTEGER(pos_R);
+ npos=LENGTH(pos_R); nd=LENGTH(d_R);
+ size = INTEGER(size_R)[0];
+#ifdef DEBUG
+ Rprintf("|d|=%d, |c|=%d, size=%d\n",nd,npos,size);
+#endif
+
+ vector<int> x; vector<int> xi;
+ int k=0; // current pos index
+
+ for(int i=0;i<nd;i++) {
+ // increment k until pos[k]+size>=d[i]
+ while((std::abs(pos[k])+size) < d[i]) { k++; if(k==npos) { break; };
+#ifdef DEBUG
+ Rprintf("advancing k to %d\n",k);
+#endif
+ }
+ if(k==npos) { break; };
+ // increment i until d[i]>=pos[k]-size
+ while((std::abs(pos[k])-size) > d[i]) { i++; if(i==nd) { break; }
+#ifdef DEBUG
+ Rprintf("advancing i to %d\n",i);
+#endif
+ }
+ if(i==nd) { break; }
+
+
+ int l=k;
+ while((l<npos) && ((std::abs(pos[l])-size) <= d[i])) { l++;
+#ifdef DEBUG
+ Rprintf("advancing l to %d\n",l);
+#endif
+ }
+ for(int j=k;j<l;j++) {
+ int pd=d[i]-std::abs(pos[j]);
+ if(std::abs(pd)<=size) {
+ // record
+ if(pos[j]>0) {
+ x.push_back(pd);
+ } else {
+ x.push_back(-1*pd);
+ }
+ xi.push_back(j);
+#ifdef DEBUG
+ Rprintf("recorded i=%d, j=%d\n",i,j);
+#endif
+ } else {
+ break;
+ }
+ }
+ }
+
+ SEXP xv_R,xiv_R;
+ PROTECT(xv_R=allocVector(INTSXP,x.size()));
+ PROTECT(xiv_R=allocVector(INTSXP,x.size()));
+ int* xv=INTEGER(xv_R);
+ int* xiv=INTEGER(xiv_R);
+
+ int i=0;
+ for(vector<int> ::const_iterator pi=x.begin();pi!=x.end();++pi) {
+ xv[i++]=*pi;
+ }
+ i=0;
+ for(vector<int> ::const_iterator pi=xi.begin();pi!=xi.end();++pi) {
+ xiv[i++]=1+(*pi);
+ }
+
+ SEXP ans_R, names_R;
+ PROTECT(names_R = allocVector(STRSXP, 2));
+ SET_STRING_ELT(names_R, 0, mkChar("x"));
+ SET_STRING_ELT(names_R, 1, mkChar("i"));
+
+ PROTECT(ans_R = allocVector(VECSXP, 2));
+ SET_VECTOR_ELT(ans_R, 0, xv_R);
+ SET_VECTOR_ELT(ans_R, 1, xiv_R);
+ setAttrib(ans_R, R_NamesSymbol, names_R);
+
+ UNPROTECT(4);
+ return(ans_R);
+ }
+
+
+ // determines a set of points within a set of fragments
+ // note: all vectors sorted in ascending order
+ // note: all vectors are integers
+ // x_R - vector of point positions
+ // se_R - vector of start and end positions
+ // fi_R - vector of signed fragment indecies
+ // return_list_R - whether a list of fragments should be returned for each point
+ // return_unique_R - whether points in multiple fragments should be omitted
+ SEXP points_within(SEXP x_R,SEXP se_R,SEXP fi_R,SEXP return_list_R,SEXP return_unique_R,SEXP return_point_counts_R) {
+#ifdef DEBUG
+ Rprintf("start\n");
+#endif
+ int* x=INTEGER(x_R);
+ int nx=LENGTH(x_R);
+ int* se=INTEGER(se_R);
+ int* fi=INTEGER(fi_R);
+ int nf=LENGTH(se_R);
+
+ int return_list=*(INTEGER(return_list_R));
+ int return_unique=*(INTEGER(return_unique_R));
+ int return_point_counts=*(INTEGER(return_point_counts_R));
+
+#ifdef DEBUG
+ Rprintf("nf=%d; nx=%d, return_list=%d, return_unique=%d, return_point_counts=%d\n",nf/2,nx,return_list,return_unique,return_point_counts);
+#endif
+ set<int> fset;
+
+
+ SEXP nv; int *i_nv;
+ int np=0;
+ if(return_point_counts) {
+ PROTECT(nv = allocVector(INTSXP, nf/2)); np++;
+ i_nv=INTEGER(nv);
+ for(int i=0;i<nf/2;i++) { i_nv[i]=0; }
+ } else if(return_list) {
+ PROTECT(nv = allocVector(VECSXP, nx)); np++;
+ } else {
+ PROTECT(nv=allocVector(INTSXP,nx)); np++;
+ i_nv=INTEGER(nv);
+ }
+
+ int j=0;
+
+ for(int i=0;i<nx;i++) {
+ // advance j
+ while(j<nf && se[j]<x[i]) {
+ int frag=fi[j];
+ if(frag>0) { // insert
+ fset.insert(frag);
+#ifdef DEBUG
+ Rprintf("inserted frag %d, size=%d\n",frag,fset.size());
+#endif
+ } else { // remove
+ fset.erase(-frag);
+#ifdef DEBUG
+ Rprintf("removed frag %d, size=%d\n",-frag,fset.size());
+#endif
+ }
+ j++;
+ }
+#ifdef DEBUG
+ Rprintf("i=%d j=%d\n",i,j);
+#endif
+ if(return_list) {
+ if(fset.empty() || (return_unique && fset.size()>1)) {
+ // assign null list?
+ } else {
+ SEXP fil_R;
+ PROTECT(fil_R=allocVector(INTSXP,fset.size())); np++;
+ int* fil=INTEGER(fil_R);
+ int k=0;
+ for(set<int>::const_iterator ki=fset.begin();ki!=fset.end();++ki) {
+ fil[k]=*ki; k++;
+ }
+ SET_VECTOR_ELT(nv, i, fil_R);
+ UNPROTECT(1); np--;
+ }
+ } else {
+ if(return_point_counts) {
+ for(set<int>::const_iterator ki=fset.begin();ki!=fset.end();++ki) {
+ i_nv[*ki-1]++;
+ }
+ } else {
+ if(fset.empty() || (return_unique && fset.size()>1)) {
+ i_nv[i]=-1;
+ } else {
+ i_nv[i]=*fset.begin();
+ }
+ }
+ }
+ }
+
+ UNPROTECT(np);
+ return(nv);
+ }
+
+
+ SEXP expuni_lr(SEXP x_R, // positions and their number (assumed sorted in ascending order)
+ SEXP mdist_R, // max distance at which points should be considered
+ SEXP lambda_R, // lambda value
+ SEXP spos_R, // starting position
+ SEXP epos_R, // ending position
+ SEXP step_R, // step size
+ SEXP return_peaks_R, // whether peak positions should be returned, or entire score vector
+ SEXP min_peak_lr_R // min peak height (lr)
+ )
+ {
+
+#ifdef DEBUG
+ Rprintf("start\n");
+#endif
+ int* x=INTEGER(x_R);
+ int nx=LENGTH(x_R);
+ int mdist=INTEGER(mdist_R)[0];
+ double lambda=*(REAL(lambda_R));
+
+ int return_peaks=*(INTEGER(return_peaks_R));
+ double min_peak=*(REAL(min_peak_lr_R));
+
+ int spos=*(INTEGER(spos_R));
+ int epos=*(INTEGER(epos_R));
+ int step=*(INTEGER(step_R));
+
+ int nsteps=(int) (epos-spos)/step;
+
+
+#ifdef DEBUG
+ Rprintf("n=%d; lambda=%f; mdist=%d; spos=%d; epos=%d; step=%d; nsteps=%d\n",nx,lambda,mdist,spos,epos,step,nsteps);
+#endif
+
+
+ SEXP nv;
+ double *d_nv;
+ if(!return_peaks) {
+ PROTECT(nv=allocVector(REALSXP,nsteps+1));
+ d_nv=REAL(nv);
+ }
+
+
+ int i=0; // current index of the first point being used in the calculations
+ int j=0; // current index of the last point being used in the calculations
+ int sx=0; // current sum of all positions
+ int n=0;
+
+ for(int k=0; k<=nsteps; k++) {
+ int cpos=spos+k*step;
+ // increase i until x[i]>=cpos-mdist; remove x from sx; decrement n;
+ while(i<nx && x[i]<(cpos-mdist)) {
+ n--; sx-=x[i]; i++;
+ //Rprintf("incremented i: i=%d; n=%d; sx=%d; cpos-mdist=%d; x[i-1]=%d\n",i,n,sx,cpos-mdist,x[i-1]);
+ }
+ //Rprintf("stable i: i=%d; n=%d; sx=%d; cpos-mdist=%d; x[i-1]=%d\n",i,n,sx,cpos-mdist,x[i-1]);
+
+ //if(i>j) { j=i; }
+
+ // increase j until x[j]>cpos
+ while(j<nx && x[j]<=cpos) {
+ n++; sx+=x[j]; j++;
+ //Rprintf("incremented j: j=%d; n=%d; sx=%d; cpos=%d; x[j-1]=%d\n",j,n,sx,cpos,x[j-1]);
+ }
+ //Rprintf("stable j: j=%d; n=%d; sx=%d; cpos=%d; x[j-1]=%d\n",j,n,sx,cpos,x[j]);
+
+ // calculate lr
+ d_nv[k]=((double)(1-n))*log(lambda)-lambda*((double)(n*(cpos+1)-sx));
+ //Rprintf("recorded lr[%d]=%f\n",k-1,d_nv[k-1]);
+ }
+ UNPROTECT(1);
+ return(nv);
+ }
+
+
+ SEXP allpdist(SEXP x_R,SEXP max_dist_R) {
+
+#ifdef DEBUG
+ Rprintf("start\n");
+#endif
+ double* x=REAL(x_R);
+ int nx=LENGTH(x_R);
+ double max_dist=*REAL(max_dist_R);
+#ifdef DEBUG
+ Rprintf("n=%d; max_dist=%d\n",nx,max_dist);
+#endif
+
+ vector<double> dist;
+
+ for(int i=0;i<nx;i++) {
+ for(int j=i+1;j<nx;j++) {
+
+ double d=x[j]-x[i];
+#ifdef DEBUG
+ Rprintf("i=%d; j=%d; d=%f\n",i,j,d);
+#endif
+ if(d<=max_dist) {
+ dist.push_back(d);
+ } else {
+ break;
+ }
+ }
+ }
+
+ SEXP nv;
+ PROTECT(nv=allocVector(REALSXP,dist.size()));
+ double* i_nv=REAL(nv);
+ int i=0;
+ for(vector<double> ::const_iterator pi=dist.begin();pi!=dist.end();++pi) {
+ i_nv[i++]=*pi;
+ }
+
+ UNPROTECT(1);
+ return(nv);
+ }
+
+ // same as above, but for two different sets
+ SEXP allxpdist(SEXP x_R,SEXP y_R, SEXP max_dist_R) {
+
+#ifdef DEBUG
+ Rprintf("start\n");
+#endif
+ double* x=REAL(x_R);
+ double* y=REAL(y_R);
+ int nx=LENGTH(x_R);
+ int ny=LENGTH(y_R);
+ double max_dist=*REAL(max_dist_R);
+#ifdef DEBUG
+ Rprintf("nx=%d; ny=%d; max_dist=%d\n",nx,ny,max_dist);
+#endif
+
+ vector<double> dist;
+ int yi=0; // latest y start index
+
+ for(int i=0;i<nx;i++) {
+ // adjust yi so that yi>=x[i]-max_dist_R
+ while(y[yi]<(x[i]-max_dist) && yi<ny) { yi++; }
+ if(yi==ny) { break; }
+
+ for(int j=yi;j<ny;j++) {
+ double d=y[j]-x[i];
+#ifdef DEBUG
+ Rprintf("i=%d; j=%d; d=%f\n",i,j,d);
+#endif
+ if(d<=max_dist) {
+ dist.push_back(d);
+ } else {
+ break;
+ }
+ }
+ }
+
+ SEXP nv;
+ PROTECT(nv=allocVector(REALSXP,dist.size()));
+ double* i_nv=REAL(nv);
+ int i=0;
+ for(vector<double> ::const_iterator pi=dist.begin();pi!=dist.end();++pi) {
+ i_nv[i++]=*pi;
+ }
+
+ UNPROTECT(1);
+ return(nv);
+ }
+
+ // returns a vector giving for each point,
+ // number of points within a given max_dist
+ SEXP nwithindist(SEXP x_R,SEXP max_dist_R) {
+
+#ifdef DEBUG
+ Rprintf("start\n");
+#endif
+ double* x=REAL(x_R);
+ int nx=LENGTH(x_R);
+ double max_dist=*REAL(max_dist_R);
+
+ SEXP nv;
+ PROTECT(nv=allocVector(REALSXP,nx));
+ double* i_nv=REAL(nv);
+ for(int i=0;i<nx;i++) { i_nv[i]=0; }
+
+#ifdef DEBUG
+ Rprintf("n=%d; max_dist=%d\n",nx,max_dist);
+#endif
+
+ for(int i=0;i<nx;i++) {
+ for(int j=i+1;j<nx;j++) {
+
+ double d=x[j]-x[i];
+#ifdef DEBUG
+ Rprintf("i=%d; j=%d; d=%f\n",i,j,d);
+#endif
+ if(d<=max_dist) {
+ i_nv[i]++;
+ i_nv[j]++;
+ } else {
+ break;
+ }
+ }
+ }
+
+ UNPROTECT(1);
+ return(nv);
+ }
+
+
+
+
+ // given a list of sorted chromosome signal and background vectors (unscaled), determine
+ // cluster contigs exceeding thr poisson P value, based on a whs window size,
+ // and satisfying mcs cluster size
+ SEXP find_poisson_enrichment_clusters(SEXP pos_R,SEXP flag_R,SEXP wsize_R,SEXP thr_R,SEXP mcs_R,SEXP bgm_R,SEXP mintag_R,SEXP either_R) {
+
+#ifdef DEBUG
+ Rprintf("start\n");
+#endif
+ double* pos=REAL(pos_R);
+ int* flag=INTEGER(flag_R);
+ int nt=LENGTH(pos_R);
+
+ int mcs=*INTEGER(mcs_R);
+ int wsize=*INTEGER(wsize_R);
+ int either=*INTEGER(either_R);
+ double thr=REAL(thr_R)[0];
+ double bgm=REAL(bgm_R)[0];
+ double mintag=REAL(mintag_R)[0];
+
+#ifdef DEBUG
+ Rprintf("nt=%d; wsize=%d; thr=%f; mcs=%d; min.tag=%f; bgm=%f\n",nt,wsize,thr,mcs,mintag,bgm);
+#endif
+
+
+ vector< pair<double,double> > contigs;
+
+ // running indecies (start and end)
+ int si=0;
+ int ei=0;
+
+ // current window coordinate
+ double ws=pos[0];
+
+ // current window tag counts
+ int cc[2]={0,0};
+
+
+ if(nt>0) {
+ cc[flag[si]]++;
+ // increment window end
+ while(ei<(nt-1) && (pos[ei+1]-ws) <= wsize) {
+ ei++;
+ cc[flag[ei]]++;
+ }
+
+
+ // cluster start,end positions
+ double cs,ce;
+ int inclust=0;
+
+ while(si<nt-1) {
+
+ if((pos[si+1]-ws) > (pos[ei+1] - ws - wsize) && ei!=(nt-1)) {
+ // move end boudnary
+ ei++;
+ ws=pos[ei]-wsize;
+ cc[flag[ei]]++;
+ while(ei<(nt-1) && pos[ei+1]==ws+wsize) {
+ ei++;
+ cc[flag[ei]]++;
+ }
+
+ // increment window start
+ while(si<(nt-1) && pos[si] < ws) {
+ cc[flag[si]]--;
+ si++;
+ }
+
+ } else {
+ // move up start boundary
+ ws=pos[si+1];
+ cc[flag[si]]--;
+ si++;
+ while(si<(nt-1) && pos[si+1]==ws) {
+ cc[flag[si]]--;
+ si++;
+ }
+
+ // increment window end
+ while(ei<(nt-1) && (pos[ei+1] - ws) <= wsize) {
+ ei++;
+ cc[flag[ei]]++;
+ }
+
+ }
+
+ // calculate z score
+ double dc0=((double)cc[0])+0.5;
+ double dc1=((double)cc[1])+0.5;
+ double rte=dc0+dc1-0.25*thr*thr;
+ double lb;
+ if(rte<=0) {
+ lb=0;
+ } else {
+ lb=(sqrt(dc1*dc0) - 0.5*thr*sqrt(rte))/(dc0 - 0.25*thr*thr);
+ if(lb<0) { lb=0; }
+ lb*=lb;
+ }
+
+ //Rprintf("%f=f(%f,%f,%f); %f=f(%f,%f,%f)\n",lb,1.0-thr,2.0*dc1,2.0*dc0,ub,thr,2.0*dc1,2.0*dc0);
+
+#ifdef DEBUG
+ //double ub=gsl_cdf_fdist_Qinv(thr,2.0*dc1,2.0*dc0)*dc1/dc0;
+ double ub=(sqrt(dc1*dc0) + 0.5*thr*sqrt(rte))/(dc0 - 0.25*thr*thr);
+ ub*=ub;
+ Rprintf("s=%d (%f); e=%d (%f); window: %f-%f; cc=[%d,%d]; lb=%f; ub=%f\n",si,pos[si],ei,pos[ei],ws,ws+wsize,cc[0],cc[1],lb,ub);
+#endif
+
+ int bc=lb>=bgm && cc[1]>=mintag;
+ if(either) {
+ bc=lb>=bgm || cc[1]>=mintag;
+ }
+ if(bc) {
+ if(inclust) {
+ double nce=ws+wsize/2.0;
+ if(nce-ce > wsize/2.0) {
+ // next point is too far removed, end cluster
+ if(ce-cs >= mcs) {
+ contigs.push_back(pair<double,double>(cs,ce));
+#ifdef DEBUG
+ Rprintf("recorded cluster %f-%f\n",cs,ce);
+#endif
+ }
+ inclust=0;
+ } else {
+ ce=nce;
+ }
+ } else {
+ inclust=1;
+ cs=ws+wsize/2.0;
+ ce=cs;
+ }
+ } else {
+ if(inclust) {
+ if(ce-cs >= mcs) {
+ contigs.push_back(pair<double,double>(cs,ce));
+#ifdef DEBUG
+ Rprintf("recorded cluster %f-%f\n",cs,ce);
+#endif
+ }
+ inclust=0;
+ }
+ }
+
+ }
+
+ if(inclust) {
+ if(ce-cs >= mcs) {
+ contigs.push_back(pair<double,double>(cs,ce));
+#ifdef DEBUG
+ Rprintf("recorded cluster %f-%f\n",cs,ce);
+#endif
+ }
+ inclust=0;
+ }
+ }
+
+ SEXP cs_R,ce_R;
+ PROTECT(cs_R=allocVector(REALSXP,contigs.size()));
+ PROTECT(ce_R=allocVector(REALSXP,contigs.size()));
+ double* csa=REAL(cs_R);
+ double* cea=REAL(ce_R);
+
+ int i=0;
+ for(vector< pair<double,double> >::const_iterator ci=contigs.begin(); ci!=contigs.end();++ci) {
+ csa[i]=ci->first;
+ cea[i]=ci->second;
+ i++;
+ }
+
+ SEXP ans_R, names_R;
+ PROTECT(names_R = allocVector(STRSXP, 2));
+ SET_STRING_ELT(names_R, 0, mkChar("s"));
+ SET_STRING_ELT(names_R, 1, mkChar("e"));
+
+ PROTECT(ans_R = allocVector(VECSXP, 2));
+ SET_VECTOR_ELT(ans_R, 0, cs_R);
+ SET_VECTOR_ELT(ans_R, 1, ce_R);
+ setAttrib(ans_R, R_NamesSymbol, names_R);
+
+ UNPROTECT(4);
+ return(ans_R);
+
+ }
+
+
+ // finds intersection between a list of regions
+ // the flag has +n/-n value, corresponding to the start/end of a segment in n-th regionset
+ // max_val: 1 - report max overlapping value, -1: report min, 0 - don't look at values
+ // returns: $s, $e, ($v) lists
+ SEXP region_intersection(SEXP n_R,SEXP pos_R,SEXP flags_R,SEXP vals_R,SEXP max_val_R,SEXP union_R) {
+ const int max_val=*INTEGER(max_val_R);
+ const int unionr=*INTEGER(union_R);
+ const int n=*INTEGER(n_R);
+ double* pos=REAL(pos_R);
+ int* flags=INTEGER(flags_R);
+ double* val=REAL(vals_R);
+
+#ifdef DEBUG
+ Rprintf("n=%d; npos=%d; max_val=%d\n",n,LENGTH(pos_R),max_val);
+#endif
+
+ int s[n]; // flag status for each set
+ double mv[n]; // max/min value of current clusters
+
+ for(int i=0;i<n;i++) { s[i]=0; }
+
+ vector<double> starts;
+ vector<double> ends;
+ vector<double> values;
+
+ int start=-1;
+ double mval=0;
+ for(int i=0;i<LENGTH(pos_R);i++) {
+ // update flags
+ int f=flags[i];
+ if(f>0) {
+ s[std::abs(f)-1]++;
+ } else {
+ s[std::abs(f)-1]--;
+ }
+
+ if(max_val!=0 && val[i]*max_val > mval*max_val) { mval=val[i]; }
+
+ // joined status
+ int all;
+ if(unionr) {
+ all=0;
+ for(int j=0;j<n;j++) { if(s[j]>0) { all=1; break;} }
+ } else {
+ all=1;
+ for(int j=0;j<n;j++) { all=all & (s[j]>0); }
+ }
+
+
+ //Rprintf("i=%d; s=[",i);
+ //for(int j=0;j<n;j++) { Rprintf("%d",s[j]); }
+ //Rprintf("]; all=%d; start=%d\n",all,start);
+
+ if(start>=0) {
+ // in fragment
+ if(!all) {
+ // end fragment
+ starts.push_back(pos[start]);
+ ends.push_back(pos[i]);
+ start=-1;
+ if(max_val!=0) { values.push_back(mval); }
+
+#ifdef DEBUG
+ Rprintf("recorded new fragment (s=%f,e=%f,v=%f);\n",pos[start],pos[i],mval);
+#endif
+ }
+ } else {
+ // should a fragment be started?
+ if(all) {
+ start=i;
+ if(max_val!=0) { mval=val[i]; }
+#ifdef DEBUG
+ Rprintf("starting new fragment (s=%f,i=%d);\n",pos[start],i);
+#endif
+ }
+ }
+ }
+ SEXP cs_R,ce_R,cv_R;
+ PROTECT(cs_R=allocVector(REALSXP,starts.size()));
+ PROTECT(ce_R=allocVector(REALSXP,ends.size()));
+
+ double* csa=REAL(cs_R);
+ int i=0;
+ for(vector<double>::const_iterator ci=starts.begin(); ci!=starts.end(); ++ci) {
+ csa[i]=*ci; i++;
+ }
+
+ csa=REAL(ce_R);
+ i=0;
+ for(vector<double>::const_iterator ci=ends.begin(); ci!=ends.end(); ++ci) {
+ csa[i]=*ci; i++;
+ }
+
+ if(max_val!=0) {
+ PROTECT(cv_R=allocVector(REALSXP,values.size()));
+ csa=REAL(cv_R);
+ i=0;
+ for(vector<double>::const_iterator ci=values.begin(); ci!=values.end(); ++ci) {
+ csa[i]=*ci; i++;
+ }
+ }
+
+ SEXP ans_R, names_R;
+ if(max_val!=0) {
+ PROTECT(names_R = allocVector(STRSXP, 3));
+ SET_STRING_ELT(names_R, 0, mkChar("s"));
+ SET_STRING_ELT(names_R, 1, mkChar("e"));
+ SET_STRING_ELT(names_R, 2, mkChar("v"));
+
+ PROTECT(ans_R = allocVector(VECSXP, 3));
+ SET_VECTOR_ELT(ans_R, 0, cs_R);
+ SET_VECTOR_ELT(ans_R, 1, ce_R);
+ SET_VECTOR_ELT(ans_R, 2, cv_R);
+ } else {
+ PROTECT(names_R = allocVector(STRSXP, 2));
+ SET_STRING_ELT(names_R, 0, mkChar("s"));
+ SET_STRING_ELT(names_R, 1, mkChar("e"));
+
+ PROTECT(ans_R = allocVector(VECSXP, 2));
+ SET_VECTOR_ELT(ans_R, 0, cs_R);
+ SET_VECTOR_ELT(ans_R, 1, ce_R);
+ }
+
+ setAttrib(ans_R, R_NamesSymbol, names_R);
+
+ if(max_val!=0) {
+ UNPROTECT(5);
+ } else {
+ UNPROTECT(4);
+ }
+ return(ans_R);
+ }
+
+}
+
diff --git a/src/wdl.cpp b/src/wdl.cpp
new file mode 100644
index 0000000..18a2258
--- /dev/null
+++ b/src/wdl.cpp
@@ -0,0 +1,659 @@
+#include <vector>
+#include <string.h>
+#include <iostream>
+#include <string>
+#include <set>
+#include <cmath>
+#include <cstdlib>
+
+extern "C" {
+//#include "R.h"
+//#include "Rmath.h"
+#include "Rinternals.h"
+#include "Rdefines.h"
+}
+
+using namespace std;
+using namespace __gnu_cxx;
+
+//#define DEBUG 1
+
+extern "C" {
+
+ /************************************************************************/
+ /*
+ * lwcc - calculate local window cross-correlation
+ */
+
+ SEXP lwcc(SEXP x_R, // positive strand hist
+ SEXP y_R, // negative strand hist of the same length
+ SEXP osize_R, // outer boundary distance
+ SEXP isize_R, // inner boundary distance
+ SEXP return_peaks_R, // whether all correlation values, or just peaks should be returned
+ SEXP min_peak_dist_R, // distance between closest peaks
+ SEXP min_peak_val_R, // min peak threshold
+ SEXP tag_weight_R, // tag weight
+ SEXP bg_subtract_R, // a flag whether do background subtractio
+ SEXP bgp_R, // optional background hist for positive strand
+ SEXP bgn_R, // optional background hist for negative strand
+ SEXP bg_wsize_R, // window size for the background counts
+ SEXP bg_weight_R, // optional weighting for the background tags, must compensate for window size difference (including is cutout)
+ SEXP round_up_R // whether to round up fractional signal tag counts
+ )
+ {
+
+#ifdef DEBUG
+ Rprintf("start\n");
+#endif
+
+ int is=INTEGER(isize_R)[0];
+ int os=INTEGER(osize_R)[0];
+ double rs=((double)(2*os+1));
+ int* x=INTEGER(x_R);
+ int* y=INTEGER(y_R);
+ int n_x=LENGTH(x_R);
+
+ // background-related
+ int* bgp=INTEGER(bgp_R);
+ int* bgn=INTEGER(bgn_R);
+ int bg_whs=INTEGER(bg_wsize_R)[0];
+
+ int return_peaks=*(INTEGER(return_peaks_R));
+ double min_peak_val=*(REAL(min_peak_val_R));
+ int min_peak_dist=*(INTEGER(min_peak_dist_R));
+ double tag_weight=*(REAL(tag_weight_R));
+
+ const int round_up=*(INTEGER(round_up_R));
+ const int bg_subtract=*(INTEGER(bg_subtract_R));
+ const double bg_weight=*(REAL(bg_weight_R));
+
+ int i; // point at which the value is being calculated
+ int start=os;
+ int end=n_x-os-1;
+
+ // bg tag counts within bg window
+ int bg_pn1=0;
+ int bg_nn1=0;
+ int bg_pn2=0;
+ int bg_nn2=0;
+
+
+
+ // illustration for counting:
+ //
+ // 012345678901234567890123456789012
+ // ==========------|------==========
+ //
+ // osize=16; isize=6;
+
+
+ SEXP nv;
+ double *d_nv;
+ vector<int> ppos;
+ vector<double> pval;
+ if(!return_peaks) {
+ PROTECT(nv=allocVector(REALSXP,n_x));
+ d_nv=REAL(nv);
+ for(int i=0;i<n_x;i++) {
+ d_nv[i]=0;
+ }
+ }
+
+#ifdef DEBUG
+ Rprintf("start=%d end=%d tag_weight=%f\n", start,end,tag_weight);
+ Rprintf("x[1]=%d x[2]=%d y[1]=%d y[2]=%d\n",x[1],x[2],y[1],y[2]);
+#endif
+
+ int lpp=-1; // last peak position
+ double lpv=-1e3; // last peak value
+
+ double ppv=-1e3; // last value
+ double pppv=-11e-3; // value before last
+
+ int pn1,pn2,nn1,nn2;
+
+
+ if(bg_subtract) {
+ // pre-initialize background tag counts,
+ for(int i=0;i<bg_whs;i++) {
+ if(i<n_x) {
+ bg_pn2+=bgp[i];
+ bg_nn2+=bgn[i];
+ }
+ }
+ }
+
+
+ for(i=0;i<end;i++) {
+#ifdef DEBUG
+ //Rprintf("i=%d ", i);
+#endif
+
+ if(bg_subtract) {
+ // update background counts
+ int nl=i-bg_whs-1;
+
+ if(nl>=0) {
+ bg_pn1-=bgp[nl];
+ bg_nn1-=bgn[nl];
+ }
+ bg_pn1+=bgp[i];
+ bg_nn1+=bgn[i];
+
+ if(i>0) {
+ bg_pn2-=bgp[i-1];
+ bg_nn2-=bgn[i-1];
+ }
+ int nr=i+bg_whs;
+ if(nr<n_x) {
+ bg_pn2+=bgp[nr];
+ bg_nn2+=bgn[nr];
+ }
+ }
+
+ if(i >= start) {
+ // update counts, taking into account masked out regions
+ pn1=pn2=nn1=nn2=0;
+
+ for(int k=0;k<=(os-is);k++) {
+ int xp1=x[i-os+k];
+ int xp2=x[i+os-k];
+ int xn1=y[i+os-k];
+ int xn2=y[i-os+k];
+
+ if(xp1!=-1 && xn1!=-1) {
+ pn1+=xp1;
+ nn1+=xn1;
+ }
+ if(xp2!=-1 && xn2!=-1) {
+ pn2+=xp2;
+ nn2+=xn2;
+ }
+ }
+
+ // calculate the means
+ double mp=((double)(pn1+pn2))/rs;
+ double mn=((double)(nn1+nn2))/rs;
+#ifdef DEBUG
+ Rprintf("mp=%f mn=%f\n",mp,mn);
+#endif
+ // calculate correlation
+ double varp=0;
+ double varn=0;
+ double num=0;
+ double val=-1e3;
+ if(mp>0 & mn>0) {
+ for(int k=0;k<=(os-is);k++) {
+ int xp1=x[i-os+k];
+ int xp2=x[i+os-k];
+ int xn1=y[i+os-k];
+ int xn2=y[i-os+k];
+
+
+ if(xp1!=-1 && xn1!=-1) {
+ double nnp1=((double) xp1)-mp;
+ double nnn1=((double) xn1)-mn;
+ num+=nnp1*nnn1;
+ varp+=nnp1*nnp1;
+ varn+=nnn1*nnn1;
+ }
+
+ if(xp2!=-1 && xn2!=-1) {
+ double nnp2=((double) xp2)-mp;
+ double nnn2=((double) xn2)-mn;
+ num+=nnp2*nnn2;
+ varp+=nnp2*nnp2;
+ varn+=nnn2*nnn2;
+ }
+
+ }
+ double tagw;
+ double spn1=((double)pn1)*tag_weight;
+ double snn1=((double)nn1)*tag_weight;
+ double spn2=((double)pn2)*tag_weight;
+ double snn2=((double)nn2)*tag_weight;
+ if(round_up) {
+ if(pn1>0 && spn1<1) { spn1=1.0; }
+ //if(pn2>0 && spn2<1) { spn2=1.0; }
+ if(nn1>0 && snn1<1) { snn1=1.0; }
+ //if(nn2>0 && snn2<1) { snn2=1.0; }
+ }
+
+ if(bg_subtract) {
+ spn1-=((double)bg_pn1)*bg_weight;
+ snn1-=((double)bg_nn2)*bg_weight;
+ spn2-=((double)bg_pn2)*bg_weight;
+ snn2-=((double)bg_nn1)*bg_weight;
+
+ if(spn2<0) spn2=0;
+ if(snn2<0) snn2=0;
+
+ if(spn1>0 && snn1>0) {
+ tagw=(2.0*sqrt(spn1*snn1)-(spn2+snn2+1.0));
+ } else {
+ tagw=-(spn2+snn2+1.0);
+ }
+ //cout<<"bg_pn1="<<bg_pn1<<"; bg_pn2="<<bg_pn2<<"; bg_nn1="<<bg_nn1<<"; bg_nn2="<<bg_nn2<<endl;
+ } else {
+ tagw=2.0*sqrt(spn1*snn1)-(spn2+snn2);
+ }
+
+ if(tagw<0) {
+ val=0.0;
+ } else {
+ if(num==0.0) {
+ val=0;
+ } else {
+ val=num/(sqrt(varp*varn));
+ }
+ val=val*sqrt(tagw) + tagw;
+
+ }
+ //cout<<"val="<<val<<endl;
+
+#ifdef DEBUG
+ Rprintf("pn1=%d pn2=%d nn1=%d nn2=%d tag.weight=%f tagw=%f\n",pn1,pn2,nn1,nn2,tag_weight,tagw);
+ Rprintf("tagw=%f varp=%f varn=%f num=%f cor=%f val=%f\n",tagw,varp,varn,num,num/sqrt(varp*varn),val);
+#endif
+ }
+
+
+
+ if(return_peaks) {
+ // determine if previous position was a peak
+ if(ppv>min_peak_val && ppv>val && ppv>pppv) {
+ if(lpp>0 && (i-lpp+1)>min_peak_dist) {
+ // record previous peak position
+ ppos.push_back(lpp);
+ pval.push_back(lpv);
+#ifdef DEBUG
+ Rprintf("recording peak x=%d y=%f d=%d\n",lpp,lpv,(i-lpp));
+#endif
+ lpp=i-1; lpv=ppv;
+#ifdef DEBUG
+ Rprintf("updated peak to x=%d y=%f\n",lpp,lpv);
+#endif
+ } else {
+ if(ppv>lpv) {
+ // update last peak positions
+#ifdef DEBUG
+ Rprintf("skipping peak x=%d y=%f d=%d in favor of x=%d y=%f\n",lpp,lpv,(i-lpp),i-1,ppv);
+#endif
+ lpp=i-1; lpv=ppv;
+ }
+ }
+ }
+
+ // update previous values
+ if(val!=ppv) {
+ pppv=ppv; ppv=val;
+ }
+ } else {
+ d_nv[i]=val;
+ }
+ }
+ }
+
+ if(return_peaks) {
+ // record last position
+ if(lpp>0) {
+#ifdef DEBUG
+ Rprintf("recording last peak x=%d y=%f\n",lpp,lpv);
+#endif
+ ppos.push_back(lpp);
+ pval.push_back(lpv);
+ }
+
+ SEXP rpp_R,rpv_R;
+ PROTECT(rpp_R=allocVector(INTSXP,ppos.size()));
+ PROTECT(rpv_R=allocVector(REALSXP,ppos.size()));
+ int* rpp=INTEGER(rpp_R);
+ double* rpv=REAL(rpv_R);
+
+ for(int i=0;i<ppos.size();i++) {
+ rpp[i]=ppos[i];
+ rpv[i]=pval[i];
+ }
+
+ SEXP ans_R, names_R;
+ PROTECT(names_R = allocVector(STRSXP, 2));
+ SET_STRING_ELT(names_R, 0, mkChar("x"));
+ SET_STRING_ELT(names_R, 1, mkChar("v"));
+
+ PROTECT(ans_R = allocVector(VECSXP, 2));
+ SET_VECTOR_ELT(ans_R, 0, rpp_R);
+ SET_VECTOR_ELT(ans_R, 1, rpv_R);
+ setAttrib(ans_R, R_NamesSymbol, names_R);
+
+ UNPROTECT(4);
+ return(ans_R);
+ } else {
+ UNPROTECT(1);
+ return(nv);
+ }
+
+ }
+
+
+
+ /************************************************************************/
+ /*
+ * wtd - window tag difference implementation
+ */
+
+ SEXP wtd(SEXP x_R, // positive strand hist
+ SEXP y_R, // negative strand hist of the same length
+ SEXP wsize_R, // outer boundary distance
+ SEXP return_peaks_R, // whether all correlation values, or just peaks should be returned
+ SEXP min_peak_dist_R, // distance between closest peaks
+ SEXP min_peak_val_R, // min peak threshold
+ SEXP direct_count_R, // whether tag weighting should not be done
+ SEXP tag_weight_R, // tag weight
+ SEXP ignore_masking_R, // whether to ignore masked regions
+ SEXP bg_subtract_R, // a flag whether do background subtractio
+ SEXP bgp_R, // optional background hist for positive strand
+ SEXP bgn_R, // optional background hist for negative strand
+ SEXP bg_wsize_R, // window size for the background counts
+ SEXP bg_weight_R, // optional weighting for the background tags, must compensate for window size difference
+ SEXP round_up_R // whether to round up fractional signal tag counts
+ )
+ {
+
+#ifdef DEBUG
+ Rprintf("start\n");
+#endif
+
+ int whs=INTEGER(wsize_R)[0];
+ int* x=INTEGER(x_R);
+ int* y=INTEGER(y_R);
+ int n_x=LENGTH(x_R);
+
+ // background-related
+ int* bgp=INTEGER(bgp_R);
+ int* bgn=INTEGER(bgn_R);
+ int bg_whs=INTEGER(bg_wsize_R)[0];
+
+
+ const int return_peaks=*(INTEGER(return_peaks_R));
+ const int direct_count=*(INTEGER(direct_count_R));
+ const int ignore_masking=*(INTEGER(ignore_masking_R));
+ const double min_peak_val=*(REAL(min_peak_val_R));
+ const int min_peak_dist=*(INTEGER(min_peak_dist_R));
+ const double tag_weight=*(REAL(tag_weight_R));
+
+ const int round_up=*(INTEGER(round_up_R));
+ const int bg_subtract=*(INTEGER(bg_subtract_R));
+ const double bg_weight=*(REAL(bg_weight_R));
+
+ int i; // point at which the value is being calculated
+ int start=whs+1;
+ int end=n_x-whs-1;
+
+ // tag counts to calculate the means
+ int pn1=0;
+ int pn2=0;
+ int nn1=0;
+ int nn2=0;
+
+ // bg tag counts within bg window
+ int bg_pn1=0;
+ int bg_pn2=0;
+ int bg_nn1=0;
+ int bg_nn2=0;
+
+ SEXP nv;
+ double *d_nv;
+ vector<int> ppos;
+ vector<double> pval;
+ if(!return_peaks) {
+ PROTECT(nv=allocVector(REALSXP,n_x));
+ d_nv=REAL(nv);
+ for(int i=0;i<n_x;i++) {
+ d_nv[i]=0;
+ }
+ }
+
+#ifdef DEBUG
+ Rprintf("whs=%d start=%d end=%d tag_weight=%f ignore_masing=%d\n", whs, start,end,tag_weight,ignore_masking);
+ Rprintf("x[1]=%d x[2]=%d y[1]=%d y[2]=%d\n",x[1],x[2],y[1],y[2]);
+#endif
+
+ int lpp=-1; // last peak position
+ double lpv=-1000; // last peak value
+
+ double ppv=-1000; // last value
+ int ppl=-1; // position of the last value
+ double pppv=-1000; // value before last
+
+
+ if(ignore_masking==1) {
+ for(int i=0;i<whs;i++) {
+ pn1+=x[i];
+ pn2+=x[i+whs+1];
+ nn1+=y[i];
+ nn2+=y[i+whs+1];
+
+ }
+ }
+
+ if(bg_subtract) {
+ // pre-initialize background tag counts,
+ for(int i=0;i<bg_whs;i++) {
+ if(i<n_x) {
+ bg_pn2+=bgp[i];
+ bg_nn2+=bgn[i];
+ }
+ }
+ // increment center of background count window to the start position
+ for(int i=0;i<start;i++) {
+ // update background counts
+ int nl=i-bg_whs-1;
+
+ if(nl>=0) {
+ bg_pn1-=bgp[nl];
+ bg_nn1-=bgn[nl];
+ }
+ bg_pn1+=bgp[i];
+ bg_nn1+=bgn[i];
+
+ if(i>0) {
+ bg_pn2-=bgp[i-1];
+ bg_nn2-=bgn[i-1];
+ }
+ int nr=i+bg_whs;
+ if(nr<n_x) {
+ bg_pn2+=bgp[nr];
+ bg_nn2+=bgn[nr];
+ }
+ }
+
+ }
+
+
+#ifdef DEBUG
+ Rprintf("initialization: i=%d pn1=%d, pn2=%d, nn1=%d, nn2=%d", i,pn1,pn2,nn1,nn2);
+#endif
+
+ for(i=start;i<end;i++) {
+ if(bg_subtract) {
+ // update background counts
+ int nl=i-bg_whs-1;
+
+ if(nl>=0) {
+ bg_pn1-=bgp[nl];
+ bg_nn1-=bgn[nl];
+ }
+ bg_pn1+=bgp[i];
+ bg_nn1+=bgn[i];
+
+ if(i>0) {
+ bg_pn2-=bgp[i-1];
+ bg_nn2-=bgn[i-1];
+ }
+ int nr=i+bg_whs;
+ if(nr<n_x) {
+ bg_pn2+=bgp[nr];
+ bg_nn2+=bgn[nr];
+ }
+ }
+
+ // update counts
+ if(ignore_masking==1) {
+ pn1+=x[i-1]-x[i-whs-1];
+ pn2+=x[i+whs]-x[i-1];
+ nn1+=y[i-1]-y[i-whs-1];
+ nn2+=y[i+whs]-y[i-1];
+
+ } else {
+
+ pn1=pn2=nn1=nn2=0;
+
+ for(int k=0;k<whs;k++) {
+ int xp1=x[i-k-1];
+ int xp2=x[i+k];
+ int xn1=y[i-k-1];
+ int xn2=y[i+k];
+
+ // omit masked positions
+ if(xp1!=-1 && xn1!=-1 && xp2!=-1 && xn2!=-1) {
+ pn1+=xp1;
+ nn1+=xn1;
+ pn2+=xp2;
+ nn2+=xn2;
+ }
+ }
+ }
+
+ double val;
+ double spn1=((double)pn1)*tag_weight;
+ double snn1=((double)nn1)*tag_weight;
+ double spn2=((double)pn2)*tag_weight;
+ double snn2=((double)nn2)*tag_weight;
+ if(round_up) {
+ if(pn1>0 && spn1<1) { spn1=1.0; }
+ //if(pn2>0 && spn2<1) { spn2=1.0; }
+ //if(nn1>0 && snn1<1) { snn1=1.0; }
+ if(nn2>0 && snn2<1) { snn2=1.0; }
+ }
+
+ if(direct_count) {
+ val=spn1+snn2;
+ if(round_up && val<1) {
+ val=1.0;
+ }
+ if(bg_subtract) {
+ val-=((double) (bg_pn1+bg_nn2))*bg_weight;
+ }
+ } else {
+ if(bg_subtract) {
+ spn1-=((double)bg_pn1)*bg_weight;
+ snn1-=((double)bg_nn1)*bg_weight;
+ spn2-=((double)bg_pn2)*bg_weight;
+ snn2-=((double)bg_nn2)*bg_weight;
+
+ if(spn2<0) spn2=0;
+ if(snn1<0) snn1=0;
+
+ if(spn1>0 && snn2>0) {
+ val=(2.0*sqrt(spn1*snn2)-(spn2+snn1+1.0));
+ } else {
+ val=-(spn2+snn1+1.0);
+ }
+ } else {
+ val=2.0*sqrt(spn1*snn2)-(spn2+snn1+tag_weight);
+ }
+ }
+ //double val=sqrt(pn1*nn2);
+ //if(pn2>nn1) { val-=pn2; } else { val-=pn1; }
+#ifdef DEBUG
+ Rprintf("update: i=%d pn1=%d pn2=%d nn1=%d nn2=%d val=%f\n",i,pn1,pn2,nn1,nn2,val);
+#endif
+
+ if(return_peaks) {
+ // determine if previous position was a peak
+ if(ppv>min_peak_val && ppv>val && ppv>pppv) {
+ if(lpp>0 && (i-lpp+1)>min_peak_dist) {
+ // record previous peak position
+ ppos.push_back(lpp);
+ pval.push_back(lpv);
+#ifdef DEBUG
+ Rprintf("recording peak x=%d y=%f d=%d\n",lpp,lpv,(i-lpp));
+#endif
+ if(ppl!=-1 && ppl!=i-1) {
+ lpp=(int) round((ppl+i-1)/2);
+ } else {
+ lpp=i-1;
+ }
+ lpv=ppv;
+#ifdef DEBUG
+ Rprintf("updated peak to x=%d y=%f\n",lpp,lpv);
+#endif
+ } else {
+ if(ppv>lpv) {
+ // update last peak positions
+#ifdef DEBUG
+ Rprintf("skipping peak x=%d y=%f d=%d in favor of x=%d y=%f\n",lpp,lpv,(i-lpp),i-1,ppv);
+#endif
+ if(ppl!=-1 && ppl!=i-1) {
+ lpp=(int) round((ppl+i-1)/2);
+ } else {
+ lpp=i-1;
+ }
+ lpv=ppv;
+ }
+ }
+ }
+
+ // update previous values
+ if(val!=ppv) {
+ pppv=ppv; ppv=val; ppl=i;
+ }
+ } else {
+ d_nv[i]=val;
+ }
+ }
+
+ if(return_peaks) {
+ // record last position
+ if(lpp>0) {
+#ifdef DEBUG
+ Rprintf("recording last peak x=%d y=%f\n",lpp,lpv);
+#endif
+ ppos.push_back(lpp);
+ pval.push_back(lpv);
+ }
+
+ SEXP rpp_R,rpv_R;
+ PROTECT(rpp_R=allocVector(INTSXP,ppos.size()));
+ PROTECT(rpv_R=allocVector(REALSXP,ppos.size()));
+ int* rpp=INTEGER(rpp_R);
+ double* rpv=REAL(rpv_R);
+
+ for(int i=0;i<ppos.size();i++) {
+ rpp[i]=ppos[i];
+ rpv[i]=pval[i];
+ }
+
+ SEXP ans_R, names_R;
+ PROTECT(names_R = allocVector(STRSXP, 2));
+ SET_STRING_ELT(names_R, 0, mkChar("x"));
+ SET_STRING_ELT(names_R, 1, mkChar("v"));
+
+ PROTECT(ans_R = allocVector(VECSXP, 2));
+ SET_VECTOR_ELT(ans_R, 0, rpp_R);
+ SET_VECTOR_ELT(ans_R, 1, rpv_R);
+ setAttrib(ans_R, R_NamesSymbol, names_R);
+
+ UNPROTECT(4);
+ return(ans_R);
+ } else {
+ UNPROTECT(1);
+ return(nv);
+ }
+
+ }
+
+
+}
+
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-hms-dbmi-spp.git
More information about the debian-med-commit
mailing list