[med-svn] [r-other-apmswapp] 10/12: New upstream version 1.0
Andreas Tille
tille at debian.org
Fri Nov 10 12:26:39 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository r-other-apmswapp.
commit bd013a90c0c59a8b5a6a760470efbf6bf94abe06
Author: Andreas Tille <tille at debian.org>
Date: Fri Nov 10 13:23:07 2017 +0100
New upstream version 1.0
---
DESCRIPTION | 18 ++
NAMESPACE | 1 +
R/Inttab_Norm.R | 102 +++++++++++
R/NonspecFilter.r | 75 ++++++++
R/Saintout_process.r | 48 +++++
R/TSPM_apms.r | 111 ++++++++++++
R/mat_Int_change.r | 39 ++++
R/myTSPM.r | 121 +++++++++++++
R/myWFalg_Dudoit.r | 48 +++++
R/saint_permF.r | 235 ++++++++++++++++++++++++
debian/changelog | 7 -
debian/compat | 1 -
debian/control | 36 ----
debian/copyright | 11 --
debian/patches/series | 1 -
debian/patches/uses_deseq2.patch | 111 ------------
debian/rules | 5 -
debian/source/format | 1 -
debian/upstream/metadata | 11 --
debian/watch | 3 -
inst/extdata/baittab.txt | 7 +
inst/extdata/inttable.txt | 378 +++++++++++++++++++++++++++++++++++++++
inst/extdata/prottable.txt | 63 +++++++
man/apmsWAPP-internal.Rd | 22 +++
man/apmsWAPP-package.Rd | 41 +++++
man/int_mat.Rd | 41 +++++
man/norm.inttable.Rd | 68 +++++++
man/saint_permF.Rd | 117 ++++++++++++
man/tspm_apms.Rd | 95 ++++++++++
man/varFilter.Rd | 64 +++++++
30 files changed, 1694 insertions(+), 187 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..e47c8d4
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,18 @@
+Package: apmsWAPP
+Type: Package
+Title: Pre- and Postprocessing for AP-MS data analysis based on
+ spectral counts
+Version: 1.0
+Date: 2013-03-14
+Author: Martina Fischer
+Maintainer: Martina Fischer <fischerm at rki.de>
+Description: apmsWAPP provides a complete workflow for the analysis of AP-MS data (replicate single-bait purifications including negative controls) based on spectral counts.
+ It comprises pre-processing, scoring and postprocessing of protein interactions.
+ A final list of interaction candidates is reported: it provides a ranking of the candidates according
+ to their p-values which allow estimating the number of false-positive interactions.
+Depends: genefilter, seqinr, multtest, gtools, limma, edgeR, DESeq,
+ aroma.light
+License: Review License
+LazyLoad: yes
+SystemRequirements: SAINT_v2.3.4
+Packaged: 2013-04-09 15:13:20 UTC; martina
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..40ea8e2
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1 @@
+exportPattern("^[[:alpha:]]+")
diff --git a/R/Inttab_Norm.R b/R/Inttab_Norm.R
new file mode 100644
index 0000000..16917d9
--- /dev/null
+++ b/R/Inttab_Norm.R
@@ -0,0 +1,102 @@
+### Function for Normalization of Counts in the interactiontable:
+# Input: - original interaction matrix (interaction table including zeros, raw discrete counts!)
+# - baittable = classification of samples and controls
+# - different normalization methods can be chosen in "norm":
+# "sumtotal", "upperquartile", "DESeq", "TMM", "quantile"
+# Output: normed interaction-matrix (normed count table), scaling factors
+
+norm.inttable <- function( inttab.mat, baittab, norm = c("sumtotal", "upperquartile", "DESeq", "TMM", "quantile")) {
+
+require(limma)
+require(edgeR)
+require(DESeq)
+require(aroma.light)
+norm <- match.arg(norm)
+
+baittab$V3 <- as.character(baittab$V3)
+baittab$V1 <- as.character(baittab$V1)
+
+Cpos <- match(baittab$V1[grep("C",baittab$V3)], colnames(inttab.mat)) # columns in inttab.mat for "C"controls
+Tpos <- match(baittab$V1[grep("T",baittab$V3)], colnames(inttab.mat)) # columns in inttab.mat for "T"samples
+
+
+ ########## Normalization ############
+
+# "sumtotal" :
+if(norm=="sumtotal") {
+sumcount <- apply(inttab.mat, 2, function(x){sum(x)}) # sum total in samples
+scal.fac <- sumcount
+scal.fac[Cpos] <- scal.fac[Cpos] / median(sumcount[Cpos])
+scal.fac[Tpos] <- scal.fac[Tpos] / median(sumcount[Tpos])
+inttab.norm <- apply(inttab.mat, 2, function(x){x/sum(x)} )
+
+inttab.norm[ ,Cpos] <- inttab.norm[ ,Cpos] * median(sumcount[Cpos]) # within Ctrl replicates
+inttab.norm[ ,Tpos] <- inttab.norm[ ,Tpos] * median(sumcount[Tpos]) # within Bait replicates
+}
+
+
+# "upper-quartile" (proposed in Bullard et al. 2010):
+if(norm=="upperquartile") {
+upper.quartiles <- apply(inttab.mat, 2, function(x){quantile(x, probs=0.75)})
+zerocount <- which(upper.quartiles==0)
+scal.fac <- apply(inttab.mat, 2, function(x){qu<-quantile(x,probs=0.75); if(qu!=0)qu else 1})
+scal.fac[setdiff(Cpos,zerocount)] <- scal.fac[setdiff(Cpos,zerocount)] / median(scal.fac[Cpos])
+scal.fac[setdiff(Tpos,zerocount)] <- scal.fac[setdiff(Tpos,zerocount)] / median(scal.fac[Tpos]) # scaling factors
+
+inttab.norm <- apply(inttab.mat, 2, function(x){qu<-quantile(x,probs=0.75); if(qu!=0)x/qu else x}) # normed by 75% quantile of the samples
+scaling <- ifelse(upper.quartiles==0,1,upper.quartiles)
+inttab.norm[,setdiff(Cpos,zerocount)] <- inttab.norm[,setdiff(Cpos,zerocount)] * median(scaling[Cpos]) # transfer back to the count scales
+inttab.norm[,setdiff(Tpos,zerocount)] <- inttab.norm[,setdiff(Tpos,zerocount)] * median(scaling[Tpos]) # normalized count matrix
+}
+
+
+# "DESeq" - normalization method in the DESeq package (Anders et al. 2010):
+if(norm=="DESeq") {
+cds <- newCountDataSet( inttab.mat, baittab$V3) # build countDataSet
+
+cds1 <- estimateSizeFactors( cds[ ,Cpos] )
+cds2 <- estimateSizeFactors( cds[ ,Tpos] )
+sizeFactors( cds ) [Cpos] <- sizeFactors( cds1 )
+sizeFactors( cds ) [Tpos] <- sizeFactors( cds2 )
+scal.fac <- sizeFactors( cds ) # scaling factors
+
+inttab.norm <- scale(counts(cds), center=FALSE, scale=sizeFactors(cds))
+colnames(inttab.norm) <-colnames(counts(cds)) # normalized count matrix
+}
+
+
+# "TMM" - normalization method in the edgeR package (Robinson et al. 2010):
+if(norm=="TMM") {
+d1 <- DGEList(inttab.mat[, Cpos])
+d1 <- calcNormFactors(d1, method="TMM")
+eff.libsize1 <- d1$samples$lib.size * d1$samples$norm.factors
+
+d2 <- DGEList(inttab.mat[, Tpos])
+d2 <- calcNormFactors(d2, method="TMM")
+eff.libsize2 <- d2$samples$lib.size * d2$samples$norm.factors
+
+eff.libsize <- rep(NA, times=dim(inttab.mat)[2])
+eff.libsize[Cpos] <- eff.libsize1
+eff.libsize[Tpos] <- eff.libsize2
+scal.fac <- eff.libsize # scaling factors
+scal.fac[Cpos] <- scal.fac[Cpos] / median(eff.libsize1)
+scal.fac[Tpos] <- scal.fac[Tpos] / median(eff.libsize2)
+
+inttab.norm <- scale(inttab.mat, center=FALSE, scale=eff.libsize)
+inttab.norm[ ,Cpos] <- inttab.norm[ ,Cpos] * median(eff.libsize1)
+inttab.norm[ ,Tpos] <- inttab.norm[ ,Tpos] * median(eff.libsize2) # normalized count matrix
+}
+
+
+# "quantile" normalization:
+if(norm=="quantile") {
+inttab.norm <- inttab.mat
+inttab.norm[ ,Cpos] <- normalizeQuantileRank (inttab.mat[ ,Cpos], robust=TRUE) # quantile normalization within replicates
+inttab.norm[ ,Tpos] <- normalizeQuantileRank (inttab.mat[ ,Tpos], robust=TRUE)
+scal.fac <- NA # no scaling factors available for this method
+}
+
+
+return(list(inttab.norm, scal.fac)) # output
+
+} # function end
diff --git a/R/NonspecFilter.r b/R/NonspecFilter.r
new file mode 100644
index 0000000..2995bff
--- /dev/null
+++ b/R/NonspecFilter.r
@@ -0,0 +1,75 @@
+### Nonspecific Filtering via Variance
+# Input Matrix: cols=Samples, rows=Protein IDs
+# Filtering via: - IQR
+# - overall Variance
+# - noVar: no variance filter, only 1.filter is applied
+# Cutoff Selection via: - shorth (shortest intervall containing 50% of the data) , default: NA
+# - define quantile cutoff, e.g. 0.5 -> 50% data filtered of smallest Var or IQR
+# limit = number of expected candidates in the data
+
+#### Functions
+
+my.rowIQR <- function(mat) { #adapted for matrix from genefilter:::rowIQRs
+require(genefilter)
+rowQ(mat, ceiling(0.75 * ncol(mat))) - rowQ(mat, floor(0.25 * ncol(mat)))
+}
+
+varFilter <- function (mat, baittab, func= c("IQR", "overallVar", "noVar"), var.cutoff = NA, limit=0)
+{
+ require(genefilter)
+ func <- match.arg(func)
+
+ Cpos <- match(baittab$V1[grep("C",baittab$V3)], colnames(mat) ) # columns in mat for "C"controls
+ Tpos <- match(baittab$V1[grep("T",baittab$V3)], colnames(mat) ) # columns in mat for "T"samples
+ ctrl.med <- apply(mat[,Cpos], 1, function(x){median(x)} ) # 1.filter: ctrl counts > bait counts
+ bait.med <- apply(mat[,Tpos], 1, function(x){median(x)} )
+ mat <- mat[-which(ctrl.med > bait.med), ]
+cat("Biological filter reduced the number of proteins to: ", dim(mat)[1], "\n")
+if (dim(mat)[1] < limit) {func <- "noVar"; cat("Argument limit is chosen too small, in order to hold it the variance filter cannot be applied\n") }
+
+ if (func == "IQR") { #2. filter: small Var over all samples
+ vars <- my.rowIQR(mat) }
+ else if (func == "overallVar") {
+ vars <- rowVars(mat) }
+ else if (func == "noVar") {
+ return(mat) }
+ else stop("Define filter function!\n")
+
+
+ if (is.na(var.cutoff)) { # calculate shorth for cutoff
+ var.cut <- shorth(vars, tie.action="min")
+ if(var.cut <= median(vars)) selected <- which(vars > var.cut)
+ else stop("shorth calculation is not appropriate in this case, define a quantile for cutoff calculation!\n")
+ }
+
+ else if (0 < var.cutoff && var.cutoff < 1) {
+ quant <- quantile(vars, probs = var.cutoff) # cutoff corresponding to defined quantile
+ selected <- which(vars > quant) }
+ else stop("Cutoff Quantile has to be between 0 and 1, alternative 'NA' for shortest interval calculation \n")
+
+ mat.f <- mat[selected,]
+
+ if (dim(mat.f)[1] > limit ) {
+ cat("Variance filter reduced the number of proteins to: ", dim(mat.f)[1], "\n")
+ return(mat.f) }
+ else {
+ cat("Chosen parameter setup filtered data below the threshold of the expected number of candidates \n")
+ if (is.na(var.cutoff)) var.cutoff=0.4
+ # filter parameter setup failed to hold the limit, cutoff adjustment
+ while( dim(mat.f)[1] < limit) {
+ if (var.cutoff<=0.05) {cat("Decrease limit, variance filter cannot be applied \n"); return(mat) }
+ else var.cutoff <- var.cutoff-0.05
+ quant <- quantile(vars, probs = var.cutoff)
+ selected <- which(vars > quant)
+ mat.f <- mat[selected,]
+ }
+
+ cat("Variance filter reduced the number of proteins to: ", dim(mat.f)[1], "\n")
+ return(mat.f)
+ }
+
+}
+
+
+
+
diff --git a/R/Saintout_process.r b/R/Saintout_process.r
new file mode 100644
index 0000000..917546d
--- /dev/null
+++ b/R/Saintout_process.r
@@ -0,0 +1,48 @@
+####### Saint output correction for Tab-misplacements: function "new.saintoutput"
+# Input: file-name of original saint interaction-output = unique-interaction table -> define path before!
+# Output: corrected saint-output of interactions
+
+
+ ### Correction SAINT output ###
+
+saintout <- function(test) {
+require(seqinr)
+test <- test[2:length(test)]
+test <- strsplit(test,split="\t")
+length(test)
+#table(unlist(lapply(test, function(x){length(x)} ) )) #check number of columns: expected 12
+
+lost12 <- which(is.na(unlist(lapply(test, function(x){x[12]} ) ) )) #column 12 missing
+
+for ( i in lost12) {
+a <- strsplit(test[[i]][10], split="|", fixed=TRUE)
+ll <- length(a[[1]])
+if (ll==1) # no ctrl counts, pure shifting of columns 11+12 to the previous ones
+ {test[[i]][12]<-test[[i]][11] ;
+ test[[i]][11]<-test[[i]][10] ;
+ test[[i]][10] <- NA
+ }
+else # column 11 displaced in column 10
+ {
+ if(is.integer(a[[1]][ll])==FALSE)
+ {test[[i]][12]<-test[[i]][11] ;
+ test[[i]][11]<- a[[1]][ll] ;
+ g <- c2s(paste(a[[1]][-ll],sep="|",collaps=""))
+ test[[i]][10] <- substr(g,1,nchar(g)-1) }
+ else print("different case in:",i )
+ }
+}
+return(test)
+}
+#######
+
+ ########## SAINT OUTPUT in Data Frame ##############
+
+new.saintoutput <- function(filename){
+require(seqinr)
+saint <- readLines(filename) # read original saint output
+saint.out <- saintout(saint) # function to correct tab-mistakes from the original Saint output
+saint.out2 <- as.data.frame(do.call("rbind",saint.out))
+colnames(saint.out2) <- unlist(strsplit(readLines(filename)[1], split="\t", fixed=TRUE))
+return(saint.out2)
+}
diff --git a/R/TSPM_apms.r b/R/TSPM_apms.r
new file mode 100644
index 0000000..ff2bd4e
--- /dev/null
+++ b/R/TSPM_apms.r
@@ -0,0 +1,111 @@
+###### Analysis of AP-MS Data: Detection of protein interaction partners and separation from contaminants
+##### on the base of spectral count Data
+###### Method: Application of the TSPM model + pre- and postprocessing steps
+
+#Steps: - Normalization
+# - Filtering
+# - TSPM model
+# - p-value Adjustment for multiple testing by:
+# * Benjamini-Hochberg procedure: FDR controlled p-values
+# * permutation approach + Westfall&Young algorithm: FWER controlled p-values for each protein interaction-candidate
+
+# Assumptions:
+# - Input of only one bait protein; controls required; - replicates for each group required
+# - minimum number of replicates for bait and control experiment preferred to be 3
+
+# Output:
+# - TSPM output (id, LFC, rawp, BH.padj, LRT dispersion)
+# - case WY-adjustment: WY adj. pvalues + counter
+# - (filtered) (normalized) count matrix
+
+
+tspm_apms <- function(counts, baittab, norm = c("none", "sumtotal", "upperquartile", "DESeq", "TMM", "quantile"),Filter=TRUE, filter.method= c("IQR", "overallVar", "noVar"), var.cutoff=NA, limit=0, adj.method=c("BH","WY")) {
+require(multtest)
+require(gtools)
+
+norm <- match.arg(norm)
+adj.method <- match.arg(adj.method)
+
+baittab <- read.table(baittab)
+baittab$V3 <- as.character(baittab$V3)
+baittab$V2 <- as.character(baittab$V2)
+baittab$V1 <- as.character(baittab$V1)
+if(all(baittab$V3%in%c("C","T"))==FALSE) stop("Sample and Controls need to be defined by T respectively C")
+if(setequal(baittab$V1, colnames(counts))==FALSE) stop("Names of samples need to be consistent in Baittable and Count-matrix")
+
+baittab <- baittab[order(baittab$V3),] # baittab sorted: controls first, followed by bait samples
+counts <- counts[ ,match(baittab$V1, colnames(counts))] # counts in the same order as baittable
+
+counts.org <- counts
+
+#setwd("O:/NG4/staff/Martina/MS_Salmonellen")
+#source("Inttab_Norm.r") # normalization
+#source("myTSPM.r") # TSPM model
+#source("NonspecFilter.r") # filtering
+#source("myWFalg_Dudoit.r") # Westfall&Young algorithm
+
+
+ #### NORMALIZATION ####
+if (norm!="none") {
+ if(norm=="quantile") {
+ norm.out <- norm.inttable (counts, baittab, norm)
+ counts <- round(norm.out[[1]])
+ lib.size <- rep(1, times=dim(counts)[2]) }
+ else {
+ norm.out <- norm.inttable (counts, baittab, norm)
+ lib.size <- norm.out[[2]]
+ counts <- norm.out[[1]] }
+}
+else { lib.size <- rep(1, times=dim(counts)[2]) }
+
+
+ #### FILTERING ####
+if(Filter==TRUE) {
+filter.method <- match.arg(filter.method)
+counts.nf <- varFilter (counts, baittab, func=filter.method, var.cutoff, limit)
+pos.f <- match(rownames(counts.nf), rownames(counts))
+if (norm!="quantile") { counts.f <- counts.org[pos.f,] } # filtered unnormalized counts
+else {counts.f <- counts[pos.f,] } # case quantile-norm
+}
+
+ ##### TSPM model #####
+# TSPM parameter:
+x1 <- factor(baittab$V3[match(colnames(counts),baittab$V1)], levels=c("C","T")) # important: order C -> T in levels
+x0 <- rep(1, times=dim(counts)[2])
+
+# TSPM Modell for diff.expr.
+if (Filter==TRUE) new.counts <- counts.f
+else {if(norm=="quantile") new.counts <- counts else new.counts <- counts.org }
+
+tspm <- TSPM (new.counts, x1, x0, lib.size, alpha.wh=0.05)
+
+if(adj.method=="BH") {
+if (Filter==TRUE) return(list(tspm, counts.nf)) else return(list(tspm, counts)) # output1: filtered (normalized) counts, output2: (normalized) counts
+}
+
+
+if (adj.method=="WY") {
+org.LRT <- tspm$LRT
+classlabel <- ifelse(x1=="C",0,1)
+perms <- mt.sample.label(classlabel,test="t",fixed.seed.sampling="n",B=0)
+perms2 <- apply(perms, 2, function(x){ifelse(x==0,"C","T")} )
+# permutation table:
+proteins <- rownames(new.counts)
+permmat <- as.data.frame(matrix(data=NA, nrow=length(proteins), ncol=(dim(perms)-1) ) )
+rownames(permmat) <- proteins
+
+for ( j in 2:nrow(perms2)) {
+x1 <- factor(perms2[j,], levels=c("C","T"))
+tspm.perm <- TSPM (new.counts, x1, x0, lib.size, alpha.wh=0.05) # permuted TSPM model
+permmat[,(j-1)] <- tspm.perm$LRT
+}
+
+set.seed(12345) # Westfall&Young Algorithm
+pp <- permute(c(1:length(proteins)))
+WY.padj <- WY.permalg(org.LRT[pp], permmat[pp,])
+
+if (Filter==TRUE) return(list(tspm, WY.padj, counts.nf, permmat)) else return(list(tspm, WY.padj, counts, permmat)) # output1: filtered (normalized) counts, output2: (normalized) counts
+} # wy end
+
+
+} # end
diff --git a/R/mat_Int_change.r b/R/mat_Int_change.r
new file mode 100644
index 0000000..9bf9c05
--- /dev/null
+++ b/R/mat_Int_change.r
@@ -0,0 +1,39 @@
+##### Translation Count-Matrix <-> Interactiontable (format required for SAINT)
+
+#### Function Interactiontable -> Matrix:
+# Input: Interactiontable as required in SAINT (including zero counts)
+
+int2mat <- function(IntSaint) {
+prots <- as.character(unique(IntSaint$V3))
+ #transfer inttab -> inttab.mat (matrix)
+rowspec <- function(i){ # = matrix row i
+protframe <-IntSaint[grep(paste("^",i,"$",sep=""), IntSaint$V3), ]
+return(protframe$V4[match(as.character(unique(IntSaint$V1)),protframe$V1)])}
+# spectral counts in matrix-form:
+IntSaint.mat <- t(vapply(prots, rowspec , rep(0, times=length(as.character(unique(IntSaint$V1))))))
+colnames(IntSaint.mat) <- as.character(unique(IntSaint$V1))
+return(IntSaint.mat)
+}
+
+# example
+# baitint.mat <- int2mat("bait_LBcl_IntSaint.txt")
+
+
+##### Function Matrix -> Interactiontable
+# Input: matrix count table, baittable as classifier for ctrls and bait-samples
+
+mat2int <- function(mat, baittab){
+inttab <- data.frame(V1=NA, V2=NA, V3=NA, V4=0.0)
+for ( i in 1:dim(mat)[2]) {
+V1 <- rep(colnames(mat)[i], times= dim(mat)[1] )
+V2 <- rep(as.character(baittab$V2[match(colnames(mat)[i], baittab$V1)]), times=dim(mat)[1] )
+V3 <- rownames(mat)
+V4 <- mat[,i]
+inttab <- rbind(inttab, cbind(V1,V2,V3,V4))
+}
+inttab <- inttab[-1,]
+inttab$V4 <- as.numeric(inttab$V4)
+rownames(inttab) <- NULL
+return(inttab)
+}
+
diff --git a/R/myTSPM.r b/R/myTSPM.r
new file mode 100644
index 0000000..997bb39
--- /dev/null
+++ b/R/myTSPM.r
@@ -0,0 +1,121 @@
+##### Origin:
+##-------------------------------------------------------------------
+## Name: TSPM.R
+## R code for the paper by Paul L. Auer and R.W. Doerge:
+## "A Two-Stage Poisson Model for Testing RNA-Seq Data"
+## Date: February 2011
+## Contact: Paul Auer plivermo at fhcrc.org
+## R.W. Doerge doerge at purdue.edu
+##------------------------------------------------------------------------
+
+#### !!!
+#### TSPM function adapted here for the application to affinity-purification mass-spectrometry data (AP-MS)
+#### based on spectral counts
+
+
+
+###### The TSPM function ##############################################
+#######################################################################
+
+
+TSPM <- function(counts, x1, x0, lib.size, alpha.wh){
+
+## Input:
+#counts: a matrix of spectral counts (rows=proteins, columns=samples)
+#x1: a vector of bait/control assignement (under the alternative hypothesis)
+#x0: a vector of bait/control assignement (under the null hypothesis)
+#lib.size: a vector of scaling factors for normalization
+#alpha.wh: the significance threshold to use for deciding whether a protein is overdispersed.
+# Defaults to 0.05.
+
+
+## Output:
+#id: name of the protein
+#log.fold.change: a vector containing the estimated log fold changes for each protein
+#pvalues: a vector containing the raw p-values testing differential expression for each protein
+#LRT: a vector of Likelihood-Ratio statistics for each protein
+#dispersion: a vector of yes/no indicating overdispersion for each protein
+#padj: a vector containing the p-values after adjusting for multiple testing using the method of Benjamini-Hochberg
+
+
+######## The main loop that fits the GLMs to each protein ########################
+
+### Initializing model parameters ####
+n <- dim(counts)[1]
+per.gene.disp <- NULL
+LRT <- NULL
+score.test <- NULL
+LFC <- NULL
+
+###### Fitting the GLMs for each protein #################
+ for(i in 1:n){
+ ### Fit full and reduced models ###
+ model.1 <- glm(as.numeric(counts[i,]) ~ x1, offset=log(lib.size), family=poisson)
+ model.0 <- glm(as.numeric(counts[i,]) ~ x0, offset=log(lib.size), family=poisson)
+
+ if (model.1$coef[2]<0) {model.1 <- model.0} # constraint: parameter beta1>0 ! one-sided-test situation
+
+ ### Obtain diagonals of Hat matrix from the full model fit ###
+ hats <- hatvalues(model.1)
+
+ ### Obtain Pearson overdispersion estimate ####
+ per.gene.disp[i] <- sum(residuals(model.1, type="pearson")^2)/model.1$df.residual
+
+ ### Obtain Likelihood ratio statistic ####
+ LRT[i] <- deviance(model.0)-deviance(model.1)
+
+ ### Obtain score test statistic ####
+ score.test[i] <- 1/(2*length(counts[i,])) * sum(residuals(model.1, type="pearson")^2 - ((counts[i,] - hats*model.1$fitted.values)/model.1$fitted.values))^2
+
+ ### Obtain the estimated log fold change ###
+ LFC[i] <- model.1$coef[2]
+ }
+
+## Initialize parameters for Working-Hotelling bands around the score TSs ###
+qchi <- qchisq(df=1, (1:n-0.5)/n)
+MSE <- 2
+UL <- NULL
+
+#### Obtain the upper boundary of the WH bands #######################################
+xbar <- mean(qchi)
+bottom <- sum((qchi-xbar)^2)
+top <- (qchi-xbar)^2
+s <- sqrt(MSE*(1/n) + (top/bottom))
+W <- sqrt(2*qf(df1=1, df2=n-1, p=1-(alpha.wh/n)))
+UL <- pmax(qchi + W*s,1)
+
+###### Obtain the indices of the over-dispersed and not-over-dispersed proteins, respectively ##########
+
+if(length(which(sort(score.test)-UL > 0))>0){ # overdispersed proteins existent
+cutoff <- min(which(sort(score.test)-UL > 0))
+temp <- cutoff-1 + seq(cutoff:length(score.test))
+over.disp <- which(score.test %in% sort(score.test)[temp])
+not.over.disp <- setdiff(1:length(score.test), over.disp)
+}
+else { # no overdispersed proteins
+not.over.disp <- c(1:length(score.test))
+over.disp <- NULL
+}
+
+
+###### Compute p-values ####################################
+p.f <- pf(LRT[over.disp]/per.gene.disp[over.disp], df1=1, df2=model.1$df.residual, lower.tail=FALSE)
+p.chi <- pchisq(LRT[not.over.disp], df=1, lower.tail=FALSE)
+p <- NULL
+p[over.disp] <- p.f
+p[not.over.disp] <- p.chi
+
+##### Adjust the p-values using the B-H method ####################
+p.bh.f <- p.adjust(p.f, method="BH")
+p.bh.chi <- p.adjust(p.chi, method="BH")
+final.p.bh.tagwise <- NULL
+final.p.bh.tagwise[over.disp] <- p.bh.f
+final.p.bh.tagwise[not.over.disp] <- p.bh.chi
+
+index.disp <- NULL
+index.disp[over.disp] <- "yes"
+index.disp[not.over.disp] <- "no"
+### Output ###
+data.frame(id=rownames(counts), log.fold.change=LFC, pvalues=p,
+padj=final.p.bh.tagwise, LRT=LRT, dispersion=index.disp)
+}
diff --git a/R/myWFalg_Dudoit.r b/R/myWFalg_Dudoit.r
new file mode 100644
index 0000000..fc98632
--- /dev/null
+++ b/R/myWFalg_Dudoit.r
@@ -0,0 +1,48 @@
+# Westfall & Young Algorithm implemented after Dudoit in "Dudoit-class (package ClassComparison)"
+
+
+##### Westfall&Young Algorithm:
+# Input: - original Saint.out scores (org.statistic)
+# - score- Permutationmatrix (perm.avgp / perm.maxp)
+# Output: - adjusted p-values, counter=number of exceeding scores, either of the permuted score exceeding its original one a permuted score from on of the the lower ranked original candidates
+
+WY.permalg <- function(org.statistic, perm.p) {
+ tor <- rev(order(abs(org.statistic))) # sort observed org.values
+
+ grounded <- abs(org.statistic[tor]) # org.statistic in decreasing order
+ num.gene <- length(org.statistic)
+ counter <- rep(0, num.gene) # counter for each gene via permutations
+ names(counter) <- rownames(perm.p[tor,])
+ nPerm <- dim(perm.p)[2]
+ for (i in 1:nPerm) {
+ #cat("\n", i, ". Run \n", sep="")
+ dudoit.t <- abs(perm.p[tor,i])
+ dudoit.u <- rep(dudoit.t[num.gene], num.gene)
+ trigger <- sum(dudoit.t > dudoit.u)
+ while(trigger > 0) {
+ dudoit.t <- (dudoit.u + dudoit.t + abs(dudoit.u - dudoit.t))/2
+ trigger <- sum(dudoit.t > dudoit.u)
+ if (trigger > 0) {
+ target <- (1:num.gene)[dudoit.t >dudoit.u][trigger]
+ dudoit.u[1:target] <- dudoit.t[target]
+ }
+ }
+ #print (head(cbind(grounded, abs(perm.p[tor,i]) , dudoit.u) ))
+ counter <- counter + (dudoit.u >= grounded)
+ }
+ adjusted.p <- counter/nPerm
+ return(cbind(counter,adjusted.p))
+}
+
+
+
+
+# example:
+
+# data generated from uniform distribution
+#perm.p <- as.data.frame(matrix(data=NA, nrow=10, ncol=8 ) )
+#rownames(perm.p) <- c(1:10)
+#perm.p <-apply (perm.p, c(1,2), function(x){x<-runif(1,min=0.1,max=9)}) # permuted statistics
+#org <- sapply(1:10, function(x){x<-runif(1,min=0.1,max=9)}) # original statistic
+
+#WY.permalg(org,perm.p)
diff --git a/R/saint_permF.r b/R/saint_permF.r
new file mode 100644
index 0000000..332b7ea
--- /dev/null
+++ b/R/saint_permF.r
@@ -0,0 +1,235 @@
+#### Analysis of AP_MS data: SAINT framed by Pre- and Postprocessing steps - whole pipeline ####
+# Aim: Identify interaction partners of a bait protein and separation from the contaminants
+
+#Steps: - Normalization
+# - Filtering
+# - original SAINT run on (normalized)(filtered) data
+# - Permutation approach on SAINT: shuffle of controls and bait samples
+# - Westfall&Young algorithm to receive FWER controlled p-values for each protein interaction-candidate
+
+# Assumptions:
+# - Input of only one bait protein; controls required; - replicates for each group required
+# - bait protein name(V2 in baittable) same for all bait replicates, same for all controls
+# - minimum number of replicates for bait and control experiment is 3
+
+##### Input:
+## - Baittable as required for SAINT: classification of bait(T) and ctrls(C) including their names
+## - Interactiontable as required for SAINT
+# * important: a protein which was not detected in a sample receives a zero count
+## - Proteintable = Preyfile as required for SAINT
+ ### these 3 files should be stored in the working directory !! ##
+
+# - choose a normalization method; default="none" (no normalization)
+# - Filtering TRUE/FALSE, set all parameters for filtering (method, cutoff, limit)
+
+#Inputfiles example:
+# file_baittable <- "baittable.txt" # read Baittable
+# file_inttable <- "LBcl_IntSaint.txt" # read Interactiontable
+
+
+#Output: - in case of normalization: normed interaction-table (txt file)
+# - in case of filtering: filtered (normed) interaction-table (txt file)
+# - Saint output: "unique_interaction" file on the original data (normalized: "_orig" ;filtered: "_orgF")
+# - permutation matrix perm.avgp, perm.maxp (Rdata) = scores for each protein from all permutation runs
+# - overall result "WY_Result.csv": Saint interaction output + WY adjusted p-values for each candidate
+
+
+
+saint_permF <- function(file_baittable, file_inttable, prottable, norm = c("none", "sumtotal", "upperquartile", "DESeq", "TMM", "quantile"),Filter=TRUE, filter.method= c("IQR", "overallVar", "noVar"), var.cutoff=NA, limit=0, intern.norm=FALSE, saint.options="2000 10000 0 1 0") {
+require(multtest)
+require(gtools)
+
+norm <- match.arg(norm)
+saint.options <- match.arg(saint.options)
+
+baittable <- read.table(file_baittable) # data input
+inttable<- read.table(file_inttable)
+
+baittable$V3 <- as.character(baittable$V3)
+baittable$V2 <- as.character(baittable$V2)
+baittable$V1 <- as.character(baittable$V1)
+inttable$V1 <- as.character(inttable$V1)
+inttable$V2 <- as.character(inttable$V2)
+
+
+if(all(baittable$V3%in%c("C","T"))==FALSE) {
+stop("Define bait and control samples only by C and T in the Baittable \n") }
+
+if (setequal(names(table(baittable$V2)), names(table(inttable$V2)))==FALSE | setequal(names(table(baittable$V1)), names(table(inttable$V1)))==FALSE ) {
+stop("Names of bait and control samples need to be consistent in the Bait- and Interactiontable. \n") }
+
+
+baittab <- baittable[order(baittable$V3),] # baittab sorted: controls first followed by bait samples
+inttable.raw <- inttable
+
+
+
+ ##### NORMALIZATION #####
+if (norm!="none") {
+inttab.mat <- int2mat(inttable)
+norm.out <- norm.inttable (inttab.mat, baittab, norm)
+inttable <- mat2int(norm.out[[1]], baittab)
+baitname <- as.character(unique(baittab$V2[grep("T", baittab$V3)]))
+normSaint_filename <- paste(norm, paste(baitname, "IntSaint.txt",sep="_"),sep="_")
+write.table(inttable, file=normSaint_filename, quote=FALSE, sep="\t", col.names=FALSE, row.names=FALSE)
+}
+
+
+ ####### FILTERING #######
+if(Filter==TRUE) {
+filter.method <- match.arg(filter.method)
+intmat <- int2mat(inttable)
+intmat <- varFilter(intmat, baittab, func=filter.method, var.cutoff, limit) # FILTER FUNCTION
+inttable <- mat2int(intmat, baittab) # new filtered inttable with selected proteins
+
+if(intern.norm==TRUE) { # repeated normalization on filtered interaction table
+del.prots <- setdiff(unique(inttable.raw$V3), unique(inttable$V3))
+del.pos <- unlist(lapply(del.prots, function(x){which(inttable.raw$V3==x)} ))
+intmat.f <- int2mat(inttable.raw[-del.pos,] )
+norm2.out <- norm.inttable (intmat.f, baittab, norm)
+inttable <- mat2int(norm2.out[[1]], baittab)
+}
+
+write.table(inttable, file="Inttable_filtered.txt", quote=FALSE, sep="\t", col.names=FALSE, row.names=FALSE)
+
+# 1.SAINT run on original data on a filtered protein-set:
+saint.command <- paste("saint-spc-ctrl", "Inttable_filtered.txt", prottable, file_baittable, saint.options,sep=" ")
+system("rm -r LOG")
+system("rm -r MAPPING") # deletion of former folders
+system("rm -r MCMC")
+system("rm -r RESULT")
+system(saint.command) # linux run
+#SAINT output "unique_interactions":
+filename.org <- "./RESULT/unique_interactions"
+orig.filter <- new.saintoutput(filename.org)
+write.table(orig.filter, file="Unique_Interactions_orgF.txt", quote=FALSE, sep="\t", row.names=FALSE)
+
+proteins <- as.character(orig.filter$Prey)
+}
+## NO Filtering:
+else {
+ if (norm=="none") { # 1.SAINT run on original data (unnormalized + no filter)
+ saint.command2 <- paste("saint-spc-ctrl", file_inttable, prottable, file_baittable, saint.options,sep=" ")
+ system("rm -r LOG")
+ system("rm -r MAPPING")
+ system("rm -r MCMC")
+ system("rm -r RESULT")
+ system(saint.command2) }
+ if (norm!="none") { # 1.SAINT run on original data (normalized+ no filter)
+ saint.command.norm <- paste("saint-spc-ctrl", normSaint_filename, prottable, file_baittable, saint.options,sep=" ")
+ system("rm -r LOG")
+ system("rm -r MAPPING")
+ system("rm -r MCMC")
+ system("rm -r RESULT")
+ system(saint.command.norm) }
+
+ #SAINT output "unique_interactions":
+ filename.org <- "./RESULT/unique_interactions"
+ orig.out <- new.saintoutput(filename.org)
+ write.table(orig.out, file="Unique_Interactions_Orig.txt", quote=FALSE, sep="\t", row.names=FALSE)
+ proteins <- as.character(orig.out$Prey)
+}
+
+
+ ######### Permutation ###########
+
+ctrl <- as.character(baittab$V1[grep("C", baittab$V3)]) # control replicates
+ctrl.sup <- unique(as.character(baittab$V2[grep("C", baittab$V3)])) #control sup-name
+baits <- as.character(baittab$V1[grep("T", baittab$V3)]) # bait replicates
+bait.sup <- unique(as.character(baittab$V2[grep("T", baittab$V3)])) #superior bait name
+
+# permutation setup:
+shuf.vec <- baittab$V3
+classlabel <- ifelse(shuf.vec=="C",0,1)
+perms <- mt.sample.label(classlabel,test="t",fixed.seed.sampling="n",B=0)
+perms2 <- apply(perms, 2, function(x){ifelse(x==0,"C","T")} )
+perms.baitname <- apply( perms, 2, function(x) {ifelse(x==0,baittab$V2[1],baittab$V2[length(baittab$V2)]) } )
+
+# permutation table to store the resulting scores from each permutation:
+perm.avgp <- as.data.frame(matrix(data=NA, nrow=length(proteins), ncol=(dim(perms)-1) ) )
+rownames(perm.avgp) <- proteins
+perm.maxp <- as.data.frame(matrix(data=NA, nrow=length(proteins), ncol=(dim(perms)-1) ) )
+rownames(perm.maxp) <- proteins
+
+#### Each permutation: create new baittable + interactiontable -> new Saint run -> store resulting scores
+#1.row = original data setup (skipped)
+
+for ( j in 2:nrow(perms2)) {
+cat(j, ". Run started \n", sep="")
+system("rm -r LOG")
+system("rm -r MAPPING") # delete folders for next permutation run
+system("rm -r MCMC")
+system("rm -r RESULT")
+
+baittab.perm <- baittab
+baittab.perm$V3 <- perms2[j,]
+baittab.perm$V2 <- perms.baitname[j,] # permuted baittable
+
+new.ctrls <- baittab.perm$V1[grep("C",baittab.perm$V3)]
+new.ctrl <- new.ctrls [which(new.ctrls %in% ctrl ==FALSE) ] # bait turned into control
+new.baits <- baittab.perm$V1[grep("T",baittab.perm$V3)]
+new.bait <- new.baits [which(new.baits %in% baits ==FALSE) ] # control turned into bait
+
+perm.inttable <- inttable # permuted inttable
+newctrl.pos <- unlist(sapply(new.ctrl, function(x){which(perm.inttable$V1==x)}))
+perm.inttable$V2[newctrl.pos] <- ctrl.sup
+newbait.pos <- unlist(sapply(new.bait, function(x){which(perm.inttable$V1==x)}))
+perm.inttable$V2[newbait.pos] <- bait.sup
+
+
+print(table(perm.inttable$V1, perm.inttable$V2)) # check permutation
+
+write.table(baittab.perm, file="Baittable_perm.txt", quote=FALSE, sep="\t", col.names=FALSE, row.names=FALSE)
+write.table(perm.inttable, file="Inttable_perm.txt", quote=FALSE, sep="\t", col.names=FALSE, row.names=FALSE)
+
+### SAINT run on permutated data
+saint.commandp1 <- paste("saint-spc-ctrl", "Inttable_perm.txt", prottable, "Baittable_perm.txt",saint.options,sep=" ")
+system(saint.commandp1)
+
+# SAINT output "unique_interactions"
+filename <- "./RESULT/unique_interactions"
+saintout.perm <- try(new.saintoutput(filename))
+while (class(saintout.perm)=="try-error") # case of file generation failure
+ { system("rm -r LOG") # do it again
+ system("rm -r MAPPING")
+ system("rm -r MCMC")
+ system("rm -r RESULT")
+ saint.commandp1 <- paste("saint-spc-ctrl", "Inttable_perm.txt", prottable, "Baittable_perm.txt", saint.options,sep=" ")
+ system(saint.commandp1)
+ saintout.perm <- try(new.saintoutput(filename))
+ }
+saintout.perm$AvgP <- as.numeric(as.vector(saintout.perm$AvgP))
+saintout.perm$MaxP <- as.numeric(as.vector(saintout.perm$MaxP))
+saintout.perm$Prey <- as.character(saintout.perm$Prey)
+
+# extract scores from output "unique-interactions"
+protpos.perm <- match(proteins, saintout.perm$Prey)
+perm.avgp[,(j-1)] <- saintout.perm$AvgP[protpos.perm]
+perm.maxp[,(j-1)] <- saintout.perm$MaxP[protpos.perm]
+
+}
+# permutation end
+
+save(perm.avgp, file="perm_avgP.RData")
+save(perm.maxp, file="perm_maxP.RData")
+
+system("rm -r LOG")
+system("rm -r MAPPING")
+system("rm -r MCMC")
+system("rm -r RESULT")
+ ####### Generation p-values #######
+
+if(Filter==TRUE) {orig <- orig.filter} else {orig <- orig.out}
+
+orig$AvgP <- as.numeric(as.vector(orig$AvgP))
+set.seed(12345)
+pp <- permute(c(1:length(proteins)))
+wy <- WY.permalg(orig$AvgP[pp], perm.avgp[pp,])
+tor <- rev(order(abs(orig$AvgP)))
+write.csv2(cbind(orig[tor,] ,"WY_counter"=wy[match(orig[tor,]$Prey,rownames(wy)),1],"WY_P.adjust"=wy[match(orig[tor,]$Prey,rownames(wy)),2]) ,file="WY_Result.csv")
+
+}
+
+# end
+
+
diff --git a/debian/changelog b/debian/changelog
deleted file mode 100644
index 6ccb5b1..0000000
--- a/debian/changelog
+++ /dev/null
@@ -1,7 +0,0 @@
-r-other-apmswapp (1.0-1) UNRELEASED; urgency=low
-
- * Initial release (Closes: #<bug>)
- TODO: http://www.bioconductor.org/packages/2.6/bioc/html/DESeq.html
- -> r-bioc-deseq (seems to depend from locfit)
-
- -- Andreas Tille <tille at debian.org> Fri, 04 Nov 2016 11:28:50 +0100
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index ec63514..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-9
diff --git a/debian/control b/debian/control
deleted file mode 100644
index 3198240..0000000
--- a/debian/control
+++ /dev/null
@@ -1,36 +0,0 @@
-Source: r-other-apmswapp
-Section: gnu-r
-Priority: optional
-Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Andreas Tille <tille at debian.org>
-Build-Depends: debhelper (>= 9),
- dh-r,
- r-base-dev,
- r-cran-gtools,
- r-bioc-edger,
- r-bioc-limma,
- r-bioc-multtest,
- r-bioc-genefilter,
- r-bioc-aroma.light,
- r-bioc-deseq2,
- r-cran-seqinr,
- r-cran-foreign,
- r-cran-nnet
-Standards-Version: 3.9.8
-Vcs-Browser: https://anonscm.debian.org/viewvc/debian-med/trunk/packages/R/r-other-apmswapp/trunk/
-Vcs-Svn: svn://anonscm.debian.org/debian-med/trunk/packages/R/r-other-apmswapp/trunk/
-Homepage: http://sourceforge.net/projects/apmswapp/
-
-Package: r-other-apmswapp
-Architecture: all
-Depends: ${misc:Depends},
- ${R:Depends}
-Recommends: ${R:Recommends}
-Suggests: ${R:Suggests}
-Description: GNU R Pre- and Postprocessing For Affinity Purification Mass Spectrometry
- The reliable detection of protein-protein-interactions by affinity
- purification mass spectrometry (AP-MS) is a crucial stepping stone for
- the understanding of biological processes. The main challenge in a
- typical AP-MS experiment is to separate true interaction proteins from
- contaminants by contrasting counts of proteins binding to specific baits
- with counts of negative controls.
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index cb8a69b..0000000
--- a/debian/copyright
+++ /dev/null
@@ -1,11 +0,0 @@
-Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Upstream-Name: Martina Fischer <fischerm at rki.de>
-Source: http://sourceforge.net/projects/apmswapp/files/
-
-Files: *
-Copyright: 2013-2014 Martina Fischer <fischerm at rki.de>
-License: <license>
-
-Files: debian/*
-Copyright: 2015 Andreas Tille <tille at debian.org>
-License: <license>
diff --git a/debian/patches/series b/debian/patches/series
deleted file mode 100644
index b1daabd..0000000
--- a/debian/patches/series
+++ /dev/null
@@ -1 +0,0 @@
-uses_deseq2.patch
diff --git a/debian/patches/uses_deseq2.patch b/debian/patches/uses_deseq2.patch
deleted file mode 100644
index b26a665..0000000
--- a/debian/patches/uses_deseq2.patch
+++ /dev/null
@@ -1,111 +0,0 @@
-Author: Andreas Tille <tille at debian.org>
-Last-Update: Fri, 04 Nov 2016 11:28:50 +0100
-Description: No idea whether this makes sense but Debian has now DESeq2 - just
- try whether this might work as well.
-
---- a/DESCRIPTION
-+++ b/DESCRIPTION
-@@ -10,7 +10,7 @@ Description: apmsWAPP provides a complet
- It comprises pre-processing, scoring and postprocessing of protein interactions.
- A final list of interaction candidates is reported: it provides a ranking of the candidates according
- to their p-values which allow estimating the number of false-positive interactions.
--Depends: genefilter, seqinr, multtest, gtools, limma, edgeR, DESeq,
-+Depends: genefilter, seqinr, multtest, gtools, limma, edgeR, DESeq2,
- aroma.light
- License: Review License
- LazyLoad: yes
---- a/R/Inttab_Norm.R
-+++ b/R/Inttab_Norm.R
-@@ -2,14 +2,14 @@
- # Input: - original interaction matrix (interaction table including zeros, raw discrete counts!)
- # - baittable = classification of samples and controls
- # - different normalization methods can be chosen in "norm":
--# "sumtotal", "upperquartile", "DESeq", "TMM", "quantile"
-+# "sumtotal", "upperquartile", "DESeq2", "TMM", "quantile"
- # Output: normed interaction-matrix (normed count table), scaling factors
-
--norm.inttable <- function( inttab.mat, baittab, norm = c("sumtotal", "upperquartile", "DESeq", "TMM", "quantile")) {
-+norm.inttable <- function( inttab.mat, baittab, norm = c("sumtotal", "upperquartile", "DESeq2", "TMM", "quantile")) {
-
- require(limma)
- require(edgeR)
--require(DESeq)
-+require(DESeq2)
- require(aroma.light)
- norm <- match.arg(norm)
-
-@@ -50,8 +50,8 @@ inttab.norm[,setdiff(Tpos,zerocount)] <-
- }
-
-
--# "DESeq" - normalization method in the DESeq package (Anders et al. 2010):
--if(norm=="DESeq") {
-+# "DESeq2" - normalization method in the DESeq2 package (Anders et al. 2010):
-+if(norm=="DESeq2") {
- cds <- newCountDataSet( inttab.mat, baittab$V3) # build countDataSet
-
- cds1 <- estimateSizeFactors( cds[ ,Cpos] )
---- a/R/TSPM_apms.r
-+++ b/R/TSPM_apms.r
-@@ -19,7 +19,7 @@
- # - (filtered) (normalized) count matrix
-
-
--tspm_apms <- function(counts, baittab, norm = c("none", "sumtotal", "upperquartile", "DESeq", "TMM", "quantile"),Filter=TRUE, filter.method= c("IQR", "overallVar", "noVar"), var.cutoff=NA, limit=0, adj.method=c("BH","WY")) {
-+tspm_apms <- function(counts, baittab, norm = c("none", "sumtotal", "upperquartile", "DESeq2", "TMM", "quantile"),Filter=TRUE, filter.method= c("IQR", "overallVar", "noVar"), var.cutoff=NA, limit=0, adj.method=c("BH","WY")) {
- require(multtest)
- require(gtools)
-
---- a/R/saint_permF.r
-+++ b/R/saint_permF.r
-@@ -35,7 +35,7 @@
-
-
-
--saint_permF <- function(file_baittable, file_inttable, prottable, norm = c("none", "sumtotal", "upperquartile", "DESeq", "TMM", "quantile"),Filter=TRUE, filter.method= c("IQR", "overallVar", "noVar"), var.cutoff=NA, limit=0, intern.norm=FALSE, saint.options="2000 10000 0 1 0") {
-+saint_permF <- function(file_baittable, file_inttable, prottable, norm = c("none", "sumtotal", "upperquartile", "DESeq2", "TMM", "quantile"),Filter=TRUE, filter.method= c("IQR", "overallVar", "noVar"), var.cutoff=NA, limit=0, intern.norm=FALSE, saint.options="2000 10000 0 1 0") {
- require(multtest)
- require(gtools)
-
---- a/man/norm.inttable.Rd
-+++ b/man/norm.inttable.Rd
-@@ -8,7 +8,7 @@ Normalization of spectral count data
- Normalization of spectral counts in bait and control samples based on an AP-MS experiment.
- }
- \usage{
--norm.inttable(inttab.mat, baittab, norm = c("sumtotal", "upperquartile", "DESeq", "TMM", "quantile"))
-+norm.inttable(inttab.mat, baittab, norm = c("sumtotal", "upperquartile", "DESeq2", "TMM", "quantile"))
- }
-
- \arguments{
-@@ -30,7 +30,7 @@ Five different normalization methods, ad
- In the \sQuote{sumtotal} normalization counts are divided by the total number of counts in the sample.
- The \sQuote{upperquartile} normalization corrects counts by dividing each count by the 75\% quantile of its sample counts.
- The \sQuote{quantile} method equalizes the distributions of protein counts across all samples.
--In the \sQuote{DESeq} approach by Anders and Huber (2010), counts are divided by the the median of the ratio of its count over its geometric mean across all samples.
-+In the \sQuote{DESeq2} approach by Anders and Huber (2010), counts are divided by the the median of the ratio of its count over its geometric mean across all samples.
- In the \sQuote{TMM} approach by Robinson and Oshlack (2010), a scaling factor is computed as the weighted mean of log ratios between chosen test and reference samples.
-
- }
---- a/man/saint_permF.Rd
-+++ b/man/saint_permF.Rd
-@@ -7,7 +7,7 @@ Pre- and Postprocessing for AP-MS data a
- A complete workflow for the identification of true interaction proteins based on AP-MS data, embedding the scoring method SAINT into a pre- and postprocessing framework.
- }
- \usage{
--saint_permF(file_baittable, file_inttable, prottable, norm = c("none", "sumtotal", "upperquartile", "DESeq", "TMM", "quantile"), Filter = TRUE, filter.method = c("IQR", "overallVar", "noVar"), var.cutoff = NA, limit = 0, intern.norm = FALSE, saint.options = "2000 10000 0 1 0")
-+saint_permF(file_baittable, file_inttable, prottable, norm = c("none", "sumtotal", "upperquartile", "DESeq2", "TMM", "quantile"), Filter = TRUE, filter.method = c("IQR", "overallVar", "noVar"), var.cutoff = NA, limit = 0, intern.norm = FALSE, saint.options = "2000 10000 0 1 0")
- }
- %- maybe also 'usage' for other objects documented here.
- \arguments{
---- a/man/tspm_apms.Rd
-+++ b/man/tspm_apms.Rd
-@@ -8,7 +8,7 @@ A complete workflow for the analysis of
-
- }
- \usage{
--tspm_apms(counts, baittab, norm = c("none", "sumtotal", "upperquartile", "DESeq", "TMM", "quantile"), Filter = TRUE, filter.method = c("IQR", "overallVar", "noVar"), var.cutoff = NA, limit = 0, adj.method = c("BH", "WY"))
-+tspm_apms(counts, baittab, norm = c("none", "sumtotal", "upperquartile", "DESeq2", "TMM", "quantile"), Filter = TRUE, filter.method = c("IQR", "overallVar", "noVar"), var.cutoff = NA, limit = 0, adj.method = c("BH", "WY"))
- }
-
- \arguments{
diff --git a/debian/rules b/debian/rules
deleted file mode 100755
index 529c38a..0000000
--- a/debian/rules
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/usr/bin/make -f
-
-%:
- dh $@ --buildsystem R
-
diff --git a/debian/source/format b/debian/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
diff --git a/debian/upstream/metadata b/debian/upstream/metadata
deleted file mode 100644
index 6765b59..0000000
--- a/debian/upstream/metadata
+++ /dev/null
@@ -1,11 +0,0 @@
-Reference:
- Author: Martina Fischer and Susann Zilkenat and Roman G. Gerlach and Samuel Wagner and Bernhard Y. Renard
- Title: "Pre- and post-processing workflow for affinity purification mass spectrometry data"
- Journal: Journal of Proteome Research
- Year: 2014
- Volume: 13
- Number: 5
- Pages: 2239-49
- DOI: 10.1021/pr401249b
- PMID: 24641689
- URL: http://pubs.acs.org/doi/abs/10.1021/pr401249b
diff --git a/debian/watch b/debian/watch
deleted file mode 100644
index 897a45f..0000000
--- a/debian/watch
+++ /dev/null
@@ -1,3 +0,0 @@
-version=3
-
-http://sf.net/apmswapp/apmsWAPP_(\d[\d\.]+)\.(?:tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz)))
diff --git a/inst/extdata/baittab.txt b/inst/extdata/baittab.txt
new file mode 100644
index 0000000..e7087d9
--- /dev/null
+++ b/inst/extdata/baittab.txt
@@ -0,0 +1,7 @@
+ctrl1 ctrl C
+ctrl2 ctrl C
+ctrl3 ctrl C
+bait1 bait T
+bait2 bait T
+bait3 bait T
+
diff --git a/inst/extdata/inttable.txt b/inst/extdata/inttable.txt
new file mode 100644
index 0000000..3acbe4e
--- /dev/null
+++ b/inst/extdata/inttable.txt
@@ -0,0 +1,378 @@
+ctrl1 ctrl con1 0
+ctrl2 ctrl con1 0
+ctrl3 ctrl con1 0
+bait1 bait con1 0
+bait2 bait con1 0
+bait3 bait con1 1
+ctrl1 ctrl con2 0
+ctrl2 ctrl con2 0
+ctrl3 ctrl con2 0
+bait1 bait con2 3
+bait2 bait con2 0
+bait3 bait con2 0
+ctrl1 ctrl con3 0
+ctrl2 ctrl con3 0
+ctrl3 ctrl con3 0
+bait1 bait con3 1
+bait2 bait con3 0
+bait3 bait con3 0
+ctrl1 ctrl con4 0
+ctrl2 ctrl con4 0
+ctrl3 ctrl con4 0
+bait1 bait con4 0
+bait2 bait con4 2
+bait3 bait con4 0
+ctrl1 ctrl con5 0
+ctrl2 ctrl con5 0
+ctrl3 ctrl con5 0
+bait1 bait con5 0
+bait2 bait con5 0
+bait3 bait con5 3
+ctrl1 ctrl con6 0
+ctrl2 ctrl con6 0
+ctrl3 ctrl con6 0
+bait1 bait con6 0
+bait2 bait con6 1
+bait3 bait con6 0
+ctrl1 ctrl con7 0
+ctrl2 ctrl con7 0
+ctrl3 ctrl con7 0
+bait1 bait con7 0
+bait2 bait con7 1
+bait3 bait con7 0
+ctrl1 ctrl con8 0
+ctrl2 ctrl con8 0
+ctrl3 ctrl con8 0
+bait1 bait con8 0
+bait2 bait con8 3
+bait3 bait con8 0
+ctrl1 ctrl con9 0
+ctrl2 ctrl con9 0
+ctrl3 ctrl con9 0
+bait1 bait con9 0
+bait2 bait con9 0
+bait3 bait con9 3
+ctrl1 ctrl con10 0
+ctrl2 ctrl con10 0
+ctrl3 ctrl con10 0
+bait1 bait con10 0
+bait2 bait con10 0
+bait3 bait con10 1
+ctrl1 ctrl con260 1
+ctrl2 ctrl con260 2
+ctrl3 ctrl con260 2
+bait1 bait con260 3
+bait2 bait con260 1
+bait3 bait con260 1
+ctrl1 ctrl con261 3
+ctrl2 ctrl con261 5
+ctrl3 ctrl con261 2
+bait1 bait con261 2
+bait2 bait con261 1
+bait3 bait con261 3
+ctrl1 ctrl con262 1
+ctrl2 ctrl con262 1
+ctrl3 ctrl con262 4
+bait1 bait con262 1
+bait2 bait con262 1
+bait3 bait con262 1
+ctrl1 ctrl con263 2
+ctrl2 ctrl con263 3
+ctrl3 ctrl con263 6
+bait1 bait con263 5
+bait2 bait con263 4
+bait3 bait con263 9
+ctrl1 ctrl con264 2
+ctrl2 ctrl con264 5
+ctrl3 ctrl con264 5
+bait1 bait con264 4
+bait2 bait con264 4
+bait3 bait con264 3
+ctrl1 ctrl con265 1
+ctrl2 ctrl con265 0
+ctrl3 ctrl con265 1
+bait1 bait con265 0
+bait2 bait con265 1
+bait3 bait con265 2
+ctrl1 ctrl con266 1
+ctrl2 ctrl con266 3
+ctrl3 ctrl con266 1
+bait1 bait con266 2
+bait2 bait con266 1
+bait3 bait con266 0
+ctrl1 ctrl con267 3
+ctrl2 ctrl con267 4
+ctrl3 ctrl con267 7
+bait1 bait con267 7
+bait2 bait con267 3
+bait3 bait con267 4
+ctrl1 ctrl con268 7
+ctrl2 ctrl con268 5
+ctrl3 ctrl con268 5
+bait1 bait con268 2
+bait2 bait con268 6
+bait3 bait con268 7
+ctrl1 ctrl con269 6
+ctrl2 ctrl con269 1
+ctrl3 ctrl con269 2
+bait1 bait con269 5
+bait2 bait con269 2
+bait3 bait con269 1
+ctrl1 ctrl con270 2
+ctrl2 ctrl con270 2
+ctrl3 ctrl con270 2
+bait1 bait con270 3
+bait2 bait con270 2
+bait3 bait con270 3
+ctrl1 ctrl con380 11
+ctrl2 ctrl con380 13
+ctrl3 ctrl con380 21
+bait1 bait con380 18
+bait2 bait con380 23
+bait3 bait con380 18
+ctrl1 ctrl con381 36
+ctrl2 ctrl con381 37
+ctrl3 ctrl con381 28
+bait1 bait con381 41
+bait2 bait con381 36
+bait3 bait con381 38
+ctrl1 ctrl con382 22
+ctrl2 ctrl con382 25
+ctrl3 ctrl con382 21
+bait1 bait con382 26
+bait2 bait con382 24
+bait3 bait con382 23
+ctrl1 ctrl con383 43
+ctrl2 ctrl con383 54
+ctrl3 ctrl con383 45
+bait1 bait con383 49
+bait2 bait con383 39
+bait3 bait con383 48
+ctrl1 ctrl con384 42
+ctrl2 ctrl con384 39
+ctrl3 ctrl con384 34
+bait1 bait con384 32
+bait2 bait con384 38
+bait3 bait con384 37
+ctrl1 ctrl con385 35
+ctrl2 ctrl con385 47
+ctrl3 ctrl con385 38
+bait1 bait con385 40
+bait2 bait con385 36
+bait3 bait con385 32
+ctrl1 ctrl con386 17
+ctrl2 ctrl con386 17
+ctrl3 ctrl con386 28
+bait1 bait con386 27
+bait2 bait con386 33
+bait3 bait con386 30
+ctrl1 ctrl con387 22
+ctrl2 ctrl con387 12
+ctrl3 ctrl con387 26
+bait1 bait con387 24
+bait2 bait con387 24
+bait3 bait con387 15
+ctrl1 ctrl con388 27
+ctrl2 ctrl con388 33
+ctrl3 ctrl con388 27
+bait1 bait con388 27
+bait2 bait con388 31
+bait3 bait con388 33
+ctrl1 ctrl con389 34
+ctrl2 ctrl con389 33
+ctrl3 ctrl con389 31
+bait1 bait con389 22
+bait2 bait con389 33
+bait3 bait con389 31
+ctrl1 ctrl con390 39
+ctrl2 ctrl con390 56
+ctrl3 ctrl con390 46
+bait1 bait con390 43
+bait2 bait con390 53
+bait3 bait con390 41
+ctrl1 ctrl prot20 0
+ctrl2 ctrl prot20 0
+ctrl3 ctrl prot20 0
+bait1 bait prot20 7
+bait2 bait prot20 3
+bait3 bait prot20 0
+ctrl1 ctrl prot21 0
+ctrl2 ctrl prot21 0
+ctrl3 ctrl prot21 0
+bait1 bait prot21 11
+bait2 bait prot21 14
+bait3 bait prot21 12
+ctrl1 ctrl prot22 0
+ctrl2 ctrl prot22 0
+ctrl3 ctrl prot22 0
+bait1 bait prot22 7
+bait2 bait prot22 7
+bait3 bait prot22 10
+ctrl1 ctrl prot23 0
+ctrl2 ctrl prot23 0
+ctrl3 ctrl prot23 0
+bait1 bait prot23 23
+bait2 bait prot23 29
+bait3 bait prot23 28
+ctrl1 ctrl prot24 0
+ctrl2 ctrl prot24 0
+ctrl3 ctrl prot24 0
+bait1 bait prot24 14
+bait2 bait prot24 17
+bait3 bait prot24 18
+ctrl1 ctrl prot25 0
+ctrl2 ctrl prot25 0
+ctrl3 ctrl prot25 0
+bait1 bait prot25 14
+bait2 bait prot25 15
+bait3 bait prot25 11
+ctrl1 ctrl prot26 0
+ctrl2 ctrl prot26 0
+ctrl3 ctrl prot26 0
+bait1 bait prot26 17
+bait2 bait prot26 10
+bait3 bait prot26 10
+ctrl1 ctrl prot27 0
+ctrl2 ctrl prot27 0
+ctrl3 ctrl prot27 0
+bait1 bait prot27 4
+bait2 bait prot27 9
+bait3 bait prot27 0
+ctrl1 ctrl prot28 0
+ctrl2 ctrl prot28 0
+ctrl3 ctrl prot28 0
+bait1 bait prot28 16
+bait2 bait prot28 16
+bait3 bait prot28 19
+ctrl1 ctrl prot29 0
+ctrl2 ctrl prot29 0
+ctrl3 ctrl prot29 0
+bait1 bait prot29 9
+bait2 bait prot29 13
+bait3 bait prot29 11
+ctrl1 ctrl prot30 0
+ctrl2 ctrl prot30 0
+ctrl3 ctrl prot30 0
+bait1 bait prot30 19
+bait2 bait prot30 18
+bait3 bait prot30 19
+ctrl1 ctrl prot31 0
+ctrl2 ctrl prot31 0
+ctrl3 ctrl prot31 0
+bait1 bait prot31 8
+bait2 bait prot31 7
+bait3 bait prot31 6
+ctrl1 ctrl prot32 0
+ctrl2 ctrl prot32 0
+ctrl3 ctrl prot32 0
+bait1 bait prot32 17
+bait2 bait prot32 9
+bait3 bait prot32 10
+ctrl1 ctrl prot33 0
+ctrl2 ctrl prot33 0
+ctrl3 ctrl prot33 0
+bait1 bait prot33 8
+bait2 bait prot33 7
+bait3 bait prot33 15
+ctrl1 ctrl prot34 0
+ctrl2 ctrl prot34 0
+ctrl3 ctrl prot34 0
+bait1 bait prot34 15
+bait2 bait prot34 15
+bait3 bait prot34 12
+ctrl1 ctrl prot35 0
+ctrl2 ctrl prot35 0
+ctrl3 ctrl prot35 0
+bait1 bait prot35 7
+bait2 bait prot35 6
+bait3 bait prot35 6
+ctrl1 ctrl prot36 0
+ctrl2 ctrl prot36 0
+ctrl3 ctrl prot36 0
+bait1 bait prot36 15
+bait2 bait prot36 14
+bait3 bait prot36 18
+ctrl1 ctrl prot37 0
+ctrl2 ctrl prot37 0
+ctrl3 ctrl prot37 0
+bait1 bait prot37 23
+bait2 bait prot37 29
+bait3 bait prot37 27
+ctrl1 ctrl prot38 0
+ctrl2 ctrl prot38 0
+ctrl3 ctrl prot38 0
+bait1 bait prot38 30
+bait2 bait prot38 23
+bait3 bait prot38 26
+ctrl1 ctrl prot39 0
+ctrl2 ctrl prot39 0
+ctrl3 ctrl prot39 0
+bait1 bait prot39 4
+bait2 bait prot39 9
+bait3 bait prot39 4
+ctrl1 ctrl prot40 0
+ctrl2 ctrl prot40 0
+ctrl3 ctrl prot40 0
+bait1 bait prot40 20
+bait2 bait prot40 31
+bait3 bait prot40 34
+ctrl1 ctrl sprot1 5
+ctrl2 ctrl sprot1 8
+ctrl3 ctrl sprot1 3
+bait1 bait sprot1 17
+bait2 bait sprot1 13
+bait3 bait sprot1 24
+ctrl1 ctrl sprot2 5
+ctrl2 ctrl sprot2 3
+ctrl3 ctrl sprot2 1
+bait1 bait sprot2 9
+bait2 bait sprot2 15
+bait3 bait sprot2 7
+ctrl1 ctrl sprot3 0
+ctrl2 ctrl sprot3 2
+ctrl3 ctrl sprot3 1
+bait1 bait sprot3 21
+bait2 bait sprot3 22
+bait3 bait sprot3 15
+ctrl1 ctrl sprot4 2
+ctrl2 ctrl sprot4 3
+ctrl3 ctrl sprot4 2
+bait1 bait sprot4 15
+bait2 bait sprot4 17
+bait3 bait sprot4 22
+ctrl1 ctrl sprot5 1
+ctrl2 ctrl sprot5 2
+ctrl3 ctrl sprot5 7
+bait1 bait sprot5 11
+bait2 bait sprot5 11
+bait3 bait sprot5 19
+ctrl1 ctrl sprot6 7
+ctrl2 ctrl sprot6 4
+ctrl3 ctrl sprot6 10
+bait1 bait sprot6 23
+bait2 bait sprot6 17
+bait3 bait sprot6 25
+ctrl1 ctrl sprot7 4
+ctrl2 ctrl sprot7 3
+ctrl3 ctrl sprot7 2
+bait1 bait sprot7 16
+bait2 bait sprot7 18
+bait3 bait sprot7 13
+ctrl1 ctrl sprot8 1
+ctrl2 ctrl sprot8 3
+ctrl3 ctrl sprot8 1
+bait1 bait sprot8 16
+bait2 bait sprot8 6
+bait3 bait sprot8 13
+ctrl1 ctrl sprot9 4
+ctrl2 ctrl sprot9 5
+ctrl3 ctrl sprot9 1
+bait1 bait sprot9 20
+bait2 bait sprot9 20
+bait3 bait sprot9 15
+ctrl1 ctrl sprot10 1
+ctrl2 ctrl sprot10 3
+ctrl3 ctrl sprot10 2
+bait1 bait sprot10 13
+bait2 bait sprot10 20
+bait3 bait sprot10 16
diff --git a/inst/extdata/prottable.txt b/inst/extdata/prottable.txt
new file mode 100644
index 0000000..e6716f2
--- /dev/null
+++ b/inst/extdata/prottable.txt
@@ -0,0 +1,63 @@
+con1 217 con1
+con2 361 con2
+con3 289 con3
+con4 247 con4
+con5 336 con5
+con6 71 con6
+con7 203 con7
+con8 348 con8
+con9 536 con9
+con10 96 con10
+con260 349 con260
+con261 48 con261
+con262 358 con262
+con263 250 con263
+con264 393 con264
+con265 124 con265
+con266 194 con266
+con267 835 con267
+con268 588 con268
+con269 43 con269
+con270 119 con270
+con380 314 con380
+con381 226 con381
+con382 217 con382
+con383 612 con383
+con384 228 con384
+con385 111 con385
+con386 222 con386
+con387 265 con387
+con388 385 con388
+con389 166 con389
+con390 347 con390
+prot20 251 prot20
+prot21 225 prot21
+prot22 141 prot22
+prot23 345 prot23
+prot24 125 prot24
+prot25 270 prot25
+prot26 226 prot26
+prot27 574 prot27
+prot28 244 prot28
+prot29 651 prot29
+prot30 150 prot30
+prot31 187 prot31
+prot32 427 prot32
+prot33 457 prot33
+prot34 265 prot34
+prot35 587 prot35
+prot36 395 prot36
+prot37 80 prot37
+prot38 546 prot38
+prot39 110 prot39
+prot40 392 prot40
+sprot1 77 sprot1
+sprot2 416 sprot2
+sprot3 389 sprot3
+sprot4 152 sprot4
+sprot5 212 sprot5
+sprot6 121 sprot6
+sprot7 373 sprot7
+sprot8 57 sprot8
+sprot9 285 sprot9
+sprot10 449 sprot10
diff --git a/man/apmsWAPP-internal.Rd b/man/apmsWAPP-internal.Rd
new file mode 100644
index 0000000..bb93029
--- /dev/null
+++ b/man/apmsWAPP-internal.Rd
@@ -0,0 +1,22 @@
+\name{internals}
+\alias{my.rowIQR}
+\alias{new.saintoutput}
+\alias{saintout}
+\alias{TSPM}
+\alias{WY.permalg}
+
+\title{
+internal functions
+}
+\description{
+Functions for internal usage only
+}
+\usage{
+my.rowIQR(mat)
+new.saintoutput(filename)
+saintout(test)
+TSPM(counts, x1, x0, lib.size, alpha.wh)
+WY.permalg(org.statistic, perm.p)
+}
+
+\keyword{internal}
diff --git a/man/apmsWAPP-package.Rd b/man/apmsWAPP-package.Rd
new file mode 100644
index 0000000..667a9f3
--- /dev/null
+++ b/man/apmsWAPP-package.Rd
@@ -0,0 +1,41 @@
+\name{apmsWAPP-package}
+\alias{apmsWAPP-package}
+\alias{apmsWAPP}
+\docType{package}
+\title{
+Pre- and Postprocessing for AP-MS data analysis
+}
+\description{
+The package \pkg{apmsWAPP} provides a complete workflow for the analysis of AP-MS data, based on replicate single-bait purifications including negative controls. \cr
+It comprises the three main parts of pre-processing, scoring and postprocessing of interaction proteins: \cr
+For pre-processing, five different normalization methods and a filtering procedure is provided. \cr
+For scoring protein-protein-interactions, either the method of SAINT or a two-stage-poisson model (TSPM) adapted to AP-MS data can be chosen. \cr
+For postprocessing, the user can choose between the permutation-based approach of Westfall&Young (applicable to both, SAINT and TSPM) and the adjustment procedure of Benjamini-Hochberg (applicable to TSPM).
+Postprocessing results in the generation of p-values for each interaction candidate, allowing to control the number of false-positive interactions.
+}
+\details{
+\tabular{ll}{
+Package: \tab apmsWAPP\cr
+Type: \tab Package\cr
+Version: \tab 1.0\cr
+Date: \tab 2013-03-14\cr
+License: \tab review license\cr
+}
+The two main function calls are: \code{\link{saint_permF}} (framework based on SAINT) and \code{\link{tspm_apms}} (framework based on TSPM).
+}
+
+\author{
+Martina Fischer (fischerm at rki.de)
+}
+
+\references{
+Fischer M, Zilkenat S, Gerlach R, Wagner S, Renard BY. Pre- and Postprocessing for Affinity Purification Mass Spectrometry Data: More Reliable Detection of Interaction Candidates. \emph{in review}
+
+Choi H, Larsen B, Lin Z-Y, et al. SAINT: probabilistic scoring of affinity purification-mass spectrometry data. \emph{Nature Methods} 2011.
+
+Auer PL, Doerge RW. A two-stage Poisson model for testing RNA-Seq data. \emph{Statistical Applications in Genetics and Molecular Biology} 2011.
+}
+\keyword{package}
+\keyword{APMS}
+
+
diff --git a/man/int_mat.Rd b/man/int_mat.Rd
new file mode 100644
index 0000000..4f3f863
--- /dev/null
+++ b/man/int_mat.Rd
@@ -0,0 +1,41 @@
+\name{int_mat}
+\alias{int2mat}
+\alias{mat2int}
+\title{
+Format transformation of spectral counts
+}
+\description{
+Transformation of a count matrix into an interaction table (format required for SAINT) and vice versa.
+}
+\usage{
+int2mat(IntSaint)
+
+mat2int(mat, baittab)
+}
+\arguments{
+ \item{IntSaint}{a data.frame. The interaction table as required for SAINT (including zero counts).
+}
+ \item{mat}{matrix of spectral counts, proteins in rows and samples in columns.
+}
+ \item{baittab}{a data.frame. The baittable as required for SAINT, classifying control and bait samples.
+}
+}
+\details{
+The \emph{interaction table} consists of four columns: IP name, bait or control name, protein name, spectral count (\bold{note}: a protein which was not detected in one of the samples receives a zero count). \cr
+\code{int2mat} transfers the interaction table into a matrix form. \code{mat2int} transfers a matrix of spectral counts into the interaction table format defined by SAINT.
+}
+\value{
+Either a matrix of spectral counts or a data.frame representing the interaction table is returned.
+}
+
+\author{
+Martina Fischer
+}
+
+\examples{
+intfile <- system.file("extdata", "inttable.txt", package="apmsWAPP")
+interactiontable <- read.table(intfile)
+count.mat <- int2mat(interactiontable)
+class(count.mat)
+dim(count.mat)
+}
\ No newline at end of file
diff --git a/man/norm.inttable.Rd b/man/norm.inttable.Rd
new file mode 100644
index 0000000..8135748
--- /dev/null
+++ b/man/norm.inttable.Rd
@@ -0,0 +1,68 @@
+\name{norm.inttable}
+\alias{norm.inttable}
+
+\title{
+Normalization of spectral count data
+}
+\description{
+Normalization of spectral counts in bait and control samples based on an AP-MS experiment.
+}
+\usage{
+norm.inttable(inttab.mat, baittab, norm = c("sumtotal", "upperquartile", "DESeq", "TMM", "quantile"))
+}
+
+\arguments{
+ \item{inttab.mat}{
+ matrix of spectral counts, proteins in rows and samples in columns.
+}
+ \item{baittab}{
+ a data.frame. The baittable as required for SAINT, classifying control and bait samples.
+}
+ \item{norm}{
+ method to normalize the data.
+}
+}
+\details{
+The baittable corresponds to a format as required for SAINT, consisting of three columns: IP name, bait or control name, indicator for bait and control experiment (T=bait purification, C=control).\cr
+Note that the IP names in the baittable must be in agreement with the sample names.
+
+Five different normalization methods, adapted from microarray and RNA-seq analysis to AP-MS data, are available: \cr
+In the \sQuote{sumtotal} normalization counts are divided by the total number of counts in the sample.
+The \sQuote{upperquartile} normalization corrects counts by dividing each count by the 75\% quantile of its sample counts.
+The \sQuote{quantile} method equalizes the distributions of protein counts across all samples.
+In the \sQuote{DESeq} approach by Anders and Huber (2010), counts are divided by the the median of the ratio of its count over its geometric mean across all samples.
+In the \sQuote{TMM} approach by Robinson and Oshlack (2010), a scaling factor is computed as the weighted mean of log ratios between chosen test and reference samples.
+
+}
+
+\value{
+A list containing the following components:
+\item{1}{normalized spectral count matrix}
+\item{2}{scaling factors (if available)}
+}
+
+\references{
+Anders S, Huber W. Differential expression analysis for sequence count data. \emph{Genome Biology} 2010.
+
+Robinson MD, Oshlack A. A scaling normalization method for differential expression analysis of RNA-seq data. \emph{Genome Biology} 2010.
+
+Bolstad BM, Irizarry RA, Astrand M, et al. A comparison of normalization methods for high density oligonucleotide array data based on variance and bias. \emph{Bioinformatics} 2003.
+
+Dillies M-A, Rau A, Aubert J, et al. A comprehensive evaluation of normalization methods for Illumina high-throughput RNA sequencing data analysis. \emph{Briefings in Bioinformatics} 2012.
+}
+\author{
+Martina Fischer
+}
+
+
+\examples{
+#input data
+intfile <- system.file("extdata", "inttable.txt", package="apmsWAPP")
+counts <- int2mat(read.table(intfile))
+baitfile <- system.file("extdata", "baittab.txt", package="apmsWAPP")
+baittab <- read.table(baitfile)
+# Normalization:
+norm.counts <- norm.inttable(counts, baittab, norm = "upperquartile")
+}
+\keyword{normalization}
+\keyword{APMS}
diff --git a/man/saint_permF.Rd b/man/saint_permF.Rd
new file mode 100644
index 0000000..5300581
--- /dev/null
+++ b/man/saint_permF.Rd
@@ -0,0 +1,117 @@
+\name{saint_permF}
+\alias{saint_permF}
+\title{
+Pre- and Postprocessing for AP-MS data analysis using SAINT
+}
+\description{
+A complete workflow for the identification of true interaction proteins based on AP-MS data, embedding the scoring method SAINT into a pre- and postprocessing framework.
+}
+\usage{
+saint_permF(file_baittable, file_inttable, prottable, norm = c("none", "sumtotal", "upperquartile", "DESeq", "TMM", "quantile"), Filter = TRUE, filter.method = c("IQR", "overallVar", "noVar"), var.cutoff = NA, limit = 0, intern.norm = FALSE, saint.options = "2000 10000 0 1 0")
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+ \item{file_baittable}{
+ a character string specifying the pathname of the baittable. see \emph{Details}.
+}
+ \item{file_inttable}{
+ a character string specifying the pathname of the interaction table. see \emph{Details}.
+}
+ \item{prottable}{
+ a character string specifying the pathname of the protein table. see \emph{Details}.
+
+}
+ \item{norm}{
+ method to normalize the data. If \code{norm="none"}, no normalization of the data is performed.
+}
+ \item{Filter}{
+ logical value, whether filtering of the data is applied (Default \code{TRUE}).
+}
+ \item{filter.method}{
+ method to use for filtering, must be one of \code{"IQR"}, \code{"overallVar"} or \code{"noVar"}, only used when \code{Filter=TRUE}.
+}
+ \item{var.cutoff}{
+ percentile (between 0 and 1) or \code{NA}. Cutoff for filtering the data, defined by a quantile or shortest-interval (=\code{NA}, Default), only used when \code{Filter=TRUE}.
+}
+ \item{limit}{
+ minimal number of expected true interaction proteins in the data.
+}
+ \item{intern.norm}{
+ logical value. If \code{TRUE}, normalization is repeated on the filtered data (Default \code{FALSE}).
+}
+ \item{saint.options}{
+ parameters set for SAINT.
+}
+}
+\details{
+The input files correspond to the input formats used by SAINT: the baittable, prey- and interaction table in the form of tab-delimited files. \cr
+The \emph{baittable} consists of three columns: IP name, bait or control name, indicator for bait and control experiment (T=bait purification, C=control). \cr
+The \emph{interaction table} consists of four columns: IP name, bait or control name, protein name, spectral count (\bold{note}: a protein which was not detected in one of the samples receives a zero count). \cr
+The \emph{protein table} refers to the preyfile, it consists of three columns: protein names, protein length, protein names or associated gene names (if available).\cr
+A more detailed description on the generation of these files is given in Choi et.al. \emph{(Current Protocols in Bioinformatics 2012)}.
+
+Pre-processing comprises normalization and filtering of the data: \cr
+Here, it can be chosen from five different normalization methods, adapted from microarray and RNA-seq analysis to AP-MS data. For further details see \code{\link{norm.inttable}}. \cr
+The filter consists of a biological filter and a statistical variance filter and aims to remove obvious contaminants from further analysis. \cr
+If \code{filter.method="noVar"}, only the biological filter is conducted.
+Both are conducted, if \code{filter.method="IQR"}, here the variance is calculated by the inter-quartile-range, or if \code{filter.method="overallVar"}, here the variance is calculated across all samples. \cr
+The \code{var.cutoff} defines the fraction of proteins with the lowest overall variance, which are considered as contaminants and are removed.
+\code{var.cutoff=NA} refers to a cutoff defined by the mean of the shortest intervall containing 50\% of the data (default). Alternatively, a quantile can be set as cutoff, e.g. a cutoff of 0.5 filters 50\% of the data showing the smallest overall variance or IQR. see also \code{\link{varFilter}}\cr
+The parameter \code{limit} assures, that filtering results in a number of proteins above the number of expected true interaction proteins.
+
+The corresponding parameters in SAINT \code{[nburn][niter][lowMode][minFold][normalize]} are set as recommended by SAINT. Further details on the parameter setting can be found in Choi et.al.\emph{(Current Protocols in Bioinformatics 2012)}.
+}
+
+\value{
+The overall result is reported in the file \emph{WY_Result.csv}: \cr
+It is based on the original Saint output \sQuote{unique_interactions}, but additionally Westfall&Young adjusted p-values are assigned to each interaction candidate. These p-values control the FWER, allowing to estimate the portion of false-positive interactions.
+
+Different .txt and .xls files are generated, enabling the user to follow the different intermediate results: \cr
+\enumerate{
+\item In case of normalization: normalized count data in form of the interaction table (\emph{txt file}), named after the normalization method and the bait protein (e.g. quantile_bait_IntSaint.txt).
+\item In case of filtering: the filtered (and normalized) interaction table (\emph{Inttable_filtered.txt}).
+\item The Saint output: \sQuote{unique_interactions}, reporting the interaction candidates with SAINT scores, calculated on normalized data (file name ending \emph{_orig}), and filtered: (file name ending \emph{_orgF}).
+\item Permutation data: scores calculated for each permutation data set (permutation matrix as \emph{perm.avgp.Rata}, \emph{perm.maxp.Rdata}).
+}
+}
+
+\references{
+Choi H, Larsen B, Lin Z-Y, et al. SAINT: probabilistic scoring of affinity purification-mass spectrometry data. \emph{Nature Methods} 2011.
+
+Choi H, Liu G, Mellacheruvu D, et al. Analyzing Protein-Protein Interactions from Affinity Purification-Mass Spectrometry Data with SAINT. \emph{Current Protocols in Bioinformatics} 2012.
+
+Anders S, Huber W. Differential expression analysis for sequence count data. \emph{Genome Biology} 2010.
+
+Robinson MD, Oshlack A. A scaling normalization method for differential expression analysis of RNA-seq data. \emph{Genome Biology} 2010.
+
+Bolstad BM, Irizarry RA, Astrand M, et al. A comparison of normalization methods for high density oligonucleotide array data based on variance and bias. \emph{Bioinformatics} 2003.
+
+Westfall PH, Young SS. Resampling-based multiple testing: examples and methods for p-value adjustment. 1993.
+
+Bourgon R, Gentleman R, Huber W. Independent filtering increases detection power for high-throughput experiments. \emph{Proceedings of the National Academy of Sciences} 2010.
+}
+
+\author{
+Martina Fischer
+}
+
+\note{
+SAINT is run as part of the workflow. It is important to note that the function \code{saint_permF} requires a linux environment and was tested on SAINT version 2.3.4.
+}
+
+
+
+
+\examples{
+#input dara
+baitfile <- system.file("extdata", "baittab.txt", package="apmsWAPP")
+intfile <- system.file("extdata", "inttable.txt", package="apmsWAPP")
+protfile <- system.file("extdata", "prottable.txt", package="apmsWAPP")
+# Pre-processing: quantile normalization and filtering
+# Important: Define a working directory for storage of the resulting files
+# Workflow call:
+saint_permF(baitfile,intfile,protfile, norm="quantile", Filter=TRUE, filter.method="overallVar", var.cutoff=0.3, intern.norm=FALSE)
+}
+
+\keyword{Preprocessing}
+\keyword{APMS}
diff --git a/man/tspm_apms.Rd b/man/tspm_apms.Rd
new file mode 100644
index 0000000..6bf1a80
--- /dev/null
+++ b/man/tspm_apms.Rd
@@ -0,0 +1,95 @@
+\name{tspm_apms}
+\alias{tspm_apms}
+\title{
+Workflow for AP-MS data analysis using TSPM
+}
+\description{
+A complete workflow for the analysis of AP-MS data, using a two-stage-poisson model and a pre- and postprocessing framework.
+
+}
+\usage{
+tspm_apms(counts, baittab, norm = c("none", "sumtotal", "upperquartile", "DESeq", "TMM", "quantile"), Filter = TRUE, filter.method = c("IQR", "overallVar", "noVar"), var.cutoff = NA, limit = 0, adj.method = c("BH", "WY"))
+}
+
+\arguments{
+ \item{counts}{
+ matrix of spectral counts, proteins in rows and samples in columns.
+}
+ \item{baittab}{
+ a character string specifying the pathname of the baittable. see Details.
+}
+ \item{norm}{
+ method to normalize the data. If \code{norm="none"}, no normalization of the data is performed.
+}
+ \item{Filter}{
+ logical value, whether filtering of the data is applied (Default \code{TRUE}).
+}
+ \item{filter.method}{
+ method to use for filtering, must be one of \code{"IQR"}, \code{"overallVar"} or \code{"noVar"}, only used when \code{Filter=TRUE}.
+}
+ \item{var.cutoff}{
+ percentile (between 0 and 1) or \code{NA}. Cutoff for filtering the data, defined by a quantile or shortest-interval (=\code{NA}, Default), only used when \code{Filter=TRUE}.
+}
+ \item{limit}{
+ minimal number of expected true interaction proteins in the data.
+}
+ \item{adj.method}{
+ method to adjust p-values for multiple testing.
+}
+}
+\details{
+The baittable corresponds to a tab/space delimited file as required for SAINT - consisting of three columns: IP name, bait or control name, indicator for bait and control experiment (T=bait purification, C=control).
+
+Pre-processing comprises normalization and filtering of the data: \cr
+Here, it can be chosen from five different normalization methods, adapted from microarray and RNA-seq analysis to AP-MS data. For further details see \code{\link{norm.inttable}}. \cr
+The filter consists of a biological filter and a statistical variance filter and aims to remove obvious contaminants from further analysis. \cr
+If \code{filter.method="noVar"}, only the biological filter is conducted.
+Both are conducted, if \code{filter.method="IQR"}, here the variance is calculated by the inter-quartile-range, or if \code{filter.method="overallVar"}, here the variance is calculated across all samples. \cr
+The \code{var.cutoff} defines the fraction of proteins with the lowest overall variance, which are considered as contaminants and are removed.
+\code{var.cutoff=NA} refers to a cutoff defined by the mean of the shortest intervall containing 50\% of the data (default). Alternatively, a quantile can be set as cutoff, e.g. a cutoff of 0.5 filters 50\% of the data showing the smallest overall variance or IQR. see also \code{\link{varFilter}}\cr
+The parameter \code{limit} assures, that filtering results in a number of proteins above the number of expected true interaction proteins.
+
+For postprocessing, two different adjustment procedures are provided for multiple testing: the Benjamini-Hochberg procedure (\code{"BH"}) (p-values are controlled by FDR),
+and the permutation approach coupled to the Westfall&Young (\code{"WY"}) algorithm (p-values are controlled by FWER).
+}
+
+\value{
+A list containing the following components:
+\item{id }{name of the interaction protein}
+\item{log.fold.change }{a vector containing the estimated log fold changes for each protein}
+\item{pvalues}{a vector containing the raw p-values for each protein, evaluating the interaction }
+\item{padj }{a vector containing the p-values after adjusting for multiple testing using the method of Benjamini-Hochberg }
+\item{LRT}{a vector of Likelihood Ratio statistics, scoring the interaction potential of each protein }
+\item{dispersion}{a vector of yes/no indicating overdispersion for each protein}
+\item{adjusted.p}{a vector containing the adjusted p-values using the permutation-based approach of Westfall&Young}
+\item{counter}{a vector containing the number of exceeding permutation scores using the permutation-based approach of Westfall&Young}
+\item{matrix1}{(filtered) (normalized) matrix of spectral counts}
+\item{matrix2}{permutation matrix of scores, permutation runs in columns and proteins in rows}
+}
+
+\references{
+Fischer M, Zilkenat S, Gerlach R, Wagner S, Renard BY. Pre- and Postprocessing for Affinity Purification Mass Spectrometry Data: More Reliable Detection of Interaction Candidates. \emph{in review}
+
+Auer PL, Doerge RW. A two-stage Poisson model for testing RNA-Seq data. \emph{Statistical Applications in Genetics and Molecular Biology} 2011.
+}
+
+\author{
+Martina Fischer
+}
+
+\examples{
+# input data
+intfile <- system.file("extdata", "inttable.txt", package="apmsWAPP")
+counts <- int2mat(read.table(intfile))
+baitfile <- system.file("extdata", "baittab.txt", package="apmsWAPP")
+# TSPM with quantile normalization and filtering
+tspm.quaF <- tspm_apms( counts, baitfile, norm="quantile", Filter=TRUE, filter.method="overallVar", var.cutoff=0.1, adj.method="WY")
+# Results:
+# for adjustment with BH:
+cat("Number of Proteins with p-value <0.05: ",length(which(tspm.quaF[[1]]$padj < 0.05) ) )
+# for adjustment with WY:
+cat("Number of Proteins with p-value <0.05: ",length(which(tspm.quaF[[2]][,2] <0.05)))
+}
+
+\keyword{TSPM}
+\keyword{APMS}
diff --git a/man/varFilter.Rd b/man/varFilter.Rd
new file mode 100644
index 0000000..b4d7501
--- /dev/null
+++ b/man/varFilter.Rd
@@ -0,0 +1,64 @@
+\name{varFilter}
+\alias{varFilter}
+\title{
+Filtering of AP-MS data
+}
+\description{
+The filter consists of a biological filter and a statistical variance filter and aims to remove obvious contaminants in AP-MS data.}
+\usage{
+varFilter(mat, baittab, func = c("IQR", "overallVar", "noVar"), var.cutoff = NA, limit = 0)
+}
+\arguments{
+ \item{mat}{
+ matrix of spectral counts, proteins in rows and samples in columns.
+}
+ \item{baittab}{
+ a data.frame. The baittable as required for SAINT, classifying control and bait samples.
+}
+ \item{func}{
+ method to use for filtering, must be one of \code{"IQR"}, \code{"overallVar"} or \code{"noVar"}.
+}
+ \item{var.cutoff}{
+ percentile (between 0 and 1) or \code{NA}. Cutoff for filtering the data, defined by a quantile or shortest-interval (=\code{NA}, Default).
+}
+ \item{limit}{
+ minimal number of expected true interaction proteins in the data.
+}
+}
+\details{
+If \code{filter.method="noVar"}, only the biological filter is conducted.
+The biological and statistical filter are applied, if \code{filter.method="IQR"}, here the variance is calculated by the inter-quartile-range, or if \code{filter.method="overallVar"}, here the variance is calculated across all samples.
+
+The \code{var.cutoff} defines the fraction of proteins with the lowest overall variance, which are considered as contaminants and are removed.
+\code{var.cutoff=NA} refers to a cutoff defined by the mean of the shortest intervall containing 50\% of the data (default). Alternatively, a quantile can be set as cutoff, e.g. a cutoff of 0.5 filters 50\% of the data showing the smallest overall variance or IQR.
+
+The parameter \code{limit} assures, that filtering results in a number of proteins above the number of expected true interaction proteins.\cr
+It is recommended to set the parameters \code{var.cutoff} and \code{limit} according to biological knowledge, if available.
+
+}
+\value{
+filtered matrix of spectral counts
+}
+\references{
+Bourgon R, Gentleman R, Huber W. Independent filtering increases detection power for high-throughput experiments. \emph{Proceedings of the National Academy of Sciences} 2010.
+}
+\author{
+Martina Fischer
+}
+
+\seealso{
+\code{\link{shorth}}
+}
+\examples{
+#input data
+intfile <- system.file("extdata", "inttable.txt", package="apmsWAPP")
+counts <- int2mat(read.table(intfile))
+baitfile <- system.file("extdata", "baittab.txt", package="apmsWAPP")
+baittab <- read.table(baitfile)
+dim(counts)
+# Filtering:
+counts.filtered <- varFilter(counts, baittab, func = "overallVar", var.cutoff = 0.3, limit = 0)
+dim(counts.filtered)
+}
+\keyword{filtering}
+\keyword{APMS}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-other-apmswapp.git
More information about the debian-med-commit
mailing list