[med-svn] [r-other-apmswapp] 10/12: New upstream version 1.0

Andreas Tille tille at debian.org
Fri Nov 10 12:26:39 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository r-other-apmswapp.

commit bd013a90c0c59a8b5a6a760470efbf6bf94abe06
Author: Andreas Tille <tille at debian.org>
Date:   Fri Nov 10 13:23:07 2017 +0100

    New upstream version 1.0
---
 DESCRIPTION                      |  18 ++
 NAMESPACE                        |   1 +
 R/Inttab_Norm.R                  | 102 +++++++++++
 R/NonspecFilter.r                |  75 ++++++++
 R/Saintout_process.r             |  48 +++++
 R/TSPM_apms.r                    | 111 ++++++++++++
 R/mat_Int_change.r               |  39 ++++
 R/myTSPM.r                       | 121 +++++++++++++
 R/myWFalg_Dudoit.r               |  48 +++++
 R/saint_permF.r                  | 235 ++++++++++++++++++++++++
 debian/changelog                 |   7 -
 debian/compat                    |   1 -
 debian/control                   |  36 ----
 debian/copyright                 |  11 --
 debian/patches/series            |   1 -
 debian/patches/uses_deseq2.patch | 111 ------------
 debian/rules                     |   5 -
 debian/source/format             |   1 -
 debian/upstream/metadata         |  11 --
 debian/watch                     |   3 -
 inst/extdata/baittab.txt         |   7 +
 inst/extdata/inttable.txt        | 378 +++++++++++++++++++++++++++++++++++++++
 inst/extdata/prottable.txt       |  63 +++++++
 man/apmsWAPP-internal.Rd         |  22 +++
 man/apmsWAPP-package.Rd          |  41 +++++
 man/int_mat.Rd                   |  41 +++++
 man/norm.inttable.Rd             |  68 +++++++
 man/saint_permF.Rd               | 117 ++++++++++++
 man/tspm_apms.Rd                 |  95 ++++++++++
 man/varFilter.Rd                 |  64 +++++++
 30 files changed, 1694 insertions(+), 187 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..e47c8d4
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,18 @@
+Package: apmsWAPP
+Type: Package
+Title: Pre- and Postprocessing for AP-MS data analysis based on
+        spectral counts
+Version: 1.0
+Date: 2013-03-14
+Author: Martina Fischer
+Maintainer: Martina Fischer <fischerm at rki.de>
+Description: apmsWAPP provides a complete workflow for the analysis of AP-MS data (replicate single-bait purifications including negative controls) based on spectral counts. 
+		It comprises pre-processing, scoring and postprocessing of protein interactions.
+		A final list of interaction candidates is reported: it provides a ranking of the candidates according 
+		to their p-values which allow estimating the number of false-positive interactions.
+Depends: genefilter, seqinr, multtest, gtools, limma, edgeR, DESeq,
+        aroma.light
+License: Review License
+LazyLoad: yes
+SystemRequirements: SAINT_v2.3.4
+Packaged: 2013-04-09 15:13:20 UTC; martina
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..40ea8e2
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1 @@
+exportPattern("^[[:alpha:]]+")
diff --git a/R/Inttab_Norm.R b/R/Inttab_Norm.R
new file mode 100644
index 0000000..16917d9
--- /dev/null
+++ b/R/Inttab_Norm.R
@@ -0,0 +1,102 @@
+### Function for Normalization of Counts in the interactiontable:
+# Input: - original interaction matrix   (interaction table including zeros, raw discrete counts!)
+#        - baittable = classification of samples and controls
+#        - different normalization methods can be chosen in "norm":
+#         "sumtotal", "upperquartile", "DESeq", "TMM", "quantile"
+# Output: normed interaction-matrix (normed count table), scaling factors
+
+norm.inttable <- function( inttab.mat, baittab, norm = c("sumtotal", "upperquartile", "DESeq", "TMM", "quantile")) {
+
+require(limma)
+require(edgeR)
+require(DESeq)
+require(aroma.light)
+norm <- match.arg(norm)
+
+baittab$V3 <- as.character(baittab$V3)
+baittab$V1 <- as.character(baittab$V1)
+
+Cpos <- match(baittab$V1[grep("C",baittab$V3)], colnames(inttab.mat))     # columns in inttab.mat for "C"controls
+Tpos <- match(baittab$V1[grep("T",baittab$V3)], colnames(inttab.mat))     # columns in inttab.mat for "T"samples
+
+
+                                  ##########    Normalization     ############
+                                  
+# "sumtotal" :
+if(norm=="sumtotal") {
+sumcount <- apply(inttab.mat, 2, function(x){sum(x)})    # sum total in samples
+scal.fac <- sumcount
+scal.fac[Cpos] <- scal.fac[Cpos] / median(sumcount[Cpos])
+scal.fac[Tpos] <- scal.fac[Tpos] / median(sumcount[Tpos])
+inttab.norm <- apply(inttab.mat, 2, function(x){x/sum(x)} )
+
+inttab.norm[ ,Cpos] <- inttab.norm[ ,Cpos] * median(sumcount[Cpos])   # within Ctrl replicates
+inttab.norm[ ,Tpos] <- inttab.norm[ ,Tpos] * median(sumcount[Tpos])   # within Bait replicates
+}
+
+
+# "upper-quartile" (proposed in Bullard et al. 2010):
+if(norm=="upperquartile") {
+upper.quartiles <- apply(inttab.mat, 2, function(x){quantile(x, probs=0.75)})
+zerocount <- which(upper.quartiles==0)
+scal.fac <- apply(inttab.mat, 2, function(x){qu<-quantile(x,probs=0.75); if(qu!=0)qu else 1})
+scal.fac[setdiff(Cpos,zerocount)] <- scal.fac[setdiff(Cpos,zerocount)] / median(scal.fac[Cpos])
+scal.fac[setdiff(Tpos,zerocount)] <- scal.fac[setdiff(Tpos,zerocount)] / median(scal.fac[Tpos])         # scaling factors
+
+inttab.norm <- apply(inttab.mat, 2, function(x){qu<-quantile(x,probs=0.75); if(qu!=0)x/qu else x})             # normed by 75% quantile of the samples
+scaling <-  ifelse(upper.quartiles==0,1,upper.quartiles)
+inttab.norm[,setdiff(Cpos,zerocount)] <- inttab.norm[,setdiff(Cpos,zerocount)] * median(scaling[Cpos]) # transfer back to the count scales
+inttab.norm[,setdiff(Tpos,zerocount)] <- inttab.norm[,setdiff(Tpos,zerocount)] * median(scaling[Tpos]) # normalized count matrix
+}                                                                              
+
+
+# "DESeq" - normalization method in the DESeq package (Anders et al. 2010):
+if(norm=="DESeq") {
+cds <- newCountDataSet( inttab.mat, baittab$V3)       # build countDataSet
+
+cds1 <- estimateSizeFactors( cds[ ,Cpos] )           
+cds2 <- estimateSizeFactors( cds[ ,Tpos] )
+sizeFactors( cds ) [Cpos] <- sizeFactors( cds1 )
+sizeFactors( cds ) [Tpos] <- sizeFactors( cds2 )
+scal.fac <- sizeFactors( cds )                                               # scaling factors
+
+inttab.norm <- scale(counts(cds), center=FALSE, scale=sizeFactors(cds))
+colnames(inttab.norm) <-colnames(counts(cds))                                # normalized count matrix
+}
+
+
+# "TMM" - normalization method in the edgeR package (Robinson et al. 2010):
+if(norm=="TMM") {
+d1 <- DGEList(inttab.mat[, Cpos])
+d1 <- calcNormFactors(d1, method="TMM")
+eff.libsize1 <- d1$samples$lib.size * d1$samples$norm.factors
+
+d2 <- DGEList(inttab.mat[, Tpos])
+d2 <- calcNormFactors(d2, method="TMM")
+eff.libsize2 <- d2$samples$lib.size * d2$samples$norm.factors
+
+eff.libsize <- rep(NA, times=dim(inttab.mat)[2])
+eff.libsize[Cpos] <- eff.libsize1
+eff.libsize[Tpos] <- eff.libsize2
+scal.fac <- eff.libsize                                               # scaling factors
+scal.fac[Cpos] <- scal.fac[Cpos] / median(eff.libsize1)
+scal.fac[Tpos] <- scal.fac[Tpos] / median(eff.libsize2)
+
+inttab.norm <- scale(inttab.mat, center=FALSE, scale=eff.libsize)
+inttab.norm[ ,Cpos] <- inttab.norm[ ,Cpos] * median(eff.libsize1)   
+inttab.norm[ ,Tpos] <- inttab.norm[ ,Tpos] * median(eff.libsize2)     # normalized count matrix
+}
+
+
+# "quantile" normalization:
+if(norm=="quantile") {
+inttab.norm <- inttab.mat
+inttab.norm[ ,Cpos] <- normalizeQuantileRank (inttab.mat[ ,Cpos], robust=TRUE)  # quantile normalization within replicates
+inttab.norm[ ,Tpos] <- normalizeQuantileRank (inttab.mat[ ,Tpos], robust=TRUE)
+scal.fac <- NA                                                                  # no scaling factors available for this method
+}
+
+
+return(list(inttab.norm, scal.fac))         # output
+
+}               # function end
diff --git a/R/NonspecFilter.r b/R/NonspecFilter.r
new file mode 100644
index 0000000..2995bff
--- /dev/null
+++ b/R/NonspecFilter.r
@@ -0,0 +1,75 @@
+### Nonspecific Filtering via Variance
+# Input Matrix: cols=Samples, rows=Protein IDs
+# Filtering via:  - IQR
+#                 - overall Variance
+#                 - noVar: no variance filter, only 1.filter is applied
+# Cutoff Selection via: - shorth (shortest intervall containing 50% of the data) , default: NA
+#                       - define quantile cutoff, e.g. 0.5 -> 50% data filtered of smallest Var or IQR
+# limit = number of expected candidates in the data
+
+#### Functions
+
+my.rowIQR <- function(mat) {          #adapted for matrix from genefilter:::rowIQRs
+require(genefilter) 
+rowQ(mat, ceiling(0.75 * ncol(mat))) - rowQ(mat, floor(0.25 * ncol(mat)))
+}
+
+varFilter <- function (mat, baittab, func= c("IQR", "overallVar", "noVar"), var.cutoff = NA, limit=0)
+{
+  require(genefilter)
+  func <- match.arg(func)
+  
+  Cpos <- match(baittab$V1[grep("C",baittab$V3)], colnames(mat) )     # columns in mat for "C"controls
+  Tpos <- match(baittab$V1[grep("T",baittab$V3)], colnames(mat) )     # columns in mat for "T"samples
+  ctrl.med <- apply(mat[,Cpos], 1, function(x){median(x)} )   # 1.filter: ctrl counts > bait counts
+  bait.med <- apply(mat[,Tpos], 1, function(x){median(x)} )
+  mat <- mat[-which(ctrl.med > bait.med), ]
+cat("Biological filter reduced the number of proteins to: ", dim(mat)[1], "\n")
+if (dim(mat)[1] < limit) {func <- "noVar"; cat("Argument limit is chosen too small, in order to hold it the variance filter cannot be applied\n") } 
+
+  if (func == "IQR") {                                       #2. filter: small Var over all samples
+        vars <- my.rowIQR(mat)     }
+  else if (func == "overallVar")  {
+       vars <- rowVars(mat)        }
+  else if (func == "noVar")  {
+       return(mat)        }
+  else stop("Define filter function!\n")
+
+
+    if (is.na(var.cutoff))  {                       # calculate shorth for cutoff
+     var.cut <- shorth(vars, tie.action="min")
+     if(var.cut <= median(vars))  selected <- which(vars > var.cut)   
+     else stop("shorth calculation is not appropriate in this case, define a quantile for cutoff calculation!\n")  
+     }
+     
+    else if (0 < var.cutoff && var.cutoff < 1)  {
+     quant <- quantile(vars, probs = var.cutoff)   # cutoff corresponding to defined quantile
+     selected <- which(vars > quant)        }
+    else stop("Cutoff Quantile has to be between 0 and 1, alternative 'NA' for shortest interval calculation \n")
+
+    mat.f <- mat[selected,]
+    
+    if (dim(mat.f)[1] > limit )  {
+    cat("Variance filter reduced the number of proteins to: ", dim(mat.f)[1], "\n")
+    return(mat.f)                 }
+    else {
+    cat("Chosen parameter setup filtered data below the threshold of the expected number of candidates \n")
+    if (is.na(var.cutoff)) var.cutoff=0.4 
+                                                # filter parameter setup failed to hold the limit, cutoff adjustment 
+    while( dim(mat.f)[1] < limit)        {
+    if (var.cutoff<=0.05) {cat("Decrease limit, variance filter cannot be applied \n"); return(mat) }
+    else var.cutoff <- var.cutoff-0.05
+    quant <- quantile(vars, probs = var.cutoff)   
+    selected <- which(vars > quant) 
+    mat.f <- mat[selected,]              
+    }
+    
+    cat("Variance filter reduced the number of proteins to: ", dim(mat.f)[1], "\n")
+    return(mat.f)
+    }
+
+}
+
+
+
+
diff --git a/R/Saintout_process.r b/R/Saintout_process.r
new file mode 100644
index 0000000..917546d
--- /dev/null
+++ b/R/Saintout_process.r
@@ -0,0 +1,48 @@
+#######  Saint output correction for Tab-misplacements:  function "new.saintoutput"
+# Input: file-name of original saint interaction-output = unique-interaction table  -> define path before!
+# Output: corrected saint-output of interactions
+
+
+            ### Correction SAINT output  ###
+
+saintout <- function(test) {
+require(seqinr)
+test <- test[2:length(test)]                 
+test <- strsplit(test,split="\t")
+length(test)                                
+#table(unlist(lapply(test, function(x){length(x)} ) ))     #check number of columns: expected 12
+
+lost12 <- which(is.na(unlist(lapply(test, function(x){x[12]} ) ) ))  #column 12 missing
+
+for ( i in lost12) {
+a <- strsplit(test[[i]][10], split="|", fixed=TRUE)
+ll <- length(a[[1]])
+if (ll==1)                          # no ctrl counts, pure shifting of columns 11+12 to the previous ones
+  {test[[i]][12]<-test[[i]][11] ;
+   test[[i]][11]<-test[[i]][10] ;
+   test[[i]][10] <- NA
+  }
+else                                # column 11 displaced in column 10 
+  {
+  if(is.integer(a[[1]][ll])==FALSE)
+  {test[[i]][12]<-test[[i]][11] ;
+  test[[i]][11]<- a[[1]][ll] ;
+  g <- c2s(paste(a[[1]][-ll],sep="|",collaps=""))
+  test[[i]][10] <- substr(g,1,nchar(g)-1) }
+  else print("different case in:",i )
+  }
+}
+return(test)
+}
+#######
+                                                                                 
+            ########## SAINT OUTPUT in Data Frame ##############
+
+new.saintoutput <- function(filename){
+require(seqinr)
+saint <- readLines(filename)                      # read original saint output
+saint.out <- saintout(saint)                      # function to correct tab-mistakes from the original Saint output
+saint.out2 <- as.data.frame(do.call("rbind",saint.out))
+colnames(saint.out2) <- unlist(strsplit(readLines(filename)[1], split="\t", fixed=TRUE))
+return(saint.out2)
+}
diff --git a/R/TSPM_apms.r b/R/TSPM_apms.r
new file mode 100644
index 0000000..ff2bd4e
--- /dev/null
+++ b/R/TSPM_apms.r
@@ -0,0 +1,111 @@
+######  Analysis of AP-MS Data: Detection of protein interaction partners and separation from contaminants
+#####   on the base of spectral count Data
+###### Method: Application of the TSPM model + pre- and postprocessing steps
+
+#Steps:   - Normalization
+#         - Filtering
+#         - TSPM model
+#         - p-value Adjustment for multiple testing by:
+#             * Benjamini-Hochberg procedure: FDR controlled p-values
+#             * permutation approach + Westfall&Young algorithm: FWER controlled p-values for each protein interaction-candidate
+
+# Assumptions:
+#     - Input of only one bait protein; controls required; - replicates for each group required
+#     - minimum number of replicates for bait and control experiment preferred to be 3 
+
+# Output:
+#     - TSPM output (id, LFC, rawp, BH.padj, LRT dispersion)
+#     - case WY-adjustment: WY adj. pvalues + counter 
+#     - (filtered) (normalized) count matrix
+
+
+tspm_apms  <- function(counts, baittab, norm = c("none", "sumtotal", "upperquartile", "DESeq", "TMM", "quantile"),Filter=TRUE, filter.method= c("IQR", "overallVar", "noVar"), var.cutoff=NA, limit=0, adj.method=c("BH","WY")) {
+require(multtest)
+require(gtools)
+
+norm <- match.arg(norm)
+adj.method <- match.arg(adj.method)
+
+baittab <- read.table(baittab) 
+baittab$V3 <- as.character(baittab$V3)
+baittab$V2 <- as.character(baittab$V2)
+baittab$V1 <- as.character(baittab$V1)
+if(all(baittab$V3%in%c("C","T"))==FALSE) stop("Sample and Controls need to be defined by T respectively C")
+if(setequal(baittab$V1, colnames(counts))==FALSE) stop("Names of samples need to be consistent in Baittable and Count-matrix")
+
+baittab <- baittab[order(baittab$V3),]                    # baittab sorted: controls first, followed by bait samples
+counts <- counts[ ,match(baittab$V1, colnames(counts))]   # counts in the same order as baittable
+
+counts.org <- counts
+
+#setwd("O:/NG4/staff/Martina/MS_Salmonellen")
+#source("Inttab_Norm.r")           # normalization
+#source("myTSPM.r")                # TSPM model
+#source("NonspecFilter.r")         # filtering
+#source("myWFalg_Dudoit.r")        # Westfall&Young algorithm
+
+                            
+                              ####  NORMALIZATION  ####
+if (norm!="none")  {
+  if(norm=="quantile")  {
+  norm.out <- norm.inttable (counts, baittab, norm)
+  counts <- round(norm.out[[1]])
+  lib.size <- rep(1, times=dim(counts)[2])      }
+  else  {
+  norm.out <- norm.inttable (counts, baittab, norm)
+  lib.size <- norm.out[[2]]
+  counts <- norm.out[[1]]          }
+}
+else { lib.size <- rep(1, times=dim(counts)[2]) }
+
+
+                             ####    FILTERING   ####
+if(Filter==TRUE) {
+filter.method <- match.arg(filter.method)
+counts.nf <- varFilter (counts, baittab, func=filter.method, var.cutoff, limit)  
+pos.f <- match(rownames(counts.nf), rownames(counts))
+if (norm!="quantile") { counts.f <- counts.org[pos.f,]   }    # filtered unnormalized counts
+else   {counts.f <- counts[pos.f,] }                          # case quantile-norm
+}
+
+                            #####  TSPM model  #####
+# TSPM parameter:
+x1 <- factor(baittab$V3[match(colnames(counts),baittab$V1)], levels=c("C","T")) # important: order C -> T in levels
+x0 <- rep(1, times=dim(counts)[2])                            
+                                                             
+# TSPM Modell for diff.expr.
+if (Filter==TRUE) new.counts <- counts.f
+else {if(norm=="quantile") new.counts <- counts else new.counts <- counts.org }
+
+tspm <- TSPM (new.counts, x1, x0, lib.size, alpha.wh=0.05)
+
+if(adj.method=="BH") {
+if (Filter==TRUE) return(list(tspm, counts.nf)) else return(list(tspm, counts))    # output1: filtered (normalized) counts, output2: (normalized) counts
+}
+
+
+if (adj.method=="WY")   {
+org.LRT <- tspm$LRT
+classlabel <- ifelse(x1=="C",0,1)
+perms <- mt.sample.label(classlabel,test="t",fixed.seed.sampling="n",B=0)
+perms2 <- apply(perms, 2, function(x){ifelse(x==0,"C","T")} )
+# permutation table:
+proteins <- rownames(new.counts)
+permmat <- as.data.frame(matrix(data=NA, nrow=length(proteins), ncol=(dim(perms)-1) ) )
+rownames(permmat) <- proteins
+
+for ( j in 2:nrow(perms2)) {
+x1 <- factor(perms2[j,], levels=c("C","T"))
+tspm.perm <- TSPM (new.counts, x1, x0, lib.size, alpha.wh=0.05)    # permuted TSPM model
+permmat[,(j-1)] <- tspm.perm$LRT
+}
+
+set.seed(12345)                                   # Westfall&Young Algorithm
+pp <- permute(c(1:length(proteins)))
+WY.padj <- WY.permalg(org.LRT[pp], permmat[pp,])
+
+if (Filter==TRUE) return(list(tspm, WY.padj, counts.nf, permmat)) else return(list(tspm, WY.padj, counts, permmat))     # output1: filtered (normalized) counts, output2: (normalized) counts
+}        # wy end
+
+
+}        # end
diff --git a/R/mat_Int_change.r b/R/mat_Int_change.r
new file mode 100644
index 0000000..9bf9c05
--- /dev/null
+++ b/R/mat_Int_change.r
@@ -0,0 +1,39 @@
+##### Translation Count-Matrix <-> Interactiontable (format required for SAINT)
+
+####     Function Interactiontable -> Matrix:
+# Input: Interactiontable as required in SAINT (including zero counts)
+
+int2mat <- function(IntSaint) {
+prots <- as.character(unique(IntSaint$V3))
+                                              #transfer inttab -> inttab.mat (matrix)
+rowspec <- function(i){                       # = matrix row i
+protframe <-IntSaint[grep(paste("^",i,"$",sep=""), IntSaint$V3), ]
+return(protframe$V4[match(as.character(unique(IntSaint$V1)),protframe$V1)])}
+# spectral counts in matrix-form:
+IntSaint.mat <- t(vapply(prots, rowspec , rep(0, times=length(as.character(unique(IntSaint$V1))))))
+colnames(IntSaint.mat) <- as.character(unique(IntSaint$V1))
+return(IntSaint.mat)
+}
+
+# example
+# baitint.mat <- int2mat("bait_LBcl_IntSaint.txt")
+
+
+#####     Function Matrix -> Interactiontable
+# Input: matrix count table, baittable as classifier for ctrls and bait-samples
+
+mat2int <- function(mat, baittab){
+inttab <- data.frame(V1=NA, V2=NA, V3=NA, V4=0.0)
+for ( i in 1:dim(mat)[2]) {
+V1 <- rep(colnames(mat)[i], times= dim(mat)[1] )
+V2 <- rep(as.character(baittab$V2[match(colnames(mat)[i], baittab$V1)]), times=dim(mat)[1] )
+V3 <- rownames(mat)
+V4 <- mat[,i]
+inttab <- rbind(inttab, cbind(V1,V2,V3,V4))
+}
+inttab <- inttab[-1,]
+inttab$V4 <- as.numeric(inttab$V4)
+rownames(inttab) <- NULL
+return(inttab)
+}
+
diff --git a/R/myTSPM.r b/R/myTSPM.r
new file mode 100644
index 0000000..997bb39
--- /dev/null
+++ b/R/myTSPM.r
@@ -0,0 +1,121 @@
+##### Origin:
+##-------------------------------------------------------------------
+## 	Name: TSPM.R
+##  	R code for the paper by Paul L. Auer and R.W. Doerge:
+## 	"A Two-Stage Poisson Model for Testing RNA-Seq Data"
+## 	Date: February 2011
+## 	Contact: Paul Auer 		plivermo at fhcrc.org
+##		 R.W. Doerge 		doerge at purdue.edu
+##------------------------------------------------------------------------
+
+####  !!!
+####  TSPM function adapted here for the application to affinity-purification mass-spectrometry data (AP-MS) 
+####  based on spectral counts
+
+
+
+###### The TSPM function ##############################################
+#######################################################################
+
+
+TSPM <- function(counts, x1, x0, lib.size, alpha.wh){
+
+## Input:
+#counts:   a matrix of spectral counts (rows=proteins, columns=samples)
+#x1: 		   a vector of bait/control assignement (under the alternative hypothesis)
+#x0: 		   a vector of bait/control assignement (under the null hypothesis)
+#lib.size: a vector of scaling factors for normalization
+#alpha.wh: the significance threshold to use for deciding whether a protein is overdispersed.
+#          Defaults to 0.05.
+
+
+## Output:
+#id:                name of the protein
+#log.fold.change:		a vector containing the estimated log fold changes for each protein
+#pvalues: 			a vector containing the raw p-values testing differential expression for each protein
+#LRT:           a vector of Likelihood-Ratio statistics for each protein  
+#dispersion:    a vector of yes/no indicating overdispersion for each protein
+#padj:			a vector containing the p-values after adjusting for multiple testing using the method of Benjamini-Hochberg
+
+
+######## The main loop that fits the GLMs to each protein ########################
+
+### Initializing model parameters ####
+n <- dim(counts)[1]
+per.gene.disp <- NULL
+LRT <- NULL
+score.test <- NULL
+LFC <- NULL
+
+###### Fitting the GLMs for each protein #################
+	for(i in 1:n){
+		### Fit full and reduced models ###
+		model.1 <- glm(as.numeric(counts[i,]) ~ x1, offset=log(lib.size), family=poisson)
+		model.0 <- glm(as.numeric(counts[i,]) ~ x0, offset=log(lib.size), family=poisson)
+
+    if (model.1$coef[2]<0) {model.1 <- model.0}     # constraint: parameter beta1>0 ! one-sided-test situation
+    
+		### Obtain diagonals of Hat matrix from the full model fit ###
+		hats <- hatvalues(model.1)
+
+		### Obtain Pearson overdispersion estimate ####
+		per.gene.disp[i] <- sum(residuals(model.1, type="pearson")^2)/model.1$df.residual
+
+		### Obtain Likelihood ratio statistic ####
+		LRT[i] <- deviance(model.0)-deviance(model.1)
+
+		### Obtain score test statistic ####
+		score.test[i] <- 1/(2*length(counts[i,])) * sum(residuals(model.1, type="pearson")^2 - ((counts[i,] - hats*model.1$fitted.values)/model.1$fitted.values))^2
+
+		### Obtain the estimated log fold change ###
+		LFC[i] <- model.1$coef[2]
+	}
+
+## Initialize parameters for Working-Hotelling bands around the score TSs ###
+qchi <- qchisq(df=1, (1:n-0.5)/n)
+MSE <- 2
+UL <- NULL
+
+#### Obtain the upper boundary of the WH bands #######################################
+xbar <- mean(qchi)
+bottom <- sum((qchi-xbar)^2)
+top <- (qchi-xbar)^2
+s <- sqrt(MSE*(1/n) + (top/bottom))
+W <- sqrt(2*qf(df1=1, df2=n-1, p=1-(alpha.wh/n)))
+UL <- pmax(qchi + W*s,1)
+
+###### Obtain the indices of the over-dispersed and not-over-dispersed proteins, respectively ##########
+
+if(length(which(sort(score.test)-UL > 0))>0){     # overdispersed proteins existent
+cutoff <- min(which(sort(score.test)-UL > 0))
+temp <- cutoff-1 + seq(cutoff:length(score.test))
+over.disp <- which(score.test %in% sort(score.test)[temp])
+not.over.disp <- setdiff(1:length(score.test), over.disp)
+}
+else  {                                           # no overdispersed proteins
+not.over.disp <- c(1:length(score.test))
+over.disp <- NULL
+}
+
+
+###### Compute p-values ####################################
+p.f <- pf(LRT[over.disp]/per.gene.disp[over.disp], df1=1, df2=model.1$df.residual, lower.tail=FALSE)
+p.chi <- pchisq(LRT[not.over.disp], df=1, lower.tail=FALSE)
+p <- NULL
+p[over.disp] <- p.f
+p[not.over.disp] <- p.chi
+
+##### Adjust the p-values using the B-H method ####################
+p.bh.f <- p.adjust(p.f, method="BH")
+p.bh.chi <- p.adjust(p.chi, method="BH")
+final.p.bh.tagwise <- NULL
+final.p.bh.tagwise[over.disp] <- p.bh.f
+final.p.bh.tagwise[not.over.disp] <- p.bh.chi
+
+index.disp <- NULL
+index.disp[over.disp] <- "yes"
+index.disp[not.over.disp] <- "no"
+### Output ###
+data.frame(id=rownames(counts), log.fold.change=LFC, pvalues=p,
+padj=final.p.bh.tagwise, LRT=LRT, dispersion=index.disp)
+}
diff --git a/R/myWFalg_Dudoit.r b/R/myWFalg_Dudoit.r
new file mode 100644
index 0000000..fc98632
--- /dev/null
+++ b/R/myWFalg_Dudoit.r
@@ -0,0 +1,48 @@
+# Westfall & Young Algorithm implemented after Dudoit in "Dudoit-class (package ClassComparison)"
+
+
+##### Westfall&Young Algorithm:
+# Input: - original Saint.out scores (org.statistic)
+#        - score- Permutationmatrix (perm.avgp / perm.maxp)
+# Output: - adjusted p-values, counter=number of exceeding scores, either of the permuted score exceeding its original one a permuted score from on of the the lower ranked original candidates
+
+WY.permalg <- function(org.statistic, perm.p)  {
+  tor <- rev(order(abs(org.statistic)))       # sort observed org.values 
+                                            
+  grounded <- abs(org.statistic[tor])         # org.statistic in decreasing order
+  num.gene <- length(org.statistic)
+  counter <- rep(0, num.gene)                # counter for each gene via permutations
+  names(counter) <- rownames(perm.p[tor,])
+  nPerm <- dim(perm.p)[2]
+  for (i in 1:nPerm) {
+  #cat("\n", i, ". Run \n", sep="")
+    dudoit.t <- abs(perm.p[tor,i])            
+    dudoit.u <- rep(dudoit.t[num.gene], num.gene)    
+    trigger <- sum(dudoit.t > dudoit.u)
+    while(trigger > 0) {                      
+      dudoit.t <- (dudoit.u + dudoit.t + abs(dudoit.u - dudoit.t))/2  
+      trigger <- sum(dudoit.t > dudoit.u)
+      if (trigger > 0) {
+        target <- (1:num.gene)[dudoit.t >dudoit.u][trigger] 
+        dudoit.u[1:target] <- dudoit.t[target]
+      }
+    }
+    #print (head(cbind(grounded, abs(perm.p[tor,i]) , dudoit.u) ))    
+    counter <- counter + (dudoit.u >= grounded)  
+  }
+  adjusted.p <- counter/nPerm
+  return(cbind(counter,adjusted.p))
+}
+
+
+
+
+# example:
+
+# data generated from uniform distribution
+#perm.p <- as.data.frame(matrix(data=NA, nrow=10, ncol=8 ) )
+#rownames(perm.p) <- c(1:10)
+#perm.p <-apply (perm.p, c(1,2), function(x){x<-runif(1,min=0.1,max=9)})   # permuted statistics
+#org <- sapply(1:10, function(x){x<-runif(1,min=0.1,max=9)})               # original statistic
+
+#WY.permalg(org,perm.p)
diff --git a/R/saint_permF.r b/R/saint_permF.r
new file mode 100644
index 0000000..332b7ea
--- /dev/null
+++ b/R/saint_permF.r
@@ -0,0 +1,235 @@
+####  Analysis of AP_MS data: SAINT framed by Pre- and Postprocessing steps  - whole pipeline   ####
+# Aim: Identify interaction partners of a bait protein and separation from the contaminants
+
+#Steps:   - Normalization
+#         - Filtering
+#         - original SAINT run on (normalized)(filtered) data
+#         - Permutation approach on SAINT: shuffle of controls and bait samples
+#         - Westfall&Young algorithm to receive FWER controlled p-values for each protein interaction-candidate
+
+# Assumptions:
+#     - Input of only one bait protein; controls required; - replicates for each group required
+#     - bait protein name(V2 in baittable) same for all bait replicates, same for all controls
+#     - minimum number of replicates for bait and control experiment is 3 
+
+#####  Input:
+##    - Baittable as required for SAINT: classification of bait(T) and ctrls(C) including their names
+##    - Interactiontable as required for SAINT
+#           * important: a protein which was not detected in a sample receives a zero count
+##    - Proteintable = Preyfile as required for SAINT
+      ### these 3 files should be stored in the working directory !! ##
+
+#     - choose a normalization method; default="none" (no normalization)
+#     - Filtering TRUE/FALSE, set all parameters for filtering (method, cutoff, limit)
+
+#Inputfiles example:
+# file_baittable <- "baittable.txt"               # read Baittable  
+# file_inttable <- "LBcl_IntSaint.txt"            # read Interactiontable
+
+
+#Output: - in case of normalization: normed interaction-table (txt file)
+#        - in case of filtering: filtered (normed) interaction-table (txt file)
+#        - Saint output: "unique_interaction" file on the original data (normalized: "_orig" ;filtered: "_orgF")
+#        - permutation matrix perm.avgp, perm.maxp (Rdata) = scores for each protein from all permutation runs
+#        - overall result "WY_Result.csv": Saint interaction output + WY adjusted p-values for each candidate
+
+
+
+saint_permF  <- function(file_baittable, file_inttable, prottable, norm = c("none", "sumtotal", "upperquartile", "DESeq", "TMM", "quantile"),Filter=TRUE, filter.method= c("IQR", "overallVar", "noVar"), var.cutoff=NA, limit=0, intern.norm=FALSE, saint.options="2000 10000 0 1 0") {
+require(multtest)
+require(gtools)
+
+norm <- match.arg(norm)
+saint.options <- match.arg(saint.options)
+
+baittable <- read.table(file_baittable)            # data input
+inttable<- read.table(file_inttable)
+
+baittable$V3 <- as.character(baittable$V3)
+baittable$V2 <- as.character(baittable$V2)
+baittable$V1 <- as.character(baittable$V1)  
+inttable$V1 <- as.character(inttable$V1)
+inttable$V2 <- as.character(inttable$V2)
+
+
+if(all(baittable$V3%in%c("C","T"))==FALSE)  {
+stop("Define bait and control samples only by C and T in the Baittable \n")    }
+
+if (setequal(names(table(baittable$V2)), names(table(inttable$V2)))==FALSE | setequal(names(table(baittable$V1)), names(table(inttable$V1)))==FALSE ) {
+stop("Names of bait and control samples need to be consistent in the Bait- and Interactiontable.  \n")    }
+
+
+baittab <- baittable[order(baittable$V3),]       # baittab sorted: controls first followed by bait samples
+inttable.raw <- inttable
+
+
+
+                               #####    NORMALIZATION    #####
+if (norm!="none")  { 
+inttab.mat <- int2mat(inttable) 
+norm.out <- norm.inttable (inttab.mat, baittab, norm)        
+inttable <- mat2int(norm.out[[1]], baittab)
+baitname <- as.character(unique(baittab$V2[grep("T", baittab$V3)]))
+normSaint_filename <- paste(norm, paste(baitname, "IntSaint.txt",sep="_"),sep="_")
+write.table(inttable, file=normSaint_filename, quote=FALSE, sep="\t", col.names=FALSE, row.names=FALSE)
+}
+
+
+                              #######     FILTERING     #######
+if(Filter==TRUE) {  
+filter.method <- match.arg(filter.method)              
+intmat <- int2mat(inttable)                                   
+intmat <- varFilter(intmat, baittab, func=filter.method, var.cutoff, limit)        # FILTER FUNCTION
+inttable <- mat2int(intmat, baittab)    # new filtered inttable with selected proteins
+                                        
+if(intern.norm==TRUE) {                 # repeated normalization on filtered interaction table
+del.prots <- setdiff(unique(inttable.raw$V3), unique(inttable$V3)) 
+del.pos <- unlist(lapply(del.prots, function(x){which(inttable.raw$V3==x)} ))
+intmat.f <- int2mat(inttable.raw[-del.pos,] )
+norm2.out <- norm.inttable (intmat.f, baittab, norm)    
+inttable <- mat2int(norm2.out[[1]], baittab)
+}
+
+write.table(inttable, file="Inttable_filtered.txt", quote=FALSE, sep="\t", col.names=FALSE, row.names=FALSE)
+
+# 1.SAINT run on original data on a filtered protein-set:
+saint.command <- paste("saint-spc-ctrl", "Inttable_filtered.txt", prottable, file_baittable, saint.options,sep=" ") 
+system("rm -r LOG")                       
+system("rm -r MAPPING")          # deletion of former folders
+system("rm -r MCMC")
+system("rm -r RESULT")
+system(saint.command)            # linux run
+#SAINT output "unique_interactions":
+filename.org <- "./RESULT/unique_interactions"
+orig.filter <- new.saintoutput(filename.org)
+write.table(orig.filter, file="Unique_Interactions_orgF.txt", quote=FALSE, sep="\t", row.names=FALSE)
+
+proteins <- as.character(orig.filter$Prey)
+}
+##  NO Filtering:
+else {      
+    if (norm=="none")  {                 # 1.SAINT run on original data (unnormalized + no filter) 
+    saint.command2 <- paste("saint-spc-ctrl", file_inttable, prottable, file_baittable, saint.options,sep=" ") 
+    system("rm -r LOG")                       
+    system("rm -r MAPPING")         
+    system("rm -r MCMC")
+    system("rm -r RESULT")
+    system(saint.command2)   }        
+    if (norm!="none")  {                # 1.SAINT run on original data (normalized+ no filter) 
+    saint.command.norm <- paste("saint-spc-ctrl", normSaint_filename, prottable, file_baittable, saint.options,sep=" ") 
+    system("rm -r LOG")                       
+    system("rm -r MAPPING")          
+    system("rm -r MCMC")
+    system("rm -r RESULT")
+    system(saint.command.norm)    }
+  
+  #SAINT output "unique_interactions":
+  filename.org <- "./RESULT/unique_interactions"
+  orig.out <- new.saintoutput(filename.org)
+  write.table(orig.out, file="Unique_Interactions_Orig.txt", quote=FALSE, sep="\t", row.names=FALSE)
+  proteins <- as.character(orig.out$Prey)
+}
+
+
+                            #########    Permutation     ###########
+
+ctrl <- as.character(baittab$V1[grep("C", baittab$V3)])               # control replicates
+ctrl.sup <- unique(as.character(baittab$V2[grep("C", baittab$V3)]))   #control sup-name
+baits <- as.character(baittab$V1[grep("T", baittab$V3)])              # bait replicates
+bait.sup <- unique(as.character(baittab$V2[grep("T", baittab$V3)]))   #superior bait name
+
+# permutation setup:
+shuf.vec <- baittab$V3
+classlabel <- ifelse(shuf.vec=="C",0,1)
+perms <- mt.sample.label(classlabel,test="t",fixed.seed.sampling="n",B=0)
+perms2 <- apply(perms, 2, function(x){ifelse(x==0,"C","T")} )
+perms.baitname <- apply( perms, 2, function(x) {ifelse(x==0,baittab$V2[1],baittab$V2[length(baittab$V2)]) } )
+
+# permutation table to store the resulting scores from each permutation:
+perm.avgp <- as.data.frame(matrix(data=NA, nrow=length(proteins), ncol=(dim(perms)-1) ) )
+rownames(perm.avgp) <- proteins
+perm.maxp <- as.data.frame(matrix(data=NA, nrow=length(proteins), ncol=(dim(perms)-1) ) )
+rownames(perm.maxp) <- proteins
+
+#### Each permutation: create new baittable + interactiontable -> new Saint run -> store resulting scores
+#1.row = original data setup (skipped)
+
+for ( j in 2:nrow(perms2)) {                          
+cat(j, ". Run started \n", sep="")
+system("rm -r LOG")
+system("rm -r MAPPING")                   # delete folders for next permutation run 
+system("rm -r MCMC")
+system("rm -r RESULT")
+
+baittab.perm <- baittab
+baittab.perm$V3 <- perms2[j,]
+baittab.perm$V2 <- perms.baitname[j,]     # permuted baittable
+
+new.ctrls <- baittab.perm$V1[grep("C",baittab.perm$V3)]
+new.ctrl <- new.ctrls [which(new.ctrls %in% ctrl ==FALSE) ]   # bait turned into control 
+new.baits <- baittab.perm$V1[grep("T",baittab.perm$V3)]
+new.bait <- new.baits [which(new.baits %in% baits ==FALSE) ]  # control turned into bait 
+
+perm.inttable <- inttable                                     # permuted inttable
+newctrl.pos <- unlist(sapply(new.ctrl, function(x){which(perm.inttable$V1==x)}))  
+perm.inttable$V2[newctrl.pos] <- ctrl.sup                                         
+newbait.pos <- unlist(sapply(new.bait, function(x){which(perm.inttable$V1==x)}))
+perm.inttable$V2[newbait.pos] <- bait.sup
+
+
+print(table(perm.inttable$V1, perm.inttable$V2))              # check permutation
+
+write.table(baittab.perm, file="Baittable_perm.txt", quote=FALSE, sep="\t", col.names=FALSE, row.names=FALSE)
+write.table(perm.inttable, file="Inttable_perm.txt", quote=FALSE, sep="\t", col.names=FALSE, row.names=FALSE)
+
+### SAINT run on permutated data
+saint.commandp1 <- paste("saint-spc-ctrl", "Inttable_perm.txt", prottable, "Baittable_perm.txt",saint.options,sep=" ") 
+system(saint.commandp1)
+
+# SAINT output "unique_interactions"
+filename <- "./RESULT/unique_interactions"
+saintout.perm <- try(new.saintoutput(filename))
+while (class(saintout.perm)=="try-error")         # case of file generation failure
+      { system("rm -r LOG")                       # do it again
+        system("rm -r MAPPING")          
+        system("rm -r MCMC")
+        system("rm -r RESULT")
+        saint.commandp1 <- paste("saint-spc-ctrl", "Inttable_perm.txt", prottable, "Baittable_perm.txt", saint.options,sep=" ") 
+	 system(saint.commandp1) 
+        saintout.perm <- try(new.saintoutput(filename))
+      }
+saintout.perm$AvgP <- as.numeric(as.vector(saintout.perm$AvgP))
+saintout.perm$MaxP <- as.numeric(as.vector(saintout.perm$MaxP))
+saintout.perm$Prey <- as.character(saintout.perm$Prey)
+
+# extract scores from output "unique-interactions"
+protpos.perm <- match(proteins, saintout.perm$Prey) 
+perm.avgp[,(j-1)] <- saintout.perm$AvgP[protpos.perm]
+perm.maxp[,(j-1)] <- saintout.perm$MaxP[protpos.perm]
+
+}          
+# permutation end
+
+save(perm.avgp, file="perm_avgP.RData")
+save(perm.maxp, file="perm_maxP.RData")
+
+system("rm -r LOG")                       
+system("rm -r MAPPING")          
+system("rm -r MCMC")
+system("rm -r RESULT")
+                          #######     Generation p-values    #######
+
+if(Filter==TRUE) {orig <- orig.filter} else  {orig <- orig.out}
+
+orig$AvgP <- as.numeric(as.vector(orig$AvgP))
+set.seed(12345)
+pp <- permute(c(1:length(proteins)))
+wy <- WY.permalg(orig$AvgP[pp], perm.avgp[pp,])
+tor <- rev(order(abs(orig$AvgP)))
+write.csv2(cbind(orig[tor,] ,"WY_counter"=wy[match(orig[tor,]$Prey,rownames(wy)),1],"WY_P.adjust"=wy[match(orig[tor,]$Prey,rownames(wy)),2]) ,file="WY_Result.csv")
+
+}
+
+# end 
+
+
diff --git a/debian/changelog b/debian/changelog
deleted file mode 100644
index 6ccb5b1..0000000
--- a/debian/changelog
+++ /dev/null
@@ -1,7 +0,0 @@
-r-other-apmswapp (1.0-1) UNRELEASED; urgency=low
-
-  * Initial release (Closes: #<bug>)
-    TODO: http://www.bioconductor.org/packages/2.6/bioc/html/DESeq.html
-           -> r-bioc-deseq (seems to depend from locfit)
-
- -- Andreas Tille <tille at debian.org>  Fri, 04 Nov 2016 11:28:50 +0100
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index ec63514..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-9
diff --git a/debian/control b/debian/control
deleted file mode 100644
index 3198240..0000000
--- a/debian/control
+++ /dev/null
@@ -1,36 +0,0 @@
-Source: r-other-apmswapp
-Section: gnu-r
-Priority: optional
-Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Andreas Tille <tille at debian.org>
-Build-Depends: debhelper (>= 9),
-               dh-r,
-               r-base-dev,
-               r-cran-gtools,
-               r-bioc-edger,
-               r-bioc-limma,
-               r-bioc-multtest,
-               r-bioc-genefilter,
-               r-bioc-aroma.light,
-               r-bioc-deseq2,
-               r-cran-seqinr,
-               r-cran-foreign,
-               r-cran-nnet
-Standards-Version: 3.9.8
-Vcs-Browser: https://anonscm.debian.org/viewvc/debian-med/trunk/packages/R/r-other-apmswapp/trunk/
-Vcs-Svn: svn://anonscm.debian.org/debian-med/trunk/packages/R/r-other-apmswapp/trunk/
-Homepage: http://sourceforge.net/projects/apmswapp/
-
-Package: r-other-apmswapp
-Architecture: all
-Depends: ${misc:Depends},
-         ${R:Depends}
-Recommends: ${R:Recommends}
-Suggests: ${R:Suggests}
-Description: GNU R Pre- and Postprocessing For Affinity Purification Mass Spectrometry 
- The reliable detection of protein-protein-interactions by affinity
- purification mass spectrometry (AP-MS) is a crucial stepping stone for
- the understanding of biological processes. The main challenge in a
- typical AP-MS experiment is to separate true interaction proteins from
- contaminants by contrasting counts of proteins binding to specific baits
- with counts of negative controls.
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index cb8a69b..0000000
--- a/debian/copyright
+++ /dev/null
@@ -1,11 +0,0 @@
-Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Upstream-Name: Martina Fischer <fischerm at rki.de>
-Source: http://sourceforge.net/projects/apmswapp/files/
-
-Files: *
-Copyright: 2013-2014 Martina Fischer <fischerm at rki.de>
-License: <license>
-
-Files: debian/*
-Copyright: 2015 Andreas Tille <tille at debian.org>
-License: <license>
diff --git a/debian/patches/series b/debian/patches/series
deleted file mode 100644
index b1daabd..0000000
--- a/debian/patches/series
+++ /dev/null
@@ -1 +0,0 @@
-uses_deseq2.patch
diff --git a/debian/patches/uses_deseq2.patch b/debian/patches/uses_deseq2.patch
deleted file mode 100644
index b26a665..0000000
--- a/debian/patches/uses_deseq2.patch
+++ /dev/null
@@ -1,111 +0,0 @@
-Author: Andreas Tille <tille at debian.org>
-Last-Update: Fri, 04 Nov 2016 11:28:50 +0100
-Description: No idea whether this makes sense but Debian has now DESeq2 - just
- try whether this might work as well.
-
---- a/DESCRIPTION
-+++ b/DESCRIPTION
-@@ -10,7 +10,7 @@ Description: apmsWAPP provides a complet
- 		It comprises pre-processing, scoring and postprocessing of protein interactions.
- 		A final list of interaction candidates is reported: it provides a ranking of the candidates according 
- 		to their p-values which allow estimating the number of false-positive interactions.
--Depends: genefilter, seqinr, multtest, gtools, limma, edgeR, DESeq,
-+Depends: genefilter, seqinr, multtest, gtools, limma, edgeR, DESeq2,
-         aroma.light
- License: Review License
- LazyLoad: yes
---- a/R/Inttab_Norm.R
-+++ b/R/Inttab_Norm.R
-@@ -2,14 +2,14 @@
- # Input: - original interaction matrix   (interaction table including zeros, raw discrete counts!)
- #        - baittable = classification of samples and controls
- #        - different normalization methods can be chosen in "norm":
--#         "sumtotal", "upperquartile", "DESeq", "TMM", "quantile"
-+#         "sumtotal", "upperquartile", "DESeq2", "TMM", "quantile"
- # Output: normed interaction-matrix (normed count table), scaling factors
- 
--norm.inttable <- function( inttab.mat, baittab, norm = c("sumtotal", "upperquartile", "DESeq", "TMM", "quantile")) {
-+norm.inttable <- function( inttab.mat, baittab, norm = c("sumtotal", "upperquartile", "DESeq2", "TMM", "quantile")) {
- 
- require(limma)
- require(edgeR)
--require(DESeq)
-+require(DESeq2)
- require(aroma.light)
- norm <- match.arg(norm)
- 
-@@ -50,8 +50,8 @@ inttab.norm[,setdiff(Tpos,zerocount)] <-
- }                                                                              
- 
- 
--# "DESeq" - normalization method in the DESeq package (Anders et al. 2010):
--if(norm=="DESeq") {
-+# "DESeq2" - normalization method in the DESeq2 package (Anders et al. 2010):
-+if(norm=="DESeq2") {
- cds <- newCountDataSet( inttab.mat, baittab$V3)       # build countDataSet
- 
- cds1 <- estimateSizeFactors( cds[ ,Cpos] )           
---- a/R/TSPM_apms.r
-+++ b/R/TSPM_apms.r
-@@ -19,7 +19,7 @@
- #     - (filtered) (normalized) count matrix
- 
- 
--tspm_apms  <- function(counts, baittab, norm = c("none", "sumtotal", "upperquartile", "DESeq", "TMM", "quantile"),Filter=TRUE, filter.method= c("IQR", "overallVar", "noVar"), var.cutoff=NA, limit=0, adj.method=c("BH","WY")) {
-+tspm_apms  <- function(counts, baittab, norm = c("none", "sumtotal", "upperquartile", "DESeq2", "TMM", "quantile"),Filter=TRUE, filter.method= c("IQR", "overallVar", "noVar"), var.cutoff=NA, limit=0, adj.method=c("BH","WY")) {
- require(multtest)
- require(gtools)
- 
---- a/R/saint_permF.r
-+++ b/R/saint_permF.r
-@@ -35,7 +35,7 @@
- 
- 
- 
--saint_permF  <- function(file_baittable, file_inttable, prottable, norm = c("none", "sumtotal", "upperquartile", "DESeq", "TMM", "quantile"),Filter=TRUE, filter.method= c("IQR", "overallVar", "noVar"), var.cutoff=NA, limit=0, intern.norm=FALSE, saint.options="2000 10000 0 1 0") {
-+saint_permF  <- function(file_baittable, file_inttable, prottable, norm = c("none", "sumtotal", "upperquartile", "DESeq2", "TMM", "quantile"),Filter=TRUE, filter.method= c("IQR", "overallVar", "noVar"), var.cutoff=NA, limit=0, intern.norm=FALSE, saint.options="2000 10000 0 1 0") {
- require(multtest)
- require(gtools)
- 
---- a/man/norm.inttable.Rd
-+++ b/man/norm.inttable.Rd
-@@ -8,7 +8,7 @@ Normalization of spectral count data
- Normalization of spectral counts in bait and control samples based on an AP-MS experiment.
- }
- \usage{
--norm.inttable(inttab.mat, baittab, norm = c("sumtotal", "upperquartile", "DESeq", "TMM", "quantile"))
-+norm.inttable(inttab.mat, baittab, norm = c("sumtotal", "upperquartile", "DESeq2", "TMM", "quantile"))
- }
- 
- \arguments{
-@@ -30,7 +30,7 @@ Five different normalization methods, ad
- In the \sQuote{sumtotal} normalization counts are divided by the total number of counts in the sample. 
- The  \sQuote{upperquartile} normalization corrects counts by dividing each count by the 75\% quantile of its sample counts.
- The  \sQuote{quantile} method equalizes the distributions of protein counts across all samples.
--In the  \sQuote{DESeq} approach by Anders and Huber (2010), counts are divided by the the median of the ratio of its count over its geometric mean across all samples.
-+In the  \sQuote{DESeq2} approach by Anders and Huber (2010), counts are divided by the the median of the ratio of its count over its geometric mean across all samples.
- In the  \sQuote{TMM} approach by Robinson and Oshlack (2010), a scaling factor is computed as the weighted mean of log ratios between chosen test and reference samples.
- 
- }
---- a/man/saint_permF.Rd
-+++ b/man/saint_permF.Rd
-@@ -7,7 +7,7 @@ Pre- and Postprocessing for AP-MS data a
- A complete workflow for the identification of true interaction proteins based on AP-MS data, embedding the scoring method SAINT into a pre- and postprocessing framework.
- }
- \usage{
--saint_permF(file_baittable, file_inttable, prottable, norm = c("none", "sumtotal", "upperquartile", "DESeq", "TMM", "quantile"), Filter = TRUE, filter.method = c("IQR", "overallVar", "noVar"), var.cutoff = NA, limit = 0, intern.norm = FALSE, saint.options = "2000 10000 0 1 0")
-+saint_permF(file_baittable, file_inttable, prottable, norm = c("none", "sumtotal", "upperquartile", "DESeq2", "TMM", "quantile"), Filter = TRUE, filter.method = c("IQR", "overallVar", "noVar"), var.cutoff = NA, limit = 0, intern.norm = FALSE, saint.options = "2000 10000 0 1 0")
- }
- %- maybe also 'usage' for other objects documented here.
- \arguments{
---- a/man/tspm_apms.Rd
-+++ b/man/tspm_apms.Rd
-@@ -8,7 +8,7 @@ A complete workflow for the analysis of
- 
- }
- \usage{
--tspm_apms(counts, baittab, norm = c("none", "sumtotal", "upperquartile", "DESeq", "TMM", "quantile"), Filter = TRUE, filter.method = c("IQR", "overallVar", "noVar"), var.cutoff = NA, limit = 0, adj.method = c("BH", "WY"))
-+tspm_apms(counts, baittab, norm = c("none", "sumtotal", "upperquartile", "DESeq2", "TMM", "quantile"), Filter = TRUE, filter.method = c("IQR", "overallVar", "noVar"), var.cutoff = NA, limit = 0, adj.method = c("BH", "WY"))
- }
- 
- \arguments{
diff --git a/debian/rules b/debian/rules
deleted file mode 100755
index 529c38a..0000000
--- a/debian/rules
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/usr/bin/make -f
-
-%:
-	dh $@ --buildsystem R
-
diff --git a/debian/source/format b/debian/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
diff --git a/debian/upstream/metadata b/debian/upstream/metadata
deleted file mode 100644
index 6765b59..0000000
--- a/debian/upstream/metadata
+++ /dev/null
@@ -1,11 +0,0 @@
-Reference:
-  Author: Martina Fischer and Susann Zilkenat and Roman G. Gerlach and Samuel Wagner and Bernhard Y. Renard
-  Title: "Pre- and post-processing workflow for affinity purification mass spectrometry data"
-  Journal: Journal of Proteome Research
-  Year: 2014
-  Volume: 13
-  Number: 5
-  Pages: 2239-49
-  DOI: 10.1021/pr401249b
-  PMID: 24641689
-  URL: http://pubs.acs.org/doi/abs/10.1021/pr401249b
diff --git a/debian/watch b/debian/watch
deleted file mode 100644
index 897a45f..0000000
--- a/debian/watch
+++ /dev/null
@@ -1,3 +0,0 @@
-version=3
-
-http://sf.net/apmswapp/apmsWAPP_(\d[\d\.]+)\.(?:tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz)))
diff --git a/inst/extdata/baittab.txt b/inst/extdata/baittab.txt
new file mode 100644
index 0000000..e7087d9
--- /dev/null
+++ b/inst/extdata/baittab.txt
@@ -0,0 +1,7 @@
+ctrl1	ctrl	C
+ctrl2	ctrl	C
+ctrl3	ctrl	C
+bait1	bait	T
+bait2	bait	T
+bait3	bait	T
+
diff --git a/inst/extdata/inttable.txt b/inst/extdata/inttable.txt
new file mode 100644
index 0000000..3acbe4e
--- /dev/null
+++ b/inst/extdata/inttable.txt
@@ -0,0 +1,378 @@
+ctrl1	ctrl	con1	0
+ctrl2	ctrl	con1	0
+ctrl3	ctrl	con1	0
+bait1	bait	con1	0
+bait2	bait	con1	0
+bait3	bait	con1	1
+ctrl1	ctrl	con2	0
+ctrl2	ctrl	con2	0
+ctrl3	ctrl	con2	0
+bait1	bait	con2	3
+bait2	bait	con2	0
+bait3	bait	con2	0
+ctrl1	ctrl	con3	0
+ctrl2	ctrl	con3	0
+ctrl3	ctrl	con3	0
+bait1	bait	con3	1
+bait2	bait	con3	0
+bait3	bait	con3	0
+ctrl1	ctrl	con4	0
+ctrl2	ctrl	con4	0
+ctrl3	ctrl	con4	0
+bait1	bait	con4	0
+bait2	bait	con4	2
+bait3	bait	con4	0
+ctrl1	ctrl	con5	0
+ctrl2	ctrl	con5	0
+ctrl3	ctrl	con5	0
+bait1	bait	con5	0
+bait2	bait	con5	0
+bait3	bait	con5	3
+ctrl1	ctrl	con6	0
+ctrl2	ctrl	con6	0
+ctrl3	ctrl	con6	0
+bait1	bait	con6	0
+bait2	bait	con6	1
+bait3	bait	con6	0
+ctrl1	ctrl	con7	0
+ctrl2	ctrl	con7	0
+ctrl3	ctrl	con7	0
+bait1	bait	con7	0
+bait2	bait	con7	1
+bait3	bait	con7	0
+ctrl1	ctrl	con8	0
+ctrl2	ctrl	con8	0
+ctrl3	ctrl	con8	0
+bait1	bait	con8	0
+bait2	bait	con8	3
+bait3	bait	con8	0
+ctrl1	ctrl	con9	0
+ctrl2	ctrl	con9	0
+ctrl3	ctrl	con9	0
+bait1	bait	con9	0
+bait2	bait	con9	0
+bait3	bait	con9	3
+ctrl1	ctrl	con10	0
+ctrl2	ctrl	con10	0
+ctrl3	ctrl	con10	0
+bait1	bait	con10	0
+bait2	bait	con10	0
+bait3	bait	con10	1
+ctrl1	ctrl	con260	1
+ctrl2	ctrl	con260	2
+ctrl3	ctrl	con260	2
+bait1	bait	con260	3
+bait2	bait	con260	1
+bait3	bait	con260	1
+ctrl1	ctrl	con261	3
+ctrl2	ctrl	con261	5
+ctrl3	ctrl	con261	2
+bait1	bait	con261	2
+bait2	bait	con261	1
+bait3	bait	con261	3
+ctrl1	ctrl	con262	1
+ctrl2	ctrl	con262	1
+ctrl3	ctrl	con262	4
+bait1	bait	con262	1
+bait2	bait	con262	1
+bait3	bait	con262	1
+ctrl1	ctrl	con263	2
+ctrl2	ctrl	con263	3
+ctrl3	ctrl	con263	6
+bait1	bait	con263	5
+bait2	bait	con263	4
+bait3	bait	con263	9
+ctrl1	ctrl	con264	2
+ctrl2	ctrl	con264	5
+ctrl3	ctrl	con264	5
+bait1	bait	con264	4
+bait2	bait	con264	4
+bait3	bait	con264	3
+ctrl1	ctrl	con265	1
+ctrl2	ctrl	con265	0
+ctrl3	ctrl	con265	1
+bait1	bait	con265	0
+bait2	bait	con265	1
+bait3	bait	con265	2
+ctrl1	ctrl	con266	1
+ctrl2	ctrl	con266	3
+ctrl3	ctrl	con266	1
+bait1	bait	con266	2
+bait2	bait	con266	1
+bait3	bait	con266	0
+ctrl1	ctrl	con267	3
+ctrl2	ctrl	con267	4
+ctrl3	ctrl	con267	7
+bait1	bait	con267	7
+bait2	bait	con267	3
+bait3	bait	con267	4
+ctrl1	ctrl	con268	7
+ctrl2	ctrl	con268	5
+ctrl3	ctrl	con268	5
+bait1	bait	con268	2
+bait2	bait	con268	6
+bait3	bait	con268	7
+ctrl1	ctrl	con269	6
+ctrl2	ctrl	con269	1
+ctrl3	ctrl	con269	2
+bait1	bait	con269	5
+bait2	bait	con269	2
+bait3	bait	con269	1
+ctrl1	ctrl	con270	2
+ctrl2	ctrl	con270	2
+ctrl3	ctrl	con270	2
+bait1	bait	con270	3
+bait2	bait	con270	2
+bait3	bait	con270	3
+ctrl1	ctrl	con380	11
+ctrl2	ctrl	con380	13
+ctrl3	ctrl	con380	21
+bait1	bait	con380	18
+bait2	bait	con380	23
+bait3	bait	con380	18
+ctrl1	ctrl	con381	36
+ctrl2	ctrl	con381	37
+ctrl3	ctrl	con381	28
+bait1	bait	con381	41
+bait2	bait	con381	36
+bait3	bait	con381	38
+ctrl1	ctrl	con382	22
+ctrl2	ctrl	con382	25
+ctrl3	ctrl	con382	21
+bait1	bait	con382	26
+bait2	bait	con382	24
+bait3	bait	con382	23
+ctrl1	ctrl	con383	43
+ctrl2	ctrl	con383	54
+ctrl3	ctrl	con383	45
+bait1	bait	con383	49
+bait2	bait	con383	39
+bait3	bait	con383	48
+ctrl1	ctrl	con384	42
+ctrl2	ctrl	con384	39
+ctrl3	ctrl	con384	34
+bait1	bait	con384	32
+bait2	bait	con384	38
+bait3	bait	con384	37
+ctrl1	ctrl	con385	35
+ctrl2	ctrl	con385	47
+ctrl3	ctrl	con385	38
+bait1	bait	con385	40
+bait2	bait	con385	36
+bait3	bait	con385	32
+ctrl1	ctrl	con386	17
+ctrl2	ctrl	con386	17
+ctrl3	ctrl	con386	28
+bait1	bait	con386	27
+bait2	bait	con386	33
+bait3	bait	con386	30
+ctrl1	ctrl	con387	22
+ctrl2	ctrl	con387	12
+ctrl3	ctrl	con387	26
+bait1	bait	con387	24
+bait2	bait	con387	24
+bait3	bait	con387	15
+ctrl1	ctrl	con388	27
+ctrl2	ctrl	con388	33
+ctrl3	ctrl	con388	27
+bait1	bait	con388	27
+bait2	bait	con388	31
+bait3	bait	con388	33
+ctrl1	ctrl	con389	34
+ctrl2	ctrl	con389	33
+ctrl3	ctrl	con389	31
+bait1	bait	con389	22
+bait2	bait	con389	33
+bait3	bait	con389	31
+ctrl1	ctrl	con390	39
+ctrl2	ctrl	con390	56
+ctrl3	ctrl	con390	46
+bait1	bait	con390	43
+bait2	bait	con390	53
+bait3	bait	con390	41
+ctrl1	ctrl	prot20	0
+ctrl2	ctrl	prot20	0
+ctrl3	ctrl	prot20	0
+bait1	bait	prot20	7
+bait2	bait	prot20	3
+bait3	bait	prot20	0
+ctrl1	ctrl	prot21	0
+ctrl2	ctrl	prot21	0
+ctrl3	ctrl	prot21	0
+bait1	bait	prot21	11
+bait2	bait	prot21	14
+bait3	bait	prot21	12
+ctrl1	ctrl	prot22	0
+ctrl2	ctrl	prot22	0
+ctrl3	ctrl	prot22	0
+bait1	bait	prot22	7
+bait2	bait	prot22	7
+bait3	bait	prot22	10
+ctrl1	ctrl	prot23	0
+ctrl2	ctrl	prot23	0
+ctrl3	ctrl	prot23	0
+bait1	bait	prot23	23
+bait2	bait	prot23	29
+bait3	bait	prot23	28
+ctrl1	ctrl	prot24	0
+ctrl2	ctrl	prot24	0
+ctrl3	ctrl	prot24	0
+bait1	bait	prot24	14
+bait2	bait	prot24	17
+bait3	bait	prot24	18
+ctrl1	ctrl	prot25	0
+ctrl2	ctrl	prot25	0
+ctrl3	ctrl	prot25	0
+bait1	bait	prot25	14
+bait2	bait	prot25	15
+bait3	bait	prot25	11
+ctrl1	ctrl	prot26	0
+ctrl2	ctrl	prot26	0
+ctrl3	ctrl	prot26	0
+bait1	bait	prot26	17
+bait2	bait	prot26	10
+bait3	bait	prot26	10
+ctrl1	ctrl	prot27	0
+ctrl2	ctrl	prot27	0
+ctrl3	ctrl	prot27	0
+bait1	bait	prot27	4
+bait2	bait	prot27	9
+bait3	bait	prot27	0
+ctrl1	ctrl	prot28	0
+ctrl2	ctrl	prot28	0
+ctrl3	ctrl	prot28	0
+bait1	bait	prot28	16
+bait2	bait	prot28	16
+bait3	bait	prot28	19
+ctrl1	ctrl	prot29	0
+ctrl2	ctrl	prot29	0
+ctrl3	ctrl	prot29	0
+bait1	bait	prot29	9
+bait2	bait	prot29	13
+bait3	bait	prot29	11
+ctrl1	ctrl	prot30	0
+ctrl2	ctrl	prot30	0
+ctrl3	ctrl	prot30	0
+bait1	bait	prot30	19
+bait2	bait	prot30	18
+bait3	bait	prot30	19
+ctrl1	ctrl	prot31	0
+ctrl2	ctrl	prot31	0
+ctrl3	ctrl	prot31	0
+bait1	bait	prot31	8
+bait2	bait	prot31	7
+bait3	bait	prot31	6
+ctrl1	ctrl	prot32	0
+ctrl2	ctrl	prot32	0
+ctrl3	ctrl	prot32	0
+bait1	bait	prot32	17
+bait2	bait	prot32	9
+bait3	bait	prot32	10
+ctrl1	ctrl	prot33	0
+ctrl2	ctrl	prot33	0
+ctrl3	ctrl	prot33	0
+bait1	bait	prot33	8
+bait2	bait	prot33	7
+bait3	bait	prot33	15
+ctrl1	ctrl	prot34	0
+ctrl2	ctrl	prot34	0
+ctrl3	ctrl	prot34	0
+bait1	bait	prot34	15
+bait2	bait	prot34	15
+bait3	bait	prot34	12
+ctrl1	ctrl	prot35	0
+ctrl2	ctrl	prot35	0
+ctrl3	ctrl	prot35	0
+bait1	bait	prot35	7
+bait2	bait	prot35	6
+bait3	bait	prot35	6
+ctrl1	ctrl	prot36	0
+ctrl2	ctrl	prot36	0
+ctrl3	ctrl	prot36	0
+bait1	bait	prot36	15
+bait2	bait	prot36	14
+bait3	bait	prot36	18
+ctrl1	ctrl	prot37	0
+ctrl2	ctrl	prot37	0
+ctrl3	ctrl	prot37	0
+bait1	bait	prot37	23
+bait2	bait	prot37	29
+bait3	bait	prot37	27
+ctrl1	ctrl	prot38	0
+ctrl2	ctrl	prot38	0
+ctrl3	ctrl	prot38	0
+bait1	bait	prot38	30
+bait2	bait	prot38	23
+bait3	bait	prot38	26
+ctrl1	ctrl	prot39	0
+ctrl2	ctrl	prot39	0
+ctrl3	ctrl	prot39	0
+bait1	bait	prot39	4
+bait2	bait	prot39	9
+bait3	bait	prot39	4
+ctrl1	ctrl	prot40	0
+ctrl2	ctrl	prot40	0
+ctrl3	ctrl	prot40	0
+bait1	bait	prot40	20
+bait2	bait	prot40	31
+bait3	bait	prot40	34
+ctrl1	ctrl	sprot1	5
+ctrl2	ctrl	sprot1	8
+ctrl3	ctrl	sprot1	3
+bait1	bait	sprot1	17
+bait2	bait	sprot1	13
+bait3	bait	sprot1	24
+ctrl1	ctrl	sprot2	5
+ctrl2	ctrl	sprot2	3
+ctrl3	ctrl	sprot2	1
+bait1	bait	sprot2	9
+bait2	bait	sprot2	15
+bait3	bait	sprot2	7
+ctrl1	ctrl	sprot3	0
+ctrl2	ctrl	sprot3	2
+ctrl3	ctrl	sprot3	1
+bait1	bait	sprot3	21
+bait2	bait	sprot3	22
+bait3	bait	sprot3	15
+ctrl1	ctrl	sprot4	2
+ctrl2	ctrl	sprot4	3
+ctrl3	ctrl	sprot4	2
+bait1	bait	sprot4	15
+bait2	bait	sprot4	17
+bait3	bait	sprot4	22
+ctrl1	ctrl	sprot5	1
+ctrl2	ctrl	sprot5	2
+ctrl3	ctrl	sprot5	7
+bait1	bait	sprot5	11
+bait2	bait	sprot5	11
+bait3	bait	sprot5	19
+ctrl1	ctrl	sprot6	7
+ctrl2	ctrl	sprot6	4
+ctrl3	ctrl	sprot6	10
+bait1	bait	sprot6	23
+bait2	bait	sprot6	17
+bait3	bait	sprot6	25
+ctrl1	ctrl	sprot7	4
+ctrl2	ctrl	sprot7	3
+ctrl3	ctrl	sprot7	2
+bait1	bait	sprot7	16
+bait2	bait	sprot7	18
+bait3	bait	sprot7	13
+ctrl1	ctrl	sprot8	1
+ctrl2	ctrl	sprot8	3
+ctrl3	ctrl	sprot8	1
+bait1	bait	sprot8	16
+bait2	bait	sprot8	6
+bait3	bait	sprot8	13
+ctrl1	ctrl	sprot9	4
+ctrl2	ctrl	sprot9	5
+ctrl3	ctrl	sprot9	1
+bait1	bait	sprot9	20
+bait2	bait	sprot9	20
+bait3	bait	sprot9	15
+ctrl1	ctrl	sprot10	1
+ctrl2	ctrl	sprot10	3
+ctrl3	ctrl	sprot10	2
+bait1	bait	sprot10	13
+bait2	bait	sprot10	20
+bait3	bait	sprot10	16
diff --git a/inst/extdata/prottable.txt b/inst/extdata/prottable.txt
new file mode 100644
index 0000000..e6716f2
--- /dev/null
+++ b/inst/extdata/prottable.txt
@@ -0,0 +1,63 @@
+con1	217	con1
+con2	361	con2
+con3	289	con3
+con4	247	con4
+con5	336	con5
+con6	71	con6
+con7	203	con7
+con8	348	con8
+con9	536	con9
+con10	96	con10
+con260	349	con260
+con261	48	con261
+con262	358	con262
+con263	250	con263
+con264	393	con264
+con265	124	con265
+con266	194	con266
+con267	835	con267
+con268	588	con268
+con269	43	con269
+con270	119	con270
+con380	314	con380
+con381	226	con381
+con382	217	con382
+con383	612	con383
+con384	228	con384
+con385	111	con385
+con386	222	con386
+con387	265	con387
+con388	385	con388
+con389	166	con389
+con390	347	con390
+prot20	251	prot20
+prot21	225	prot21
+prot22	141	prot22
+prot23	345	prot23
+prot24	125	prot24
+prot25	270	prot25
+prot26	226	prot26
+prot27	574	prot27
+prot28	244	prot28
+prot29	651	prot29
+prot30	150	prot30
+prot31	187	prot31
+prot32	427	prot32
+prot33	457	prot33
+prot34	265	prot34
+prot35	587	prot35
+prot36	395	prot36
+prot37	80	prot37
+prot38	546	prot38
+prot39	110	prot39
+prot40	392	prot40
+sprot1	77	sprot1
+sprot2	416	sprot2
+sprot3	389	sprot3
+sprot4	152	sprot4
+sprot5	212	sprot5
+sprot6	121	sprot6
+sprot7	373	sprot7
+sprot8	57	sprot8
+sprot9	285	sprot9
+sprot10	449	sprot10
diff --git a/man/apmsWAPP-internal.Rd b/man/apmsWAPP-internal.Rd
new file mode 100644
index 0000000..bb93029
--- /dev/null
+++ b/man/apmsWAPP-internal.Rd
@@ -0,0 +1,22 @@
+\name{internals}
+\alias{my.rowIQR}
+\alias{new.saintoutput}
+\alias{saintout}
+\alias{TSPM}
+\alias{WY.permalg}
+
+\title{
+internal functions
+}
+\description{
+Functions for internal usage only
+}
+\usage{
+my.rowIQR(mat)
+new.saintoutput(filename)
+saintout(test)
+TSPM(counts, x1, x0, lib.size, alpha.wh)
+WY.permalg(org.statistic, perm.p)
+}
+
+\keyword{internal}
diff --git a/man/apmsWAPP-package.Rd b/man/apmsWAPP-package.Rd
new file mode 100644
index 0000000..667a9f3
--- /dev/null
+++ b/man/apmsWAPP-package.Rd
@@ -0,0 +1,41 @@
+\name{apmsWAPP-package}
+\alias{apmsWAPP-package}
+\alias{apmsWAPP}
+\docType{package}
+\title{
+Pre- and Postprocessing for AP-MS data analysis
+}
+\description{
+The package \pkg{apmsWAPP} provides a complete workflow for the analysis of AP-MS data, based on replicate single-bait purifications including negative controls. \cr
+It comprises the three main parts of pre-processing, scoring and postprocessing of interaction proteins:   \cr
+For pre-processing, five different normalization methods and a filtering procedure is provided.          \cr
+For scoring protein-protein-interactions, either the method of SAINT or a two-stage-poisson model (TSPM) adapted to AP-MS data can be chosen. \cr
+For postprocessing, the user can choose between the permutation-based approach of Westfall&Young (applicable to both, SAINT and TSPM) and the adjustment procedure of Benjamini-Hochberg (applicable to TSPM).
+Postprocessing results in the generation of p-values for each interaction candidate, allowing to control the number of false-positive interactions.
+}
+\details{                    
+\tabular{ll}{
+Package: \tab apmsWAPP\cr
+Type: \tab Package\cr
+Version: \tab 1.0\cr
+Date: \tab 2013-03-14\cr
+License: \tab review license\cr
+}
+The two main function calls are:  \code{\link{saint_permF}} (framework based on SAINT) and  \code{\link{tspm_apms}} (framework based on TSPM).
+}
+
+\author{
+Martina Fischer (fischerm at rki.de)
+}
+
+\references{
+Fischer M, Zilkenat S, Gerlach R, Wagner S, Renard BY. Pre- and Postprocessing for Affinity Purification Mass Spectrometry Data: More Reliable Detection of Interaction Candidates. \emph{in review} 
+
+Choi H, Larsen B, Lin Z-Y, et al. SAINT: probabilistic scoring of affinity purification-mass spectrometry data. \emph{Nature Methods} 2011.
+
+Auer PL, Doerge RW. A two-stage Poisson model for testing RNA-Seq data. \emph{Statistical Applications in Genetics and Molecular Biology} 2011.
+}
+\keyword{package}
+\keyword{APMS}
+
+
diff --git a/man/int_mat.Rd b/man/int_mat.Rd
new file mode 100644
index 0000000..4f3f863
--- /dev/null
+++ b/man/int_mat.Rd
@@ -0,0 +1,41 @@
+\name{int_mat}
+\alias{int2mat}
+\alias{mat2int}
+\title{
+Format transformation of spectral counts
+}
+\description{
+Transformation of a count matrix into an interaction table (format required for SAINT) and vice versa.
+}
+\usage{
+int2mat(IntSaint)
+
+mat2int(mat, baittab)
+}
+\arguments{
+  \item{IntSaint}{a data.frame. The interaction table as required for SAINT (including zero counts).
+}
+  \item{mat}{matrix of spectral counts, proteins in rows and samples in columns.
+}
+  \item{baittab}{a data.frame. The baittable as required for SAINT, classifying control and bait samples.
+}
+}
+\details{
+The \emph{interaction table} consists of four columns: IP name, bait or control name, protein name, spectral count (\bold{note}: a protein which was not detected in one of the samples receives a zero count). \cr
+\code{int2mat} transfers the interaction table into a matrix form. \code{mat2int} transfers a matrix of spectral counts into the interaction table format defined by SAINT.
+}
+\value{
+Either a matrix of spectral counts or a data.frame representing the interaction table is returned.
+}
+
+\author{
+Martina Fischer
+}
+
+\examples{
+intfile <- system.file("extdata", "inttable.txt", package="apmsWAPP")
+interactiontable <- read.table(intfile)
+count.mat <- int2mat(interactiontable)
+class(count.mat)
+dim(count.mat)
+}
\ No newline at end of file
diff --git a/man/norm.inttable.Rd b/man/norm.inttable.Rd
new file mode 100644
index 0000000..8135748
--- /dev/null
+++ b/man/norm.inttable.Rd
@@ -0,0 +1,68 @@
+\name{norm.inttable}
+\alias{norm.inttable}
+
+\title{
+Normalization of spectral count data
+}
+\description{
+Normalization of spectral counts in bait and control samples based on an AP-MS experiment.
+}
+\usage{
+norm.inttable(inttab.mat, baittab, norm = c("sumtotal", "upperquartile", "DESeq", "TMM", "quantile"))
+}
+
+\arguments{
+  \item{inttab.mat}{
+  matrix of spectral counts, proteins in rows and samples in columns.  
+}
+  \item{baittab}{
+  a data.frame. The baittable as required for SAINT, classifying control and bait samples.
+}
+  \item{norm}{
+  method to normalize the data. 
+}
+}
+\details{
+The baittable corresponds to a format as required for SAINT, consisting of three columns: IP name, bait or control name, indicator for bait and control experiment (T=bait purification, C=control).\cr
+Note that the IP names in the baittable must be in agreement with the sample names.
+
+Five different normalization methods, adapted from microarray and RNA-seq analysis to AP-MS data, are available: \cr
+In the \sQuote{sumtotal} normalization counts are divided by the total number of counts in the sample. 
+The  \sQuote{upperquartile} normalization corrects counts by dividing each count by the 75\% quantile of its sample counts.
+The  \sQuote{quantile} method equalizes the distributions of protein counts across all samples.
+In the  \sQuote{DESeq} approach by Anders and Huber (2010), counts are divided by the the median of the ratio of its count over its geometric mean across all samples.
+In the  \sQuote{TMM} approach by Robinson and Oshlack (2010), a scaling factor is computed as the weighted mean of log ratios between chosen test and reference samples.
+
+}
+
+\value{
+A list containing the following components:
+\item{1}{normalized spectral count matrix}
+\item{2}{scaling factors (if available)}
+}
+
+\references{
+Anders S, Huber W. Differential expression analysis for sequence count data. \emph{Genome Biology} 2010.
+
+Robinson MD, Oshlack A. A scaling normalization method for differential expression analysis of RNA-seq data. \emph{Genome Biology} 2010.
+
+Bolstad BM, Irizarry RA, Astrand M, et al. A comparison of normalization methods for high density oligonucleotide array data based on variance and bias. \emph{Bioinformatics} 2003.
+
+Dillies M-A, Rau A, Aubert J, et al. A comprehensive evaluation of normalization methods for Illumina high-throughput RNA sequencing data analysis. \emph{Briefings in Bioinformatics} 2012.
+}
+\author{
+Martina Fischer
+}
+
+
+\examples{
+#input data
+intfile <- system.file("extdata", "inttable.txt", package="apmsWAPP")
+counts <- int2mat(read.table(intfile))
+baitfile <- system.file("extdata", "baittab.txt", package="apmsWAPP")
+baittab <- read.table(baitfile)
+# Normalization:
+norm.counts <- norm.inttable(counts, baittab, norm = "upperquartile")
+}
+\keyword{normalization}
+\keyword{APMS}
diff --git a/man/saint_permF.Rd b/man/saint_permF.Rd
new file mode 100644
index 0000000..5300581
--- /dev/null
+++ b/man/saint_permF.Rd
@@ -0,0 +1,117 @@
+\name{saint_permF}
+\alias{saint_permF}
+\title{
+Pre- and Postprocessing for AP-MS data analysis using SAINT
+}
+\description{
+A complete workflow for the identification of true interaction proteins based on AP-MS data, embedding the scoring method SAINT into a pre- and postprocessing framework.
+}
+\usage{
+saint_permF(file_baittable, file_inttable, prottable, norm = c("none", "sumtotal", "upperquartile", "DESeq", "TMM", "quantile"), Filter = TRUE, filter.method = c("IQR", "overallVar", "noVar"), var.cutoff = NA, limit = 0, intern.norm = FALSE, saint.options = "2000 10000 0 1 0")
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{file_baittable}{
+  a character string specifying the pathname of the baittable. see \emph{Details}.
+}
+  \item{file_inttable}{
+  a character string specifying the pathname of the interaction table. see \emph{Details}. 
+}
+  \item{prottable}{
+  a character string specifying the pathname of the protein table. see \emph{Details}.
+
+}
+  \item{norm}{
+  method to normalize the data. If \code{norm="none"}, no normalization of the data is performed.
+}
+  \item{Filter}{
+  logical value, whether filtering of the data is applied (Default \code{TRUE}).
+}
+  \item{filter.method}{
+  method to use for filtering, must be one of \code{"IQR"}, \code{"overallVar"} or \code{"noVar"}, only used when \code{Filter=TRUE}.
+}
+  \item{var.cutoff}{
+  percentile (between 0 and 1) or \code{NA}. Cutoff for filtering the data, defined by a quantile or shortest-interval (=\code{NA}, Default), only used when \code{Filter=TRUE}. 
+}
+  \item{limit}{
+  minimal number of expected true interaction proteins in the data.
+}
+  \item{intern.norm}{
+  logical value. If \code{TRUE}, normalization is repeated on the filtered data (Default \code{FALSE}).
+}
+  \item{saint.options}{
+  parameters set for SAINT. 
+}
+}
+\details{
+The input files correspond to the input formats used by SAINT: the baittable, prey- and interaction table in the form of tab-delimited files. \cr
+The \emph{baittable} consists of three columns: IP name, bait or control name, indicator for bait and control experiment (T=bait purification, C=control). \cr
+The \emph{interaction table} consists of four columns: IP name, bait or control name, protein name, spectral count (\bold{note}: a protein which was not detected in one of the samples receives a zero count). \cr
+The \emph{protein table} refers to the preyfile, it consists of three columns: protein names, protein length, protein names or associated gene names (if available).\cr
+A more detailed description on the generation of these files is given in Choi et.al. \emph{(Current Protocols in Bioinformatics 2012)}.
+
+Pre-processing comprises normalization and filtering of the data: \cr
+Here, it can be chosen from five different normalization methods, adapted from microarray and RNA-seq analysis to AP-MS data. For further details see \code{\link{norm.inttable}}. \cr
+The filter consists of a biological filter and a statistical variance filter and aims to remove obvious contaminants from further analysis. \cr
+If \code{filter.method="noVar"}, only the biological filter is conducted.
+Both are conducted, if \code{filter.method="IQR"}, here the variance is calculated by the inter-quartile-range, or if \code{filter.method="overallVar"}, here the variance is calculated across all samples. \cr
+The \code{var.cutoff} defines the fraction of proteins with the lowest overall variance, which are considered as contaminants and are removed.
+\code{var.cutoff=NA} refers to a cutoff defined by the mean of the shortest intervall containing 50\% of the data (default). Alternatively, a quantile can be set as cutoff, e.g. a cutoff of 0.5 filters 50\% of the data showing the smallest overall variance or IQR. see also \code{\link{varFilter}}\cr
+The parameter \code{limit} assures, that filtering results in a number of proteins above the number of expected true interaction proteins.
+
+The corresponding parameters in SAINT \code{[nburn][niter][lowMode][minFold][normalize]} are set as recommended by SAINT. Further details on the parameter setting can be found in Choi et.al.\emph{(Current Protocols in Bioinformatics 2012)}. 
+}
+
+\value{
+The overall result is reported in the file \emph{WY_Result.csv}: \cr 
+It is based on the original Saint output \sQuote{unique_interactions}, but additionally Westfall&Young adjusted p-values are assigned to each interaction candidate. These p-values control the FWER, allowing to estimate the portion of false-positive interactions.
+
+Different .txt and .xls files are generated, enabling the user to follow the different intermediate results:  \cr
+\enumerate{
+\item In case of normalization: normalized count data in form of the interaction table (\emph{txt file}), named after the normalization method and the bait protein (e.g. quantile_bait_IntSaint.txt).
+\item In case of filtering: the filtered (and normalized) interaction table (\emph{Inttable_filtered.txt}). 
+\item The Saint output: \sQuote{unique_interactions}, reporting the interaction candidates with SAINT scores, calculated on normalized data (file name ending \emph{_orig}), and filtered: (file name ending \emph{_orgF}).
+\item Permutation data: scores calculated for each permutation data set (permutation matrix as \emph{perm.avgp.Rata}, \emph{perm.maxp.Rdata}). 
+}
+}
+
+\references{
+Choi H, Larsen B, Lin Z-Y, et al. SAINT: probabilistic scoring of affinity purification-mass spectrometry data. \emph{Nature Methods} 2011.
+
+Choi H, Liu G, Mellacheruvu D, et al. Analyzing Protein-Protein Interactions from Affinity Purification-Mass Spectrometry Data with SAINT. \emph{Current Protocols in Bioinformatics} 2012.
+ 
+Anders S, Huber W. Differential expression analysis for sequence count data. \emph{Genome Biology} 2010.
+
+Robinson MD, Oshlack A. A scaling normalization method for differential expression analysis of RNA-seq data. \emph{Genome Biology} 2010.
+
+Bolstad BM, Irizarry RA, Astrand M, et al. A comparison of normalization methods for high density oligonucleotide array data based on variance and bias. \emph{Bioinformatics} 2003.
+
+Westfall PH, Young SS. Resampling-based multiple testing: examples and methods for p-value adjustment. 1993.
+
+Bourgon R, Gentleman R, Huber W. Independent filtering increases detection power for high-throughput experiments. \emph{Proceedings of the National Academy of Sciences} 2010.
+}
+
+\author{
+Martina Fischer
+}
+
+\note{
+SAINT is run as part of the workflow. It is important to note that the function \code{saint_permF} requires a linux environment and was tested on SAINT version 2.3.4.
+}
+
+
+
+
+\examples{
+#input dara
+baitfile <- system.file("extdata", "baittab.txt", package="apmsWAPP")
+intfile <- system.file("extdata", "inttable.txt", package="apmsWAPP")
+protfile <- system.file("extdata", "prottable.txt", package="apmsWAPP")
+# Pre-processing: quantile normalization and filtering  
+# Important: Define a working directory for storage of the resulting files
+# Workflow call:
+saint_permF(baitfile,intfile,protfile, norm="quantile", Filter=TRUE, filter.method="overallVar", var.cutoff=0.3, intern.norm=FALSE)
+}
+
+\keyword{Preprocessing}
+\keyword{APMS}
diff --git a/man/tspm_apms.Rd b/man/tspm_apms.Rd
new file mode 100644
index 0000000..6bf1a80
--- /dev/null
+++ b/man/tspm_apms.Rd
@@ -0,0 +1,95 @@
+\name{tspm_apms}
+\alias{tspm_apms}
+\title{
+Workflow for AP-MS data analysis using TSPM
+}
+\description{
+A complete workflow for the analysis of AP-MS data, using a two-stage-poisson model and a pre- and postprocessing framework.
+
+}
+\usage{
+tspm_apms(counts, baittab, norm = c("none", "sumtotal", "upperquartile", "DESeq", "TMM", "quantile"), Filter = TRUE, filter.method = c("IQR", "overallVar", "noVar"), var.cutoff = NA, limit = 0, adj.method = c("BH", "WY"))
+}
+
+\arguments{
+  \item{counts}{
+  matrix of spectral counts, proteins in rows and samples in columns.  
+}
+  \item{baittab}{
+  a character string specifying the pathname of the baittable. see Details. 
+}
+  \item{norm}{
+  method to normalize the data. If \code{norm="none"}, no normalization of the data is performed.
+}
+  \item{Filter}{
+  logical value, whether filtering of the data is applied (Default \code{TRUE}).
+}
+  \item{filter.method}{
+  method to use for filtering, must be one of \code{"IQR"}, \code{"overallVar"} or \code{"noVar"}, only used when \code{Filter=TRUE}.
+}
+  \item{var.cutoff}{
+  percentile (between 0 and 1) or \code{NA}. Cutoff for filtering the data, defined by a quantile or shortest-interval (=\code{NA}, Default), only used when \code{Filter=TRUE}. 
+}
+  \item{limit}{
+  minimal number of expected true interaction proteins in the data.
+}
+  \item{adj.method}{
+  method to adjust p-values for multiple testing.
+}
+}
+\details{
+The baittable corresponds to a tab/space delimited file as required for SAINT - consisting of three columns: IP name, bait or control name, indicator for bait and control experiment (T=bait purification, C=control).
+
+Pre-processing comprises normalization and filtering of the data: \cr
+Here, it can be chosen from five different normalization methods, adapted from microarray and RNA-seq analysis to AP-MS data. For further details see \code{\link{norm.inttable}}. \cr
+The filter consists of a biological filter and a statistical variance filter and aims to remove obvious contaminants from further analysis. \cr
+If \code{filter.method="noVar"}, only the biological filter is conducted.
+Both are conducted, if \code{filter.method="IQR"}, here the variance is calculated by the inter-quartile-range, or if \code{filter.method="overallVar"}, here the variance is calculated across all samples. \cr
+The \code{var.cutoff} defines the fraction of proteins with the lowest overall variance, which are considered as contaminants and are removed.
+\code{var.cutoff=NA} refers to a cutoff defined by the mean of the shortest intervall containing 50\% of the data (default). Alternatively, a quantile can be set as cutoff, e.g. a cutoff of 0.5 filters 50\% of the data showing the smallest overall variance or IQR. see also \code{\link{varFilter}}\cr
+The parameter \code{limit} assures, that filtering results in a number of proteins above the number of expected true interaction proteins.
+
+For postprocessing, two different adjustment procedures are provided for multiple testing: the Benjamini-Hochberg procedure (\code{"BH"}) (p-values are controlled by FDR), 
+and the permutation approach coupled to the Westfall&Young (\code{"WY"}) algorithm (p-values are controlled by FWER).
+}
+
+\value{
+A list containing the following components:
+\item{id }{name of the interaction protein}
+\item{log.fold.change }{a vector containing the estimated log fold changes for each protein}
+\item{pvalues}{a vector containing the raw p-values for each protein, evaluating the interaction }
+\item{padj }{a vector containing the p-values after adjusting for multiple testing using the method of Benjamini-Hochberg } 				
+\item{LRT}{a vector of Likelihood Ratio statistics, scoring the interaction potential of each protein  }
+\item{dispersion}{a vector of yes/no indicating overdispersion for each protein}
+\item{adjusted.p}{a vector containing the adjusted p-values using the permutation-based approach of Westfall&Young}
+\item{counter}{a vector containing the number of exceeding permutation scores using the permutation-based approach of Westfall&Young}
+\item{matrix1}{(filtered) (normalized) matrix of spectral counts}
+\item{matrix2}{permutation matrix of scores, permutation runs in columns and proteins in rows}
+}
+
+\references{
+Fischer M, Zilkenat S, Gerlach R, Wagner S, Renard BY. Pre- and Postprocessing for Affinity Purification Mass Spectrometry Data: More Reliable Detection of Interaction Candidates. \emph{in review} 
+
+Auer PL, Doerge RW. A two-stage Poisson model for testing RNA-Seq data. \emph{Statistical Applications in Genetics and Molecular Biology} 2011.
+}
+
+\author{
+Martina Fischer
+}
+
+\examples{
+# input data
+intfile <- system.file("extdata", "inttable.txt", package="apmsWAPP")
+counts <- int2mat(read.table(intfile))
+baitfile <- system.file("extdata", "baittab.txt", package="apmsWAPP")
+# TSPM with quantile normalization and filtering
+tspm.quaF <- tspm_apms( counts, baitfile, norm="quantile", Filter=TRUE, filter.method="overallVar", var.cutoff=0.1, adj.method="WY")
+# Results:
+# for adjustment with BH:
+cat("Number of Proteins with p-value <0.05: ",length(which(tspm.quaF[[1]]$padj < 0.05) ) )
+# for adjustment with WY:
+cat("Number of Proteins with p-value <0.05: ",length(which(tspm.quaF[[2]][,2] <0.05)))
+}
+
+\keyword{TSPM}
+\keyword{APMS}
diff --git a/man/varFilter.Rd b/man/varFilter.Rd
new file mode 100644
index 0000000..b4d7501
--- /dev/null
+++ b/man/varFilter.Rd
@@ -0,0 +1,64 @@
+\name{varFilter}
+\alias{varFilter}
+\title{
+Filtering of AP-MS data
+}
+\description{
+The filter consists of a biological filter and a statistical variance filter and aims to remove obvious contaminants in AP-MS data.}
+\usage{
+varFilter(mat, baittab, func = c("IQR", "overallVar", "noVar"), var.cutoff = NA, limit = 0)
+}
+\arguments{
+  \item{mat}{
+  matrix of spectral counts, proteins in rows and samples in columns.
+}
+  \item{baittab}{
+  a data.frame. The baittable as required for SAINT, classifying control and bait samples.
+}
+  \item{func}{
+  method to use for filtering, must be one of \code{"IQR"}, \code{"overallVar"} or \code{"noVar"}.
+}
+  \item{var.cutoff}{
+  percentile (between 0 and 1) or \code{NA}. Cutoff for filtering the data, defined by a quantile or shortest-interval (=\code{NA}, Default).
+}
+  \item{limit}{
+  minimal number of expected true interaction proteins in the data.
+}
+}
+\details{
+If \code{filter.method="noVar"}, only the biological filter is conducted.
+The biological and statistical filter are applied, if \code{filter.method="IQR"}, here the variance is calculated by the inter-quartile-range, or if \code{filter.method="overallVar"}, here the variance is calculated across all samples. 
+
+The \code{var.cutoff} defines the fraction of proteins with the lowest overall variance, which are considered as contaminants and are removed.
+\code{var.cutoff=NA} refers to a cutoff defined by the mean of the shortest intervall containing 50\% of the data (default). Alternatively, a quantile can be set as cutoff, e.g. a cutoff of 0.5 filters 50\% of the data showing the smallest overall variance or IQR. 
+
+The parameter \code{limit} assures, that filtering results in a number of proteins above the number of expected true interaction proteins.\cr
+It is recommended to set the parameters \code{var.cutoff} and \code{limit} according to biological knowledge, if available.
+
+}
+\value{
+filtered matrix of spectral counts
+}
+\references{
+Bourgon R, Gentleman R, Huber W. Independent filtering increases detection power for high-throughput experiments. \emph{Proceedings of the National Academy of Sciences} 2010.
+}
+\author{
+Martina Fischer
+}
+
+\seealso{
+\code{\link{shorth}}
+}
+\examples{
+#input data
+intfile <- system.file("extdata", "inttable.txt", package="apmsWAPP")
+counts <- int2mat(read.table(intfile))
+baitfile <- system.file("extdata", "baittab.txt", package="apmsWAPP")
+baittab <- read.table(baitfile)
+dim(counts)
+# Filtering:
+counts.filtered <- varFilter(counts, baittab, func = "overallVar", var.cutoff = 0.3, limit = 0)
+dim(counts.filtered)
+}
+\keyword{filtering}
+\keyword{APMS}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-other-apmswapp.git



More information about the debian-med-commit mailing list