[med-svn] [metabit] 02/04: New upstream version 0.0+20160923
Andreas Tille
tille at debian.org
Tue Feb 14 10:52:55 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository metabit.
commit 028b2292853c3d69629a5966d59266ab5e4a3666
Author: Andreas Tille <tille at debian.org>
Date: Tue Feb 14 09:27:20 2017 +0100
New upstream version 0.0+20160923
---
nodes/samtools.py | 16 ----------
nodes/tools/get_stats.py | 14 +++++++--
nodes/tools/statax_Rmodule/doBarplot.R | 29 +++++++++++++++---
nodes/tools/statax_Rmodule/doClust.R | 54 +++++++++++++++++++++-------------
nodes/tools/statax_Rmodule/doDiv.R | 44 ++++++++++++++++++++++-----
nodes/tools/statax_Rmodule/doHeatmap.R | 23 +++++++++++----
nodes/tools/statax_Rmodule/doPcoa.R | 33 +++++++++++++--------
7 files changed, 144 insertions(+), 69 deletions(-)
diff --git a/nodes/samtools.py b/nodes/samtools.py
index df7c1ef..3e54c22 100644
--- a/nodes/samtools.py
+++ b/nodes/samtools.py
@@ -13,15 +13,6 @@ from pypeline.atomiccmd.builder import \
import pypeline.common.versions as versions
-# _VERSION_REGEX = r"Version: (\d+)\.(\d+)(?:\.(\d+))?"
-# SAMTOOLS_VERSION = versions.Requirement(call=("samtools",),
-# search=_VERSION_REGEX,
-# checks=(0, 1, 18))
-
-## v0.2.0 was the pre-release version of v1.0, and lacks required features
-#_COMMON_CHECK = versions.And(versions.GE(0, 1, 18),
-# versions.LT(0, 2, 0))
-
_COMMON_CHECK = versions.GE(1, 1, 0)
SAMTOOLS_VERSION = versions.Requirement(
@@ -39,13 +30,6 @@ class SortSamNode(CommandNode):
IN_SAM = input_file,
OUT_STDOUT = AtomicCmd.PIPE,
CHECK_VERSION = SAMTOOLS_VERSION)
- # old samtoolsx0.1
- # cmd_sortbam = AtomicCmd(["samtools", "sort", "-f", "-", "%(OUT_BAM)s"],
- # IN_STDIN = cmd_sam2bam,
- # OUT_BAM = output_file)
- # new samtoolsx1
- ## samtools view -buS reads.singles.bowtie2out.sam | samtools sort -T temp_root -o here.bam -O bam -
-
cmd_sortbam = AtomicCmd(["samtools", "sort",
"-T", "%(TEMP_OUT_PREFIX)s",
"-o", "%(OUT_BAM)s",
diff --git a/nodes/tools/get_stats.py b/nodes/tools/get_stats.py
index bfe6934..52f7125 100755
--- a/nodes/tools/get_stats.py
+++ b/nodes/tools/get_stats.py
@@ -13,7 +13,8 @@ import re
import sys
import pysam
import os.path
-
+import shlex
+from subprocess import check_output
from glob import glob
@@ -96,7 +97,16 @@ def get_bowtie2stats(file_):
return tot, mapped
-def get_readcount(bamfile):
+
+def get_readcount(bamfile, cmd="samtools view -c {}".format):
+ if bamfile:
+ readcount = check_output(shlex.split(cmd(bamfile)))
+ return(int(readcount.rstrip("\n")), )
+ else:
+ return (None, )
+
+
+def _get_readcount(bamfile):
if bamfile:
readcount = pysam.view("-c", bamfile)[0]
return (int(readcount),)
diff --git a/nodes/tools/statax_Rmodule/doBarplot.R b/nodes/tools/statax_Rmodule/doBarplot.R
index ffbab72..32f5441 100755
--- a/nodes/tools/statax_Rmodule/doBarplot.R
+++ b/nodes/tools/statax_Rmodule/doBarplot.R
@@ -20,18 +20,28 @@ doBarplot <- function(d, out.pdf=NULL,
if ( sample.order=="none" || ncol(d)<2 ) {
sample.order <- seq_len(ncol(d))
+ in.bp <- d
} else if (sample.order=="alpha") {
sample.order <- order(colnames(d))
+ in.bp <- d[,sample.order]
} else {
sample.order <- hclust(dist(t(d), method=sample.order))$order
+ in.bp <- d[,sample.order]
}
- in.bp <- d[,sample.order]
+
color.gradient <- gglike_colors(nrow(d))
taxa.names <- rownames(in.bp)
legend.order <- seq_along(taxa.names)
if (!is.null(uncl_col)) {
- in.bp <- rbind(in.bp, unclassified=100-colSums(in.bp))
+ if(ncol(in.bp) < 2){
+ in.bp <- rbind(in.bp, "unclassified"=100-sum(in.bp[,1]))
+ } else {
+ in.bp <- rbind(in.bp, "unclassified"=100-colSums(in.bp))
+ }
+
+ # #in.bp <- rbind(in.bp, unclassified=100-colSums(in.bp))
+
color.gradient <- c(color.gradient, uncl_col)
taxa.names <- c(sort(taxa.names), "unclassified")
legend.order <- match(taxa.names, sort(taxa.names))
@@ -114,10 +124,21 @@ if( !interactive() ) {
d <- read.table(cl$args[1], row.names=1, sep='\t', as.is=TRUE)
if( row.names(d)[1] == "ID" ) {
names(d) <- d[1,]
- d <- d[-1,]
+ if(ncol(d) < 2){
+ d <- subset(d, row(d)>=2)
+ } else {
+ d <- d[-1,]
+ }
}
taxa.names <- rownames(d)
- d <- apply(d, 2, as.numeric)
+
+ if(ncol(d) < 2){
+ d[,1] <- as.numeric(d[,1])
+ } else {
+ d <- apply(d, 2, as.numeric)
+ }
+
+
rownames(d) <- taxa.names
doBarplot(d, out.pdf=cl$args[2],
diff --git a/nodes/tools/statax_Rmodule/doClust.R b/nodes/tools/statax_Rmodule/doClust.R
index f88cd81..ff77485 100755
--- a/nodes/tools/statax_Rmodule/doClust.R
+++ b/nodes/tools/statax_Rmodule/doClust.R
@@ -8,13 +8,24 @@ source(paste0(statax_dir, "/pvegclust-internal.R"))
source(paste0(statax_dir, "/pvegclust.R"))
read.abundance.table <- function(filename) {
- d <- read.table(cl$args[1], row.names=1, sep='\t', as.is=TRUE)
+ d <- read.table(filename, row.names=1, sep='\t', as.is=TRUE)
if( row.names(d)[1] == "ID" ) {
names(d) <- d[1,]
- d <- d[-1,]
+ if(ncol(d) < 2){
+ d <- subset(d, row(d)>=2)
+ } else {
+ d <- d[-1,]
+ }
}
taxa.names <- rownames(d)
- d <- apply(d, 2, as.numeric)
+
+ if(ncol(d) < 2){
+ d[,1] <- as.numeric(d[,1])
+ } else {
+ d <- apply(d, 2, as.numeric)
+ }
+
+
rownames(d) <- taxa.names
return(d)
}
@@ -33,24 +44,6 @@ doClust <- function(d, out.clust=NULL, out.clust.pdf=NULL, out.clust.tsv=NULL,
paste0(out.clust,".RData"),
out.clust.RData)
}
- if (ncol(d) < 3)
- msg <- c("Error: hierarchical clustering cannot be done with less than 3 samples",
- paste0("(Input file: ", cl$args[1], ")"))
- if (nrow(d) < 3)
- msg <- c("Error: hierarchical clustering cannot be done with less than 3 objects (taxa)",
- paste0("(Input file: ", cl$args[1], ")"))
- if (exists("msg")) {
- write(msg, file=stderr())
- if( !interactive() ) {
- pdf(out.clust.pdf)
- plot.new()
- text(0, c(1, .95), msg, adj=0, col='red')
- invisible(dev.off())
- write(msg, file=out.clust.tsv)
- save(msg, file=out.clust.RData)
- quit(save="no", status=0)
- }
- }
if (ncores>1) {
pvclust.func <- function(...) {
@@ -125,6 +118,25 @@ if( !interactive() ) {
d <- read.abundance.table(cl$args[1])
+ if (ncol(d) < 3)
+ msg <- c("Error: hierarchical clustering cannot be done with less than 3 samples",
+ paste0("(Input file: ", cl$args[1], ")"))
+ if (nrow(d) < 3)
+ msg <- c("Error: hierarchical clustering cannot be done with less than 3 objects (taxa)",
+ paste0("(Input file: ", cl$args[1], ")"))
+ if (exists("msg")) {
+ write(msg, file=stderr())
+ if( !interactive() ) {
+ pdf(cl$options$pdf)
+ plot.new()
+ text(0, c(1, .95), msg, adj=0, col='red')
+ invisible(dev.off())
+ write(msg, file=cl$options$tsv)
+ save(msg, file=cl$options$RData)
+ quit(save="no", status=0)
+ }
+ }
+
doClust(d, out.clust=cl$options$basename,
out.clust.pdf=cl$options$pdf,
out.clust.tsv=cl$options$tsv,
diff --git a/nodes/tools/statax_Rmodule/doDiv.R b/nodes/tools/statax_Rmodule/doDiv.R
index 4d25e77..3ad347f 100755
--- a/nodes/tools/statax_Rmodule/doDiv.R
+++ b/nodes/tools/statax_Rmodule/doDiv.R
@@ -4,10 +4,19 @@ read.abundance.table <- function(filename) {
d <- read.table(filename, row.names=1, sep='\t', as.is=TRUE)
if( row.names(d)[1] == "ID" ) {
names(d) <- d[1,]
- d <- d[-1,]
+ if(ncol(d) < 2){
+ d <- subset(d, row(d)>=2)
+ } else {
+ d <- d[-1,]
+ }
}
taxa.names <- rownames(d)
- d <- apply(d, 2, as.numeric)
+
+ if(ncol(d) < 2){
+ d[,1] <- as.numeric(d[,1])
+ } else {
+ d <- apply(d, 2, as.numeric)
+ }
rownames(d) <- taxa.names
return(d)
}
@@ -20,15 +29,21 @@ my.write.table <- function(Table, file, sep="\t", quote=F) {
split.table <- function(Table, rm.strains=TRUE,
default.taxlevels=c("k", "p", "c", "o", "f", "g", "s")) {
- if( rm.strains ) Table <- Table[!grepl('\\|t__', row.names(Table)),]
+ if( rm.strains ) {
+ Table <- subset(Table, !grepl('\\|t__', row.names(Table)))
+ # Table <- Table[!grepl('\\|t__', row.names(Table)),]
+ }
sample_names <- names(Table)
taxa_names <- strsplit(row.names(Table), '|', fixed=T)
taxlevel <- sapply(taxa_names, function(e) default.taxlevels[length(e)])
lasttaxon <- sapply(taxa_names, tail, 1)
+ ## Data1 <- sapply(default.taxlevels, function(x) Table[taxlevel==x,],
+ ## simplify=F, USE.NAMES=T)
- Data <- sapply(default.taxlevels, function(x) Table[taxlevel==x,],
+ Data <- sapply(default.taxlevels, function(x) subset(Table, taxlevel==x),
simplify=F, USE.NAMES=T)
+ Data
#warning: you should check that the row.names do not contain duplicated taxa.
}
@@ -40,6 +55,7 @@ filter.table <- function(Data) {
colnames(Data$k)[unclassified]),
file=stderr())
Data <- sapply(Data, `[`, TRUE, !unclassified)
+ Data
} else{
write("No samples are 100% unclassified", file=stderr())
Data
@@ -48,7 +64,7 @@ filter.table <- function(Data) {
doDiv <- function(Data, index="shannon") {
suppressPackageStartupMessages(require(vegan, quietly=T))
- diversities <- sapply(Data, diversity, index=cl$options$index, MARGIN=2,
+ diversities <- sapply(Data, diversity, index=index, MARGIN=2,
USE.NAMES=T)
return(diversities)
}
@@ -79,10 +95,24 @@ if( !interactive() ) {
default.taxlevels <- unlist(strsplit(cl$options$taxlevels, ""))
Data <- split.table(Table, rm.strains=cl$options$rmstrains,
default.taxlevels=default.taxlevels)
- Data <- filter.table(Data)
+ Data <- filter.table(Data)
+ # diversity cannot calculate the distance when only one row, so setting to zero and add a dummyrow
+ for (d in names(Data)){
+ if(dim(Data[[d]])[1]<2){
+ Data[[d]] = Data[[d]] * 0
+ Data[[d]] <-rbind(Data[[d]], 0)
+ }
+ }
diversities <- doDiv(Data, index=cl$options$index)
-
+
+ if(is.null(dim(diversities))){
+ diversities = t(as.data.frame(diversities))
+ rownames(diversities) = colnames(Data[[1]])[1] # Get sample name
+ } else{
+
+ }
+
out.div <- stdout()
if( length(cl$args) == 2 ) out.div <- cl$args[2]
my.write.table(diversities, file=out.div)
diff --git a/nodes/tools/statax_Rmodule/doHeatmap.R b/nodes/tools/statax_Rmodule/doHeatmap.R
index 43e5dec..317f4c5 100755
--- a/nodes/tools/statax_Rmodule/doHeatmap.R
+++ b/nodes/tools/statax_Rmodule/doHeatmap.R
@@ -1,13 +1,23 @@
#!/usr/bin/env Rscript
read.abundance.table <- function(filename) {
- d <- read.table(cl$args[1], row.names=1, sep='\t', as.is=TRUE)
+ d <- read.table(filename, row.names=1, sep='\t', as.is=TRUE)
if( row.names(d)[1] == "ID" ) {
names(d) <- d[1,]
- d <- d[-1,]
+ if(ncol(d) < 2){
+ d <- subset(d, row(d)>=2)
+ } else {
+ d <- d[-1,]
+ }
}
taxa.names <- rownames(d)
- d <- apply(d, 2, as.numeric)
+
+ if(ncol(d) < 2){
+ d[,1] <- as.numeric(d[,1])
+ } else {
+ d <- apply(d, 2, as.numeric)
+ }
+
rownames(d) <- taxa.names
return(d)
}
@@ -22,11 +32,12 @@ doHeatmap <- function(d, out.pdf=NULL, graph.title=NULL, taxon.title="Taxon",
taxa.order <- hclust(dist(d))$order
if( ncol(d)>2 ) {
samples.order <- hclust(dist(t(d)))$order
+ heatmap.data <- d[taxa.order, samples.order]
} else {
- samples.order <- seq_len(ncol(d))
+ # samples.order <- seq_len(ncol(d))
+ heatmap.data <- d[taxa.order,,drop=FALSE] # drop=FALSE to keep data.frame
}
-
- heatmap.data <- d[taxa.order, samples.order]
+
heatmap.data <- melt(t(heatmap.data), varnames=c("sample", taxon.title),
value.name="abundance")
diff --git a/nodes/tools/statax_Rmodule/doPcoa.R b/nodes/tools/statax_Rmodule/doPcoa.R
index 4d8b7d0..cb493b2 100755
--- a/nodes/tools/statax_Rmodule/doPcoa.R
+++ b/nodes/tools/statax_Rmodule/doPcoa.R
@@ -40,19 +40,6 @@ doPCoA <- function(d, # column names will be used for the distance
paste0(out.pcoa,".tsv"),
out.pcoa.tsv)
- if (nrow(d) <= 2) {
- msg <- c("Error: PCoA cannot be done with less than 3 columns",
- paste0("(Input file: ", cl$args[1], ")"))
- write(msg, file=stderr())
- if( !interactive() ) {
- pdf(out.pcoa.pdf)
- plot.new()
- text(0, c(1, .95), msg, adj=0, col='red')
- invisible(dev.off())
- write(msg, file=out.pcoa.tsv)
- quit(save="no", status=0)
- }
- }
require(ape, quietly=T)
suppressPackageStartupMessages(require(vegan, quietly=T))
@@ -121,6 +108,10 @@ doPCoA <- function(d, # column names will be used for the distance
legend(x = par("usr")[2], y = par("usr")[4], legend = legend.text,
pch=legend.pch, col=legend.out, pt.bg=legend.in, text.col=text.col,
bty='n', xpd=TRUE, xjust=0, cex=.8, pt.cex=1.5, ncol=legend.cols)
+
+# number_of_samples = 37 ## uncomment and replace XX with number of the samples or libs analyzed.
+# point.labels <- row.names(pcoa.result$vectors)[1:number_of_samples]
+# text(x=x.pcoa.r[1:number_of_samples], y=y.pcoa.r[1:number_of_samples], labels=point.labels, font=1, cex=0.2, pos=3, col="black")
if (!is.interactive) invisible(dev.off())
}
@@ -170,8 +161,24 @@ if( !interactive() ) {
option_list=option_list)
cl <- parse_args(Parser, positional_arguments=1)
+
if (cl$args == "-") cl$args <- "stdin"
d <- read.table(cl$args, row.names=1, sep='\t', as.is=TRUE)
+
+ if (ncol(d) <= 2 ) {
+ msg <- c("Error: PCoA cannot be done with less than 3 samples",
+ paste0("(Input file: ", cl$args[1], ")"))
+ write(msg, file=stderr())
+ if( !interactive() ) {
+ pdf(cl$options$pdf)
+ plot.new()
+ text(0, c(1, .95), msg, adj=0, col='red')
+ invisible(dev.off())
+ write(msg, file=cl$options$tsv)
+ quit(save="no", status=0)
+ }
+ }
+
if( row.names(d)[1] == "ID" ) {
names(d) <- d[1,]
d <- d[-1,]
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/metabit.git
More information about the debian-med-commit
mailing list