[med-svn] [Git][med-team/pigx-scrnaseq][upstream] New upstream version 1.1.7+ds
Steffen Möller
gitlab at salsa.debian.org
Sun Dec 13 23:10:08 GMT 2020
Steffen Möller pushed to branch upstream at Debian Med / pigx-scrnaseq
Commits:
bfa5c028 by Steffen Moeller at 2020-12-14T00:07:42+01:00
New upstream version 1.1.7+ds
- - - - -
6 changed files:
- Snake_Dropseq.py
- VERSION
- requirements.txt
- scripts/Sample_Sheet_Class.py
- + scripts/scrnaReport.Rmd
- scripts/scrnaReport.Rmd.in
Changes:
=====================================
Snake_Dropseq.py
=====================================
@@ -153,7 +153,6 @@ for sample_name in SAMPLE_NAMES:
read_file = os.path.join(PATH_MAPPED, sample_name, genome, sample_name + '_2.fastq.gz')
FILTER_READS.append(read_file)
-
# ----------------------------------------------------------------------------- #
# MAPPING
MAP_scRNA = expand(os.path.join(PATH_MAPPED, "{name}", "{genome}", "{name}_Aligned.out.bam"), genome = REFERENCE_NAMES, name = SAMPLE_NAMES)
@@ -211,11 +210,9 @@ RULE_ALL = RULE_ALL + [LINK_REFERENCE_PRIMARY, LINK_GTF_PRIMARY]
if len(COMBINE_REFERENCE) > 0:
RULE_ALL = RULE_ALL + COMBINE_REFERENCE
-#RULE_ALL = RULE_ALL + DICT + REFFLAT + MAKE_STAR_INDEX + FASTQC + MERGE_FASTQ_TO_BAM + MERGE_BAM_PER_SAMPLE + MAP_scRNA + BAM_HISTOGRAM + FIND_READ_CUTOFF + READS_MATRIX + UMI + READ_STATISTICS + BIGWIG + UMI_LOOM + COMBINED_UMI_MATRICES + SCE_RDS_FILES + SEURAT_RDS_FILES + REPORT_FILES
RULE_ALL = RULE_ALL + MAKE_STAR_INDEX + MERGE_TECHNICAL_REPLICATES + FILTER_READS + BAM_HISTOGRAM + FIND_CELL_NUMBER_CUTOFF + MAP_scRNA + SORT_BAM + INDEX_BAM + UMI_LOOM + COMBINED_LOOM_MATRICES + SCE_RDS_FILES + SEURAT_RDS_FILES + BIGWIG + READ_STATISTICS + REPORT_FILES
-
# ----------------------------------------------------------------------------- #
rule all:
input:
@@ -392,6 +389,7 @@ if GENOME_SECONDARY_IND:
# ----------------------------------------------------------------------------- #
def fetch_fastq_technical_replicates(wc):
sample_name = str(wc.name)
+
if(str(wc.type) == 'R1'):
input_fastq = SAMPLE_SHEET.fetch_barcode_path(sample_name)
@@ -400,6 +398,7 @@ def fetch_fastq_technical_replicates(wc):
if not input_fastq.__class__ == list:
input_fastq = list(input_fastq)
+
return(input_fastq)
@@ -659,6 +658,11 @@ rule map_star:
'--soloFeatures', str(params.features),
'--outSAMtype', 'BAM Unsorted',
'--readFilesCommand', str(params.zcat),
+ # CR/UR: raw (uncorrected) CellBarcode/UMI
+ # CY/UY: quality score for CellBarcode/UMI
+ # GX/GN: for gene ID/names
+ # sS/sQ: for sequence/quality combined CellBarcode and UMI; sM for barcode match status.
+ '--outSAMattributes NH HI nM AS CR UR GX GN sS sQ sM',
join_params("STAR", PARAMS, params.params_STAR),
'2>',str(log.logfile)
])
@@ -686,7 +690,7 @@ rule sort_bam:
run:
command = ' '.join([
- 'samtools sort',
+ params.samtools, 'sort',
'-@', str(params.threads),
'-o', str(output.outfile),
str(input.infile)
@@ -714,7 +718,7 @@ rule index_bam:
run:
command = ' '.join([
- 'samtools index',
+ params.samtools, 'index',
'-b',
'-@', str(params.threads),
str(input.infile),
=====================================
VERSION
=====================================
@@ -1 +1 @@
-1.1.6
+1.1.7
=====================================
requirements.txt
=====================================
@@ -1,7 +1,4 @@
STAR
-java
-picard
-Drop-seq_tools-1.12 #see software at http://mccarrolllab.com/dropseq/
snakemake=4.0.0
python=3.6*
@@ -11,8 +8,11 @@ pandas
yaml
snakemake
fastqc
+flexbar
+jellyfish
#R dependencies
+#argparser
#dplyr
#dropbead
#GenomicAlignments
@@ -24,6 +24,7 @@ fastqc
#rtracklayer
#stringr
#yaml
+#Seurat
#cowplot
#data.table
=====================================
scripts/Sample_Sheet_Class.py
=====================================
@@ -133,13 +133,13 @@ class experiment:
+
# ----------------------------------------------------------------------- #
# pivots the sample_sheet by sample_name to get unique technical replicates
def merge_technical_replicates(self):
- #print('Merging technical replicates ...')
# defines the sample descriptors
- sample_sheet = pd.pivot_table(self.SAMPLE_SHEET, index='sample_name', aggfunc=set)
+ sample_sheet = pd.pivot_table(self.SAMPLE_SHEET, index='sample_name', aggfunc=list)
sample_sheet['sample_name'] = sample_sheet.index
column_names = set(list(self.SAMPLE_SHEET.columns))
@@ -148,13 +148,12 @@ class experiment:
# checks whether all technical replicates have the same metadata
for cname in column_names:
cid = sample_sheet[cname]
- for i in cid:
- if(len(i) > 1):
- sys.exit('Technical replicates have different metadata')
+ if(len(set(list(cid)[0])) > 1):
+ sys.exit('Technical replicates have different metadata')
# names for merged files
sample_sheet['barcode_merged'] = sample_sheet['sample_name'] + '_R1.fastq.gz'
- sample_sheet['reads_merged'] = sample_sheet['sample_name'] + '_R2.fastq.gz'
+ sample_sheet['reads_merged'] = sample_sheet['sample_name'] + '_R2.fastq.gz'
# names for mapped reads
sample_sheet['mapped_reads'] = sample_sheet['sample_name'] + '.bam'
@@ -192,8 +191,13 @@ class experiment:
sys.exit('unindentified column selected: ' + str(column) + '\n')
field = sheet[sheet['sample_name'] == sample_name][column]
- if field[0].__class__ == set:
+
+ if field.__class__ == set:
field = list(field[0])
+
+ elif isinstance(field, pd.Series):
+ field = list(field)[0]
+
else:
field = str(field[0])
=====================================
scripts/scrnaReport.Rmd
=====================================
@@ -0,0 +1,213 @@
+---
+title: "PiGx-scRNAseq Report"
+author: "BIMSB Bioinformatics Platform"
+date: "`r date()`"
+output:
+ html_document:
+ df_print: paged
+params:
+ sceRds_file:
+ read_stats: NULL
+ output_file: 'test.scRNA.html'
+ workdir: './'
+ path_mapped: NULL
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
+knitr::opts_knit$set(root.dir = params$workdir)
+```
+
+```{r libraries, include=FALSE}
+suppressPackageStartupMessages({
+ library(cowplot)
+ library(SingleCellExperiment)
+ library(data.table)
+ library(dplyr)
+ library(pheatmap)
+ library(DT)
+ library(ggplot2)
+})
+```
+
+# Description
+
+PiGx scRNAseq
+
+This report was generated with PiGx-scRNAseq version 1.1.6.
+
+# Input Settings
+
+```{r printInputSettings}
+
+sceRds_file = params$sceRds_file
+workdir = params$workdir
+output_file = params$output_file
+
+inputParameterDesc = c('RDS format file containing a SingleCellExperiment object',
+ 'Working directory',
+ 'Path to HTML report output'
+ )
+inputParameterValues = c(sceRds_file,
+ workdir,
+ output_file)
+inputSettings = data.frame(parameters = inputParameterDesc,
+ values = inputParameterValues,
+ stringsAsFactors = FALSE)
+DT::datatable(data = inputSettings,
+ extensions = 'FixedColumns',
+ options = list(fixedColumns = TRUE,
+ scrollX = TRUE,
+ pageLength = length(inputParameterValues),
+ dom = 't'))
+```
+
+
+```{r read-sce-object, include=FALSE, echo=FALSE}
+sce = readRDS(file = sceRds_file)
+```
+
+```{r input, include=FALSE, echo=FALSE}
+read_statistics = do.call(rbind, lapply(params$read_stats, read.table, header=TRUE))
+```
+
+
+# Sample Statistics
+
+The following set of figures provide the basic statistics about the quality of the single cell data sets.
+
+### Total number of reads per sample
+
+The plot displays the numbers of mapped reads (in millions) for total (green bars) and uniquely-mapped UMI (orange bars) for each sample.
+
+```{r TotalReads, results='asis', echo = FALSE}
+read_statistics %>%
+ dplyr::filter(mapped %in% c('reads.total','map.uniq')) %>%
+ mutate(mapped = case_when(
+ mapped == 'reads.total' ~ 'total UMI',
+ mapped == 'map.uniq' ~ 'uniquely mapped UMI'
+ )) %>%
+ ggplot(aes(sample, cnts, fill=mapped)) +
+ geom_bar(stat='identity', position='dodge') +
+ xlab('Sample') +
+ ylab('Number of mapped reads (in milions)') +
+ scale_fill_brewer(name='Mapped reads', palette='Set2') +
+ theme(axis.text.x = element_text(angle = 45, hjust = 1))
+```
+
+
+### Number of cell barcodes
+
+The figure shows the total number of reads mapping to exons per sample.
+
+```{r Read_Statistics}
+read_statistics %>%
+ filter(mapped == 'nCellBarcodes') %>%
+ filter(type == 'Gene') %>%
+ ggplot(aes(sample, cnts)) +
+ geom_bar(stat='identity') +
+ theme(axis.text.x=element_text(angle=45, hjust=1)) +
+ xlab('Sample Name') +
+ ylab('Number of detected cells') +
+ scale_fill_manual(values=c('black','lightgray'))
+```
+
+### Number of UMI
+
+The figure shows the total number of detected UMIs per sample.
+
+```{r exon_reads}
+read_statistics %>%
+ filter(mapped == 'nUMIs') %>%
+ ggplot(aes(sample, cnts)) +
+ geom_bar(stat='identity') +
+ theme(axis.text.x=element_text(angle=45, hjust=1)) +
+ xlab('Sample Name') +
+ ylab('Number of detected UMI') +
+ scale_fill_manual(values=c('black','lightgray'))
+```
+
+### Read Distribution in Genomic Features
+
+The plot displays the numbers of mapped reads (in millions) categorized by annotation (as indicated by the legend) for each sample.
+
+```{r ReadDistribution, results='asis', echo = FALSE}
+read_statistics %>%
+ mutate(type = as.character(type)) %>%
+ mutate(type = case_when(
+ type == 'GeneFull' ~ 'gene',
+ type == 'Gene' ~ 'exon',
+ TRUE ~ type
+ )) %>%
+ dplyr::filter(type %in% c('exon','gene','mapped')) %>%
+ dplyr::filter(mapped %in% c('map.total','nUMIs')) %>%
+ reshape2::dcast(sample ~ type, value.var='cnts') %>%
+ mutate(intron = gene - exon) %>%
+ mutate(other = mapped - gene) %>%
+ dplyr::select(sample, other, exon, intron) %>%
+ reshape2::melt() %>%
+ mutate(sample = factor(sample, ordered=TRUE, levels=unique(sample))) %>%
+ mutate(value = value * 1e-6) %>%
+ ggplot(aes(sample, value, fill=variable)) +
+ geom_bar(stat='identity') +
+ xlab('Sample') +
+ ylab('Number of mapped reads (in milions)') +
+ scale_fill_brewer(name = 'Annotation', palette='Set2') +
+ theme(axis.text.x = element_text(angle = 45, hjust = 1))
+```
+
+
+```{r cell-stats}
+plotCellStats = function(df, x, y, label_x, label_y, title) {
+ p = ggplot(df, aes_string(x=x, y=y)) +
+ geom_boxplot() +
+ labs(x = label_x, y = label_y, title = title) +
+ theme(plot.title = element_text(hjust = 0.5),
+ axis.text.x = element_text(angle = 45, hjust = 1))
+ return(p)
+}
+
+plotDesc = list('nGene' = c('Sample',
+ 'Number of detected genes',
+ 'Number of detected genes per cell'),
+ 'max_gene' = c('Sample',
+ 'Maximum gene expression per cell',
+ 'Maximum gene expression'),
+ 'mean_expr' = c('Sample',
+ 'Average gene expression',
+ 'Average gene expression per cell\nfor genes with >0 UMI'))
+
+
+cellStatsPlots = lapply(names(plotDesc), function(y){
+ p = plotCellStats(
+ df = as.data.frame(colData(sce)),
+ x = 'sample_name',
+ y = y,
+ label_x = plotDesc[[y]][1],
+ label_y = plotDesc[[y]][2],
+ title = plotDesc[[y]][3])
+ return(p)
+})
+names(cellStatsPlots) = paste(lapply(plotDesc, function(x) x[2]))
+```
+
+### Cell Statistics{.tabset}
+
+These are boxplots of cell statistics.
+
+__Number of detected genes__ displays the distributions of numbers of detected genes per cell.
+__Maximum gene expression per cell__ displays the distributions of maximum gene expression per cell.
+__Average gene expression__ displays the distributions of average gene expression for genes with at least 1 UMI per cell.
+
+```{r cellStatsPlots, results='asis', echo = FALSE}
+for (i in 1:length(cellStatsPlots)) {
+ cat("### ",names(cellStatsPlots)[i],"\n")
+ print(cellStatsPlots[[i]])
+ cat('\n\n')
+}
+```
+
+# Session Information
+```{r sessionInfo}
+sessionInfo()
+```
=====================================
scripts/scrnaReport.Rmd.in
=====================================
@@ -103,8 +103,8 @@ The figure shows the total number of detected cells per sample.
```{r Read_Statistics}
read_statistics %>%
filter(mapped == 'nCellBarcodes') %>%
- filter(type == 'exon') %>%
- ggplot(aes(sample, cnts, fill=mapped)) +
+ filter(type == 'Gene') %>%
+ ggplot(aes(sample, cnts)) +
geom_bar(stat='identity') +
theme(axis.text.x=element_text(angle=45, hjust=1)) +
xlab('Sample Name') +
@@ -119,8 +119,8 @@ The figure shows the total number of detected UMIs per sample.
```{r exon_reads}
read_statistics %>%
filter(mapped == 'nUMIs') %>%
- filter(type == 'exon') %>%
- ggplot(aes(sample, cnts, fill=mapped)) +
+ filter(type == 'Gene') %>%
+ ggplot(aes(sample, cnts)) +
geom_bar(stat='identity') +
theme(axis.text.x=element_text(angle=45, hjust=1)) +
xlab('Sample Name') +
@@ -142,11 +142,11 @@ read_statistics %>%
)) %>%
dplyr::filter(type %in% c('exon','gene','mapped')) %>%
dplyr::filter(mapped %in% c('map.total','nUMIs')) %>%
- dcast(sample ~ type, value.var='cnts') %>%
+ reshape2::dcast(sample ~ type, value.var='cnts') %>%
mutate(intron = gene - exon) %>%
mutate(other = mapped - gene) %>%
dplyr::select(sample, other, exon, intron) %>%
- melt() %>%
+ reshape2::melt() %>%
mutate(sample = factor(sample, ordered=TRUE, levels=unique(sample))) %>%
mutate(value = value * 1e-6) %>%
ggplot(aes(sample, value, fill=variable)) +
@@ -164,7 +164,7 @@ plotCellStats = function(df, x, y, label_x, label_y, title) {
geom_boxplot() +
labs(x = label_x, y = label_y, title = title) +
theme(plot.title = element_text(hjust = 0.5),
- axis.text.x = element_text(angle = 90, hjust = 1))
+ axis.text.x = element_text(angle = 45, hjust = 1))
return(p)
}
View it on GitLab: https://salsa.debian.org/med-team/pigx-scrnaseq/-/commit/bfa5c0280f033e356fd31d35a63518e67ac6d2dd
--
View it on GitLab: https://salsa.debian.org/med-team/pigx-scrnaseq/-/commit/bfa5c0280f033e356fd31d35a63518e67ac6d2dd
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201213/6a7a44d7/attachment-0001.html>
More information about the debian-med-commit
mailing list