[med-svn] [Git][med-team/pigx-scrnaseq][master] 2 commits: New upstream version 1.1.7+ds

Sun Dec 13 23:10:02 GMT 2020


Steffen Möller pushed to branch master at Debian Med / pigx-scrnaseq


Commits:
bfa5c028 by Steffen Moeller at 2020-12-14T00:07:42+01:00
New upstream version 1.1.7+ds
- - - - -
41f15864 by Steffen Moeller at 2020-12-14T00:09:04+01:00
Update upstream source from tag 'upstream/1.1.7+ds'

Update to upstream version '1.1.7+ds'
with Debian dir d423d3374780638ef076dc25ba136f13540211ee

- - - - -


7 changed files:

- Snake_Dropseq.py
- VERSION
- debian/watch
- requirements.txt
- scripts/Sample_Sheet_Class.py
- + scripts/scrnaReport.Rmd
- scripts/scrnaReport.Rmd.in


Changes:

=====================================
Snake_Dropseq.py
=====================================
@@ -153,7 +153,6 @@ for sample_name in SAMPLE_NAMES:
         read_file    = os.path.join(PATH_MAPPED, sample_name, genome, sample_name + '_2.fastq.gz')
         FILTER_READS.append(read_file)
 
-
 # ----------------------------------------------------------------------------- #
 # MAPPING
 MAP_scRNA = expand(os.path.join(PATH_MAPPED, "{name}", "{genome}", "{name}_Aligned.out.bam"), genome = REFERENCE_NAMES, name = SAMPLE_NAMES)
@@ -211,11 +210,9 @@ RULE_ALL = RULE_ALL + [LINK_REFERENCE_PRIMARY, LINK_GTF_PRIMARY]
 if len(COMBINE_REFERENCE) > 0:
     RULE_ALL = RULE_ALL + COMBINE_REFERENCE
 
-#RULE_ALL = RULE_ALL + DICT + REFFLAT + MAKE_STAR_INDEX + FASTQC + MERGE_FASTQ_TO_BAM + MERGE_BAM_PER_SAMPLE + MAP_scRNA + BAM_HISTOGRAM + FIND_READ_CUTOFF + READS_MATRIX + UMI + READ_STATISTICS  + BIGWIG + UMI_LOOM + COMBINED_UMI_MATRICES + SCE_RDS_FILES + SEURAT_RDS_FILES + REPORT_FILES
 
 RULE_ALL = RULE_ALL + MAKE_STAR_INDEX + MERGE_TECHNICAL_REPLICATES + FILTER_READS + BAM_HISTOGRAM + FIND_CELL_NUMBER_CUTOFF + MAP_scRNA + SORT_BAM + INDEX_BAM + UMI_LOOM + COMBINED_LOOM_MATRICES + SCE_RDS_FILES + SEURAT_RDS_FILES + BIGWIG + READ_STATISTICS + REPORT_FILES
 
-
 # ----------------------------------------------------------------------------- #
 rule all:
     input:
@@ -392,6 +389,7 @@ if GENOME_SECONDARY_IND:
 # ----------------------------------------------------------------------------- #
 def fetch_fastq_technical_replicates(wc):
     sample_name = str(wc.name)
+
     if(str(wc.type) == 'R1'):
         input_fastq = SAMPLE_SHEET.fetch_barcode_path(sample_name)
 
@@ -400,6 +398,7 @@ def fetch_fastq_technical_replicates(wc):
 
     if not input_fastq.__class__ == list:
         input_fastq = list(input_fastq)
+
     return(input_fastq)
 
 
@@ -659,6 +658,11 @@ rule map_star:
             '--soloFeatures',    str(params.features),
             '--outSAMtype', 'BAM Unsorted',
             '--readFilesCommand', str(params.zcat),
+            # CR/UR: raw (uncorrected) CellBarcode/UMI
+            # CY/UY: quality score for CellBarcode/UMI
+            # GX/GN: for gene ID/names
+            # sS/sQ: for sequence/quality combined CellBarcode and UMI; sM for barcode match status.
+            '--outSAMattributes NH HI nM AS CR UR GX GN sS sQ sM',
             join_params("STAR", PARAMS, params.params_STAR),
             '2>',str(log.logfile)
         ])
@@ -686,7 +690,7 @@ rule sort_bam:
 
     run:
         command = ' '.join([
-            'samtools sort',
+            params.samtools, 'sort',
             '-@', str(params.threads),
             '-o', str(output.outfile),
             str(input.infile)
@@ -714,7 +718,7 @@ rule index_bam:
 
     run:
         command = ' '.join([
-            'samtools index',
+            params.samtools, 'index',
             '-b',
             '-@', str(params.threads),
             str(input.infile),


=====================================
VERSION
=====================================
@@ -1 +1 @@
-1.1.6
+1.1.7


=====================================
debian/watch
=====================================
@@ -1,4 +1,4 @@
 version=4
-opts="repack,repacksuffix=+ds,dversionmange=auto,filenamemangle=s%(?:.*?)?v?(\d[\d.]*)\.tar\.gz%pigx-scrnaseq-$1.tar.gz%" \
+opts="repack,repacksuffix=+ds,dversionmangle=auto,filenamemangle=s%(?:.*?)?v?(\d[\d.]*)\.tar\.gz%pigx-scrnaseq-$1.tar.gz%" \
    https://github.com/BIMSBbioinfo/pigx_scrnaseq/tags \
    (?:.*?/)?v?(\d[\d.]*)\.tar\.gz debian uupdate


=====================================
requirements.txt
=====================================
@@ -1,7 +1,4 @@
 STAR
-java
-picard
-Drop-seq_tools-1.12 #see software at http://mccarrolllab.com/dropseq/
 
 snakemake=4.0.0
 python=3.6*
@@ -11,8 +8,11 @@ pandas
 yaml
 snakemake
 fastqc
+flexbar
+jellyfish
 
 #R dependencies
+#argparser
 #dplyr
 #dropbead
 #GenomicAlignments
@@ -24,6 +24,7 @@ fastqc
 #rtracklayer
 #stringr
 #yaml
+#Seurat
 
 #cowplot
 #data.table


=====================================
scripts/Sample_Sheet_Class.py
=====================================
@@ -133,13 +133,13 @@ class experiment:
 
 
 
+
     # ----------------------------------------------------------------------- #
     # pivots the sample_sheet by sample_name to get unique technical replicates
     def merge_technical_replicates(self):
 
-        #print('Merging technical replicates ...')
         # defines the sample descriptors
-        sample_sheet = pd.pivot_table(self.SAMPLE_SHEET, index='sample_name', aggfunc=set)
+        sample_sheet = pd.pivot_table(self.SAMPLE_SHEET, index='sample_name', aggfunc=list)
         sample_sheet['sample_name'] = sample_sheet.index
 
         column_names = set(list(self.SAMPLE_SHEET.columns))
@@ -148,13 +148,12 @@ class experiment:
         # checks whether all technical replicates have the same metadata
         for cname in column_names:
             cid = sample_sheet[cname]
-            for i in cid:
-                if(len(i) > 1):
-                    sys.exit('Technical replicates have different metadata')
+            if(len(set(list(cid)[0])) > 1):
+                sys.exit('Technical replicates have different metadata')
 
         # names for merged files
         sample_sheet['barcode_merged'] = sample_sheet['sample_name'] + '_R1.fastq.gz'
-        sample_sheet['reads_merged'] = sample_sheet['sample_name'] + '_R2.fastq.gz'
+        sample_sheet['reads_merged']   = sample_sheet['sample_name'] + '_R2.fastq.gz'
 
         # names for mapped reads
         sample_sheet['mapped_reads'] = sample_sheet['sample_name'] + '.bam'
@@ -192,8 +191,13 @@ class experiment:
             sys.exit('unindentified column selected: ' + str(column) + '\n')
 
         field = sheet[sheet['sample_name'] == sample_name][column]
-        if field[0].__class__ == set:
+
+        if field.__class__ == set:
             field = list(field[0])
+
+        elif isinstance(field, pd.Series):
+            field = list(field)[0]
+
         else:
             field = str(field[0])
 


=====================================
scripts/scrnaReport.Rmd
=====================================
@@ -0,0 +1,213 @@
+---
+title: "PiGx-scRNAseq Report"
+author: "BIMSB Bioinformatics Platform"
+date: "`r date()`"
+output:
+  html_document:
+    df_print: paged
+params:
+  sceRds_file:
+  read_stats: NULL
+  output_file: 'test.scRNA.html'
+  workdir: './'
+  path_mapped: NULL
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
+knitr::opts_knit$set(root.dir = params$workdir)
+```
+
+```{r libraries, include=FALSE}
+suppressPackageStartupMessages({
+  library(cowplot)
+  library(SingleCellExperiment)
+  library(data.table)
+  library(dplyr)
+  library(pheatmap)
+  library(DT)
+  library(ggplot2)
+})
+```
+
+# Description
+
+PiGx scRNAseq
+
+This report was generated with PiGx-scRNAseq version 1.1.6.
+
+# Input Settings
+
+```{r printInputSettings}
+
+sceRds_file = params$sceRds_file
+workdir     = params$workdir
+output_file = params$output_file
+
+inputParameterDesc = c('RDS format file containing a SingleCellExperiment object',
+                     'Working directory',
+                     'Path to HTML report output'
+                     )
+inputParameterValues = c(sceRds_file,
+                          workdir,
+                          output_file)
+inputSettings = data.frame(parameters = inputParameterDesc,
+                            values = inputParameterValues,
+                            stringsAsFactors = FALSE)
+DT::datatable(data = inputSettings,
+              extensions = 'FixedColumns',
+              options = list(fixedColumns = TRUE,
+                         scrollX = TRUE,
+                         pageLength = length(inputParameterValues),
+                         dom = 't'))
+```
+
+
+```{r read-sce-object, include=FALSE, echo=FALSE}
+sce = readRDS(file = sceRds_file)
+```
+
+```{r input, include=FALSE, echo=FALSE}
+read_statistics = do.call(rbind, lapply(params$read_stats, read.table, header=TRUE))
+```
+
+
+# Sample Statistics
+
+The following set of figures provide the basic statistics about the quality of the single cell data sets.
+
+### Total number of reads per sample
+
+The plot displays the numbers of mapped reads (in millions) for total (green bars) and uniquely-mapped UMI (orange bars) for each sample.
+
+```{r TotalReads, results='asis', echo = FALSE}
+read_statistics %>%
+    dplyr::filter(mapped %in% c('reads.total','map.uniq')) %>%
+    mutate(mapped = case_when(
+        mapped == 'reads.total' ~ 'total UMI',
+        mapped == 'map.uniq'    ~ 'uniquely mapped UMI'
+    )) %>%
+    ggplot(aes(sample, cnts, fill=mapped)) +
+    geom_bar(stat='identity', position='dodge') +
+    xlab('Sample') +
+    ylab('Number of mapped reads (in milions)') +
+    scale_fill_brewer(name='Mapped reads', palette='Set2') +
+    theme(axis.text.x = element_text(angle = 45, hjust = 1))
+```
+
+
+### Number of cell barcodes
+
+The figure shows the total number of reads mapping to exons per sample.
+
+```{r Read_Statistics}
+read_statistics %>%
+  filter(mapped == 'nCellBarcodes') %>%
+  filter(type   ==  'Gene') %>%
+  ggplot(aes(sample, cnts)) +
+  geom_bar(stat='identity') +
+  theme(axis.text.x=element_text(angle=45, hjust=1)) +
+  xlab('Sample Name') +
+  ylab('Number of detected cells') +
+  scale_fill_manual(values=c('black','lightgray'))
+```
+
+### Number of UMI
+
+The figure shows the total number of detected UMIs per sample.
+
+```{r exon_reads}
+read_statistics %>%
+  filter(mapped == 'nUMIs') %>%
+  ggplot(aes(sample, cnts)) +
+  geom_bar(stat='identity') +
+  theme(axis.text.x=element_text(angle=45, hjust=1)) +
+  xlab('Sample Name') +
+  ylab('Number of detected UMI') +
+  scale_fill_manual(values=c('black','lightgray'))
+```
+
+### Read Distribution in Genomic Features
+
+The plot displays the numbers of mapped reads (in millions) categorized by annotation (as indicated by the legend) for each sample.
+
+```{r ReadDistribution, results='asis', echo = FALSE}
+read_statistics %>%
+  mutate(type = as.character(type)) %>%
+  mutate(type  = case_when(
+    type == 'GeneFull' ~ 'gene',
+    type == 'Gene'     ~ 'exon',
+    TRUE ~ type
+  )) %>%
+    dplyr::filter(type   %in% c('exon','gene','mapped')) %>%
+    dplyr::filter(mapped %in% c('map.total','nUMIs')) %>%
+    reshape2::dcast(sample ~ type, value.var='cnts')  %>%
+    mutate(intron = gene - exon) %>%
+    mutate(other  = mapped - gene) %>%
+    dplyr::select(sample, other, exon, intron) %>%
+    reshape2::melt() %>%
+    mutate(sample = factor(sample, ordered=TRUE, levels=unique(sample))) %>%
+    mutate(value = value * 1e-6) %>%
+    ggplot(aes(sample, value, fill=variable)) +
+    geom_bar(stat='identity') +
+    xlab('Sample') +
+    ylab('Number of mapped reads (in milions)') +
+    scale_fill_brewer(name = 'Annotation', palette='Set2') +
+    theme(axis.text.x = element_text(angle = 45, hjust = 1))
+```
+
+
+```{r cell-stats}
+plotCellStats = function(df, x, y, label_x, label_y, title) {
+  p = ggplot(df, aes_string(x=x, y=y)) +
+  geom_boxplot() +
+  labs(x = label_x, y = label_y, title = title) +
+  theme(plot.title = element_text(hjust = 0.5),
+        axis.text.x = element_text(angle = 45, hjust = 1))
+  return(p)
+}
+
+plotDesc = list('nGene' = c('Sample',
+                          'Number of detected genes',
+                          'Number of detected genes per cell'),
+              'max_gene' = c('Sample',
+                             'Maximum gene expression per cell',
+                             'Maximum gene expression'),
+              'mean_expr' = c('Sample',
+                              'Average gene expression',
+                              'Average gene expression per cell\nfor genes with >0 UMI'))
+
+
+cellStatsPlots = lapply(names(plotDesc), function(y){
+  p = plotCellStats(
+    df      = as.data.frame(colData(sce)),
+    x       = 'sample_name',
+    y       = y,
+    label_x = plotDesc[[y]][1],
+    label_y = plotDesc[[y]][2],
+    title   = plotDesc[[y]][3])
+  return(p)
+})
+names(cellStatsPlots) = paste(lapply(plotDesc, function(x) x[2]))
+```
+
+### Cell Statistics{.tabset}
+
+These are boxplots of cell statistics.
+
+__Number of detected genes__ displays the distributions of numbers of detected genes per cell.
+__Maximum gene expression per cell__ displays the distributions of maximum gene expression per cell.
+__Average gene expression__ displays the distributions of average gene expression for genes with at least 1 UMI per cell.
+
+```{r cellStatsPlots, results='asis', echo = FALSE}
+for (i in 1:length(cellStatsPlots)) {
+  cat("### ",names(cellStatsPlots)[i],"\n")
+  print(cellStatsPlots[[i]])
+  cat('\n\n')
+}
+```
+
+# Session Information
+```{r sessionInfo}
+sessionInfo()
+```


=====================================
scripts/scrnaReport.Rmd.in
=====================================
@@ -103,8 +103,8 @@ The figure shows the total number of detected cells per sample.
 ```{r Read_Statistics}
 read_statistics %>%
   filter(mapped == 'nCellBarcodes') %>%
-  filter(type   ==  'exon') %>%
-  ggplot(aes(sample, cnts, fill=mapped)) +
+  filter(type   ==  'Gene') %>%
+  ggplot(aes(sample, cnts)) +
   geom_bar(stat='identity') +
   theme(axis.text.x=element_text(angle=45, hjust=1)) +
   xlab('Sample Name') +
@@ -119,8 +119,8 @@ The figure shows the total number of detected UMIs per sample.
 ```{r exon_reads}
 read_statistics %>%
   filter(mapped == 'nUMIs') %>%
-  filter(type   ==  'exon') %>%
-  ggplot(aes(sample, cnts, fill=mapped)) +
+  filter(type   ==  'Gene') %>%
+  ggplot(aes(sample, cnts)) +
   geom_bar(stat='identity') +
   theme(axis.text.x=element_text(angle=45, hjust=1)) +
   xlab('Sample Name') +
@@ -142,11 +142,11 @@ read_statistics %>%
   )) %>%
     dplyr::filter(type   %in% c('exon','gene','mapped')) %>%
     dplyr::filter(mapped %in% c('map.total','nUMIs')) %>%
-    dcast(sample ~ type, value.var='cnts')  %>%
+    reshape2::dcast(sample ~ type, value.var='cnts')  %>%
     mutate(intron = gene - exon) %>%
     mutate(other  = mapped - gene) %>%
     dplyr::select(sample, other, exon, intron) %>%
-    melt() %>%
+    reshape2::melt() %>%
     mutate(sample = factor(sample, ordered=TRUE, levels=unique(sample))) %>%
     mutate(value = value * 1e-6) %>%
     ggplot(aes(sample, value, fill=variable)) +
@@ -164,7 +164,7 @@ plotCellStats = function(df, x, y, label_x, label_y, title) {
   geom_boxplot() +
   labs(x = label_x, y = label_y, title = title) +
   theme(plot.title = element_text(hjust = 0.5),
-        axis.text.x = element_text(angle = 90, hjust = 1))
+        axis.text.x = element_text(angle = 45, hjust = 1))
   return(p)
 }
 



View it on GitLab: https://salsa.debian.org/med-team/pigx-scrnaseq/-/compare/362f491016e0918f990ad7f8a07875e186575c79...41f15864581e0f5e1efc7e94c5de8e33031b46e2

-- 
View it on GitLab: https://salsa.debian.org/med-team/pigx-scrnaseq/-/compare/362f491016e0918f990ad7f8a07875e186575c79...41f15864581e0f5e1efc7e94c5de8e33031b46e2
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201213/8bf08af1/attachment-0001.html>