[med-svn] [Git][med-team/rna-star][upstream] New upstream version 2.7.5a+20200629git1552aa0
Steffen Möller
gitlab at salsa.debian.org
Tue Jul 7 19:19:29 BST 2020
Steffen Möller pushed to branch upstream at Debian Med / rna-star
Commits:
0b30ea34 by Steffen Moeller at 2020-07-07T19:08:24+02:00
New upstream version 2.7.5a+20200629git1552aa0
- - - - -
12 changed files:
- CHANGES.md
- extras/docker/Dockerfile
- source/IncludeDefine.h
- source/ParametersSolo.cpp
- source/ReadAlign_alignBAM.cpp
- source/SoloFeature.h
- source/SoloFeature_collapseUMI.cpp
- source/SoloFeature_outputResults.cpp
- source/SoloFeature_processRecords.cpp
- source/SoloReadBarcode_getCBandUMI.cpp
- source/SuperTranscriptome.cpp
- source/VERSION
Changes:
=====================================
CHANGES.md
=====================================
@@ -1,3 +1,6 @@
+* Docker build: switched to debian:stable-slim in the Dockerfile.
+* --soloType CB_samTagOut now allows output of (uncorrected) UMI sequences and quality scores with SAM tags UR and UY.
+
STAR 2.7.5a 2020/06/16
======================
**Major new features:**
=====================================
extras/docker/Dockerfile
=====================================
@@ -1,4 +1,4 @@
-FROM debian:stretch-slim
+FROM debian:stable-slim
MAINTAINER dobin at cshl.edu
@@ -11,6 +11,7 @@ RUN set -ex
RUN apt-get update && \
apt-get install -y --no-install-recommends ${PACKAGES} && \
apt-get clean && \
+ g++ --version && \
cd /home && \
wget --no-check-certificate https://github.com/alexdobin/STAR/archive/${STAR_VERSION}.zip && \
unzip ${STAR_VERSION}.zip && \
=====================================
source/IncludeDefine.h
=====================================
@@ -22,6 +22,7 @@
#include <limits>
#include <stdint.h>
#include <omp.h>
+#include <math.h>
#include "VERSION"
=====================================
source/ParametersSolo.cpp
=====================================
@@ -59,11 +59,15 @@ void ParametersSolo::initialize(Parameters *pPin)
type=SoloTypes::CB_UMI_Complex;
bL=0;
cbumiL=0;
+
} else if (typeStr=="CB_samTagOut") {
type=SoloTypes::CB_samTagOut;
- cbumiL=cbL;
+ cbumiL=cbL+umiL;
if (bL==1)
- bL=cbL;
+ bL=cbumiL;
+
+ barcodeStart=min(cbS,umiS)-1;
+ barcodeEnd=max(cbS+cbL,umiS+umiL)-2;
} else if (typeStr=="SmartSeq") {
type=SoloTypes::SmartSeq;
@@ -339,6 +343,9 @@ void ParametersSolo::initialize(Parameters *pPin)
errOut << "SOLUTION: re-run STAR with --outSAMtype BAM SortedByCoordinate ...\n";
exitWithError(errOut.str(),std::cerr, pP->inOut->logMain, EXIT_CODE_PARAMETER, *pP);
};
+ } else if ( pP->outSAMattrPresent.UB && type==SoloTypes::CB_samTagOut) {
+ exitWithError("EXITING because of fatal PARAMETERS error: UB attribute (corrected UMI) in --outSAMattributes cannot be used with --soloType CB_samTagOut \n" \
+ "SOLUTION: instead, use UR (uncorrected UMI) in --outSAMattributes\n", std::cerr, pP->inOut->logMain, EXIT_CODE_PARAMETER, *pP);
};
readInfoYes.fill(false);
=====================================
source/ReadAlign_alignBAM.cpp
=====================================
@@ -393,12 +393,11 @@ int ReadAlign::alignBAM(Transcript const &trOut, uint nTrOut, uint iTrOut, uint
attrN+=bamAttrArrayWrite(soloRead->readBar->bQual,"sQ",attrOutArray+attrN);
break;
- //following attributes are not processed here
case ATTR_CB:
- if (soloRead->readBar->cbSeqCorrected!="")
- attrN+=bamAttrArrayWrite(soloRead->readBar->cbSeqCorrected,"CB",attrOutArray+attrN);
+ if (soloRead->readBar->cbSeqCorrected!="") //only if corrected CB is defined (e.g. CB_samTagOut), otherwise it will be added at the BAM sorting stage
+ attrN+=bamAttrArrayWrite(soloRead->readBar->cbSeqCorrected,"CB",attrOutArray+attrN);
break;
- case ATTR_UB:
+ case ATTR_UB: //will be added at the BAM sorting stage
break;
default:
=====================================
source/SoloFeature.h
=====================================
@@ -32,6 +32,7 @@ public:
SoloReadBarcode *readBarSum;
uint64 nReadsMapped, nCB, nReadsInput; //total number of mapped reads
+ uint32 featuresNumber; //number of features (i.e. genes, SJs, etc)
uint32 *rGeneUMI;//mapped reads sorted by CB
uint32 *indCB;//index of detected CBs in the whitelist
=====================================
source/SoloFeature_collapseUMI.cpp
=====================================
@@ -210,8 +210,8 @@ void SoloFeature::collapseUMI(uint32 iCB, uint32 *umiArray)
//compact reads per gene
uint32 gid1=-1;//current gID
uint32 nGenes=0; //number of genes
- uint32 *gID = new uint32[min(Trans.nGe,rN)+1]; //gene IDS
- uint32 *gReadS = new uint32[min(Trans.nGe,rN)+1]; //start of gene reads TODO: allocate this array in the 2nd half of rGU
+ uint32 *gID = new uint32[min(featuresNumber,rN)+1]; //gene IDs
+ uint32 *gReadS = new uint32[min(featuresNumber,rN)+1]; //start of gene reads TODO: allocate this array in the 2nd half of rGU
for (uint32 iR=0; iR<rN*rguStride; iR+=rguStride) {
if (rGU[iR+rguG]!=gid1) {//record gene boundary
gReadS[nGenes]=iR;
=====================================
source/SoloFeature_outputResults.cpp
=====================================
@@ -76,20 +76,8 @@ void SoloFeature::outputResults(bool cellFilterYes)
umiOutCol={0,1,2};
//header
- uint32 featureN=0;
- switch (featureType) {
- case SoloFeatureTypes::Gene :
- case SoloFeatureTypes::GeneFull :
- case SoloFeatureTypes::Velocyto :
- case SoloFeatureTypes::VelocytoSimple :
- featureN=Trans.nGe;//genes
- break;
- case SoloFeatureTypes::SJ :
- featureN=P.sjAll[0].size();//sj
- break;
- };
countMatrixStream <<"%%MatrixMarket matrix coordinate integer general\n%\n";
- countMatrixStream << featureN <<' '<< (cellFilterYes ? filteredCells.nCells : pSolo.cbWLsize) <<' '<< nCellGeneEntries << '\n';
+ countMatrixStream << featuresNumber <<' '<< (cellFilterYes ? filteredCells.nCells : pSolo.cbWLsize) <<' '<< nCellGeneEntries << '\n';
uint32 cbInd1=0;
for (uint32 icb=0; icb<nCB; icb++) {
=====================================
source/SoloFeature_processRecords.cpp
=====================================
@@ -26,6 +26,17 @@ void SoloFeature::processRecords(ReadAlignChunk **RAchunk)
P.inOut->logMain <<"Read splice junctions for Solo SJ feature: "<< P.sjAll[0].size() <<endl;
};
+ //number of features
+ switch (featureType) {
+ case SoloFeatureTypes::Gene :
+ case SoloFeatureTypes::GeneFull :
+ case SoloFeatureTypes::Velocyto :
+ featuresNumber=Trans.nGe;
+ break;
+ case SoloFeatureTypes::SJ :
+ featuresNumber=P.sjAll[0].size();
+ };
+
sumThreads(RAchunk);
=====================================
source/SoloReadBarcode_getCBandUMI.cpp
=====================================
@@ -160,18 +160,34 @@ void SoloReadBarcode::getCBandUMI(const string &readNameExtra, const uint32 &rea
bSeq=readNameExtra.substr(0,bLength);
bQual=readNameExtra.substr(bLength+1,bLength);
- if (pSolo.type==pSolo.SoloTypes::CB_UMI_Simple) {
+ if ( pSolo.type==pSolo.SoloTypes::CB_UMI_Simple ) {
cbSeq=bSeq.substr(pSolo.cbS-1,pSolo.cbL);
umiSeq=bSeq.substr(pSolo.umiS-1,pSolo.umiL);
cbQual=bQual.substr(pSolo.cbS-1,pSolo.cbL);
umiQual=bQual.substr(pSolo.umiS-1,pSolo.umiL);
- if (!convertCheckUMI())
+ if (!convertCheckUMI()) {//UMI conversion
return;
+ };
matchCBtoWL(cbSeq, cbQual, pSolo.cbWL, cbMatch, cbMatchInd, cbMatchString);
-
- } else if (pSolo.type==pSolo.SoloTypes::CB_UMI_Complex) {
+
+
+ } else if ( pSolo.type==pSolo.SoloTypes::CB_samTagOut ) {//similar to CB_UMI_Simple, but no UMI, and define cbSeqCorrected
+ cbSeq=bSeq.substr(pSolo.cbS-1,pSolo.cbL);
+ umiSeq=bSeq.substr(pSolo.umiS-1,pSolo.umiL);
+ cbQual=bQual.substr(pSolo.cbS-1,pSolo.cbL);
+ umiQual=bQual.substr(pSolo.umiS-1,pSolo.umiL);
+
+ matchCBtoWL(cbSeq, cbQual, pSolo.cbWL, cbMatch, cbMatchInd, cbMatchString);
+
+ if ( cbMatch==0 || cbMatch==1 ) {
+ cbSeqCorrected=pSolo.cbWLstr[cbMatchInd[0]];
+ } else {
+ cbSeqCorrected="";
+ };
+
+ } else if ( pSolo.type==pSolo.SoloTypes::CB_UMI_Complex ) {
cbSeq="";
cbQual="";
@@ -236,18 +252,6 @@ void SoloReadBarcode::getCBandUMI(const string &readNameExtra, const uint32 &rea
cbMatchString=to_string(cbMatchInd[0]);
};
- ///////////////////////////////////////////////////////////////////////////
- } else if (pSolo.type==pSolo.SoloTypes::CB_samTagOut) {
- //CB only, on read 3
- cbSeq=bSeq.substr(pSolo.cbS-1,pSolo.cbL);
- cbQual=bQual.substr(pSolo.cbS-1,pSolo.cbL);
- matchCBtoWL(cbSeq, cbQual, pSolo.cbWL, cbMatch, cbMatchInd, cbMatchString);
- if (cbMatch==0 || cbMatch==1) {
- cbSeqCorrected=pSolo.cbWLstr[cbMatchInd[0]];
- } else {
- cbSeqCorrected="";
- };
-
///////////////////////////////////////////////////////////////////////////
} else if (pSolo.type==pSolo.SoloTypes::SmartSeq) {
cbSeq=cbQual=cbSeqCorrected=""; //TODO make cbSeq=file label
=====================================
source/SuperTranscriptome.cpp
=====================================
@@ -18,12 +18,12 @@ void SuperTranscriptome::sjCollapse()
sjCollapsed[ sj[i].super ].push_back({sj[i].start, sj[i].end});
};
- ofstream & superTrSJ = ofstrOpen(P.pGe.gDir+"/superTranscriptSJcollapsed.tsv", ERROR_OUT, P);
+ ofstream & superTrSJstream = ofstrOpen(P.pGe.gDir+"/superTranscriptSJcollapsed.tsv", ERROR_OUT, P);
for(uint64 i = 0; i < sjCollapsed.size(); i++) {
for(auto &sj1 : sjCollapsed[i])
- superTrSJ << i <<"\t"<< sj1[0] <<"\t"<< sj1[1] << "\n";
+ superTrSJstream << i <<"\t"<< sj1[0] <<"\t"<< sj1[1] << "\n";
};
- superTrSJ.close();
+ superTrSJstream.close();
P.inOut->logMain << "Number of splice junctions in superTranscripts = " << sj.size() <<endl;
P.inOut->logMain << "Number of collapsed splice junctions in superTranscripts = " << sjCollapsed.size() <<endl;
@@ -38,7 +38,7 @@ void SuperTranscriptome::load(char *G, vector<uint64> &chrStart, vector<uint64>
superTrs[ii].seqP=(uint8*)G+chrStart[ii];
};
- ifstream & superTrSJ = ifstrOpen(P.pGe.gDir+"/superTranscriptSJcollapsed.tsv", ERROR_OUT, "SOLUTION: re-generate the genome.", P);
+ ifstream & superTrSJstream = ifstrOpen(P.pGe.gDir+"/superTranscriptSJcollapsed.tsv", ERROR_OUT, "SOLUTION: re-generate the genome.", P);
uint32 sutr=0,sutr1=0;
vector<array<uint32,3>> sjC1;
@@ -48,7 +48,8 @@ void SuperTranscriptome::load(char *G, vector<uint64> &chrStart, vector<uint64>
while(true) {//load junctions, assume they are sorted by donor coordinate
- bool inGood = (superTrSJ >> sutr);
+ superTrSJstream >> sutr;
+ bool inGood = superTrSJstream.good();
if (sutr!=sutr1 || !inGood) {//new suTr
//sort sj1 by acceptor position
@@ -77,14 +78,14 @@ void SuperTranscriptome::load(char *G, vector<uint64> &chrStart, vector<uint64>
};
uint32 sjd, sja;
- superTrSJ >> sjd >> sja;
+ superTrSJstream >> sjd >> sja;
if (sjDonor1.empty() || sjDonor1.back() < sjd)
sjDonor1.push_back(sjd);
sjC1.push_back({sjd,sja,(uint32)sjDonor1.size()-1});//record donor, acceptor, and position of the donor in sjDonor
};
- superTrSJ.close();
+ superTrSJstream.close();
P.inOut->logMain << "Max number of splice junctions in a superTranscript = " << sjNmax <<endl;
P.inOut->logMain << "Max number of donor sites in a superTranscript = " << sjDonorNmax <<endl;
=====================================
source/VERSION
=====================================
@@ -1 +1 @@
-#define STAR_VERSION "2.7.5a"
+#define STAR_VERSION "2.7.5a_2020-06-29"
View it on GitLab: https://salsa.debian.org/med-team/rna-star/-/commit/0b30ea3411d55dc874d4945a778afe191b8b8eec
--
View it on GitLab: https://salsa.debian.org/med-team/rna-star/-/commit/0b30ea3411d55dc874d4945a778afe191b8b8eec
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200707/36b04ce0/attachment-0001.html>
More information about the debian-med-commit
mailing list