[med-svn] [Git][med-team/rna-star][upstream] New upstream version 2.7.5a+20200629git1552aa0

Steffen Möller gitlab at salsa.debian.org
Tue Jul 7 19:19:29 BST 2020



Steffen Möller pushed to branch upstream at Debian Med / rna-star


Commits:
0b30ea34 by Steffen Moeller at 2020-07-07T19:08:24+02:00
New upstream version 2.7.5a+20200629git1552aa0
- - - - -


12 changed files:

- CHANGES.md
- extras/docker/Dockerfile
- source/IncludeDefine.h
- source/ParametersSolo.cpp
- source/ReadAlign_alignBAM.cpp
- source/SoloFeature.h
- source/SoloFeature_collapseUMI.cpp
- source/SoloFeature_outputResults.cpp
- source/SoloFeature_processRecords.cpp
- source/SoloReadBarcode_getCBandUMI.cpp
- source/SuperTranscriptome.cpp
- source/VERSION


Changes:

=====================================
CHANGES.md
=====================================
@@ -1,3 +1,6 @@
+* Docker build: switched to debian:stable-slim in the Dockerfile.
+* --soloType CB_samTagOut now allows output of (uncorrected) UMI sequences and quality scores with SAM tags UR and UY.
+
 STAR 2.7.5a 2020/06/16
 ======================
 **Major new features:**


=====================================
extras/docker/Dockerfile
=====================================
@@ -1,4 +1,4 @@
-FROM debian:stretch-slim
+FROM debian:stable-slim
 
 MAINTAINER dobin at cshl.edu
 
@@ -11,6 +11,7 @@ RUN set -ex
 RUN apt-get update && \
     apt-get install -y --no-install-recommends ${PACKAGES} && \
     apt-get clean && \
+    g++ --version && \
     cd /home && \
     wget --no-check-certificate https://github.com/alexdobin/STAR/archive/${STAR_VERSION}.zip && \
     unzip ${STAR_VERSION}.zip && \


=====================================
source/IncludeDefine.h
=====================================
@@ -22,6 +22,7 @@
 #include <limits>
 #include <stdint.h>
 #include <omp.h>
+#include <math.h>
 
 #include "VERSION"
 


=====================================
source/ParametersSolo.cpp
=====================================
@@ -59,11 +59,15 @@ void ParametersSolo::initialize(Parameters *pPin)
         type=SoloTypes::CB_UMI_Complex;
         bL=0;
         cbumiL=0;
+
     } else if (typeStr=="CB_samTagOut") {
         type=SoloTypes::CB_samTagOut;
-        cbumiL=cbL;
+        cbumiL=cbL+umiL;
         if (bL==1)
-            bL=cbL;
+            bL=cbumiL;
+
+        barcodeStart=min(cbS,umiS)-1;
+        barcodeEnd=max(cbS+cbL,umiS+umiL)-2;
         
     } else if (typeStr=="SmartSeq") {
         type=SoloTypes::SmartSeq;     
@@ -339,6 +343,9 @@ void ParametersSolo::initialize(Parameters *pPin)
             errOut << "SOLUTION: re-run STAR with --outSAMtype BAM SortedByCoordinate ...\n";
             exitWithError(errOut.str(),std::cerr, pP->inOut->logMain, EXIT_CODE_PARAMETER, *pP);
         };
+    } else if ( pP->outSAMattrPresent.UB && type==SoloTypes::CB_samTagOut) {
+    	exitWithError("EXITING because of fatal PARAMETERS error: UB attribute (corrected UMI) in --outSAMattributes cannot be used with --soloType CB_samTagOut \n" \
+                      "SOLUTION: instead, use UR (uncorrected UMI) in --outSAMattributes\n",   std::cerr, pP->inOut->logMain, EXIT_CODE_PARAMETER, *pP);
     };
     
     readInfoYes.fill(false);


=====================================
source/ReadAlign_alignBAM.cpp
=====================================
@@ -393,12 +393,11 @@ int ReadAlign::alignBAM(Transcript const &trOut, uint nTrOut, uint iTrOut, uint
                         attrN+=bamAttrArrayWrite(soloRead->readBar->bQual,"sQ",attrOutArray+attrN);
                         break;
                         
-                    //following attributes are not processed here
                     case ATTR_CB:
-                        if (soloRead->readBar->cbSeqCorrected!="")
-                        attrN+=bamAttrArrayWrite(soloRead->readBar->cbSeqCorrected,"CB",attrOutArray+attrN);                        
+                        if (soloRead->readBar->cbSeqCorrected!="") //only if corrected CB is defined (e.g. CB_samTagOut), otherwise it will be added at the BAM sorting stage
+                        	attrN+=bamAttrArrayWrite(soloRead->readBar->cbSeqCorrected,"CB",attrOutArray+attrN);
                         break;
-                    case ATTR_UB:
+                    case ATTR_UB: //will be added at the BAM sorting stage
                         break;
                         
                     default:


=====================================
source/SoloFeature.h
=====================================
@@ -32,6 +32,7 @@ public:
     SoloReadBarcode *readBarSum;
 
     uint64 nReadsMapped, nCB, nReadsInput; //total number of mapped reads
+    uint32 featuresNumber; //number of features (i.e. genes, SJs, etc)
 
     uint32 *rGeneUMI;//mapped reads sorted by CB
     uint32 *indCB;//index of detected CBs in the whitelist


=====================================
source/SoloFeature_collapseUMI.cpp
=====================================
@@ -210,8 +210,8 @@ void SoloFeature::collapseUMI(uint32 iCB, uint32 *umiArray)
     //compact reads per gene
     uint32 gid1=-1;//current gID
     uint32 nGenes=0; //number of genes
-    uint32 *gID = new uint32[min(Trans.nGe,rN)+1]; //gene IDS
-    uint32 *gReadS = new uint32[min(Trans.nGe,rN)+1]; //start of gene reads TODO: allocate this array in the 2nd half of rGU
+    uint32 *gID = new uint32[min(featuresNumber,rN)+1]; //gene IDs
+    uint32 *gReadS = new uint32[min(featuresNumber,rN)+1]; //start of gene reads TODO: allocate this array in the 2nd half of rGU
     for (uint32 iR=0; iR<rN*rguStride; iR+=rguStride) {
         if (rGU[iR+rguG]!=gid1) {//record gene boundary
             gReadS[nGenes]=iR;


=====================================
source/SoloFeature_outputResults.cpp
=====================================
@@ -76,20 +76,8 @@ void SoloFeature::outputResults(bool cellFilterYes)
         umiOutCol={0,1,2};
 
     //header
-    uint32 featureN=0;
-    switch (featureType) {
-        case SoloFeatureTypes::Gene :
-        case SoloFeatureTypes::GeneFull :
-        case SoloFeatureTypes::Velocyto :
-        case SoloFeatureTypes::VelocytoSimple :            
-            featureN=Trans.nGe;//genes
-            break;
-        case SoloFeatureTypes::SJ :
-            featureN=P.sjAll[0].size();//sj
-            break;
-    };
     countMatrixStream <<"%%MatrixMarket matrix coordinate integer general\n%\n";
-    countMatrixStream << featureN <<' '<< (cellFilterYes ? filteredCells.nCells : pSolo.cbWLsize) <<' '<< nCellGeneEntries << '\n';
+    countMatrixStream << featuresNumber <<' '<< (cellFilterYes ? filteredCells.nCells : pSolo.cbWLsize) <<' '<< nCellGeneEntries << '\n';
     
     uint32  cbInd1=0;
     for (uint32 icb=0; icb<nCB; icb++) {


=====================================
source/SoloFeature_processRecords.cpp
=====================================
@@ -26,6 +26,17 @@ void SoloFeature::processRecords(ReadAlignChunk **RAchunk)
         P.inOut->logMain <<"Read splice junctions for Solo SJ feature: "<< P.sjAll[0].size() <<endl;
     };
     
+    //number of features
+    switch (featureType) {
+    	case SoloFeatureTypes::Gene :
+    	case SoloFeatureTypes::GeneFull :
+    	case SoloFeatureTypes::Velocyto :
+    		featuresNumber=Trans.nGe;
+    		break;
+    	case SoloFeatureTypes::SJ :
+    		featuresNumber=P.sjAll[0].size();
+	};
+
     sumThreads(RAchunk);
     
     


=====================================
source/SoloReadBarcode_getCBandUMI.cpp
=====================================
@@ -160,18 +160,34 @@ void SoloReadBarcode::getCBandUMI(const string &readNameExtra, const uint32 &rea
     bSeq=readNameExtra.substr(0,bLength);
     bQual=readNameExtra.substr(bLength+1,bLength);
 
-    if (pSolo.type==pSolo.SoloTypes::CB_UMI_Simple) {
+    if ( pSolo.type==pSolo.SoloTypes::CB_UMI_Simple ) {
         cbSeq=bSeq.substr(pSolo.cbS-1,pSolo.cbL);
         umiSeq=bSeq.substr(pSolo.umiS-1,pSolo.umiL);
         cbQual=bQual.substr(pSolo.cbS-1,pSolo.cbL);
         umiQual=bQual.substr(pSolo.umiS-1,pSolo.umiL);
         
-        if (!convertCheckUMI())
+        if (!convertCheckUMI()) {//UMI conversion
             return;
+        };
         
         matchCBtoWL(cbSeq, cbQual, pSolo.cbWL, cbMatch, cbMatchInd, cbMatchString);
-        
-    } else if (pSolo.type==pSolo.SoloTypes::CB_UMI_Complex) {
+
+
+    } else if ( pSolo.type==pSolo.SoloTypes::CB_samTagOut ) {//similar to CB_UMI_Simple, but no UMI, and define cbSeqCorrected
+        cbSeq=bSeq.substr(pSolo.cbS-1,pSolo.cbL);
+        umiSeq=bSeq.substr(pSolo.umiS-1,pSolo.umiL);
+        cbQual=bQual.substr(pSolo.cbS-1,pSolo.cbL);
+        umiQual=bQual.substr(pSolo.umiS-1,pSolo.umiL);
+
+        matchCBtoWL(cbSeq, cbQual, pSolo.cbWL, cbMatch, cbMatchInd, cbMatchString);
+
+        if ( cbMatch==0 || cbMatch==1 ) {
+        	cbSeqCorrected=pSolo.cbWLstr[cbMatchInd[0]];
+        } else {
+        	cbSeqCorrected="";
+        };
+
+    } else if ( pSolo.type==pSolo.SoloTypes::CB_UMI_Complex ) {
         
         cbSeq="";
         cbQual="";
@@ -236,18 +252,6 @@ void SoloReadBarcode::getCBandUMI(const string &readNameExtra, const uint32 &rea
             cbMatchString=to_string(cbMatchInd[0]);
         };
         
-    ///////////////////////////////////////////////////////////////////////////    
-    } else if (pSolo.type==pSolo.SoloTypes::CB_samTagOut) {
-        //CB only, on read 3
-        cbSeq=bSeq.substr(pSolo.cbS-1,pSolo.cbL);
-        cbQual=bQual.substr(pSolo.cbS-1,pSolo.cbL);
-        matchCBtoWL(cbSeq, cbQual, pSolo.cbWL, cbMatch, cbMatchInd, cbMatchString);
-        if (cbMatch==0 || cbMatch==1) {
-            cbSeqCorrected=pSolo.cbWLstr[cbMatchInd[0]];
-        } else {
-            cbSeqCorrected="";
-        };
-        
     ///////////////////////////////////////////////////////////////////////////    
     } else if (pSolo.type==pSolo.SoloTypes::SmartSeq) {        
         cbSeq=cbQual=cbSeqCorrected=""; //TODO make cbSeq=file label


=====================================
source/SuperTranscriptome.cpp
=====================================
@@ -18,12 +18,12 @@ void SuperTranscriptome::sjCollapse()
             sjCollapsed[ sj[i].super ].push_back({sj[i].start, sj[i].end});
     };
     
-    ofstream & superTrSJ = ofstrOpen(P.pGe.gDir+"/superTranscriptSJcollapsed.tsv", ERROR_OUT, P);
+    ofstream & superTrSJstream = ofstrOpen(P.pGe.gDir+"/superTranscriptSJcollapsed.tsv", ERROR_OUT, P);
     for(uint64 i = 0; i < sjCollapsed.size(); i++) {
         for(auto &sj1 : sjCollapsed[i])
-            superTrSJ << i <<"\t"<< sj1[0] <<"\t"<< sj1[1] << "\n";
+            superTrSJstream << i <<"\t"<< sj1[0] <<"\t"<< sj1[1] << "\n";
     };
-    superTrSJ.close(); 
+    superTrSJstream.close();
 
     P.inOut->logMain << "Number of splice junctions in superTranscripts = " << sj.size() <<endl;
     P.inOut->logMain << "Number of collapsed splice junctions in superTranscripts = " << sjCollapsed.size() <<endl;
@@ -38,7 +38,7 @@ void SuperTranscriptome::load(char *G, vector<uint64> &chrStart, vector<uint64>
         superTrs[ii].seqP=(uint8*)G+chrStart[ii];
     };
     
-    ifstream & superTrSJ = ifstrOpen(P.pGe.gDir+"/superTranscriptSJcollapsed.tsv", ERROR_OUT, "SOLUTION: re-generate the genome.", P);
+    ifstream & superTrSJstream = ifstrOpen(P.pGe.gDir+"/superTranscriptSJcollapsed.tsv", ERROR_OUT, "SOLUTION: re-generate the genome.", P);
     
     uint32 sutr=0,sutr1=0;
     vector<array<uint32,3>> sjC1;
@@ -48,7 +48,8 @@ void SuperTranscriptome::load(char *G, vector<uint64> &chrStart, vector<uint64>
     
     while(true) {//load junctions, assume they are sorted by donor coordinate
         
-        bool inGood = (superTrSJ >> sutr);
+        superTrSJstream >> sutr;
+        bool inGood = superTrSJstream.good();
         
         if (sutr!=sutr1 || !inGood) {//new suTr
             //sort sj1 by acceptor position
@@ -77,14 +78,14 @@ void SuperTranscriptome::load(char *G, vector<uint64> &chrStart, vector<uint64>
         };
         
         uint32 sjd, sja;
-        superTrSJ >> sjd >> sja;
+        superTrSJstream >> sjd >> sja;
         
         if (sjDonor1.empty() || sjDonor1.back() < sjd) 
             sjDonor1.push_back(sjd);
         
         sjC1.push_back({sjd,sja,(uint32)sjDonor1.size()-1});//record donor, acceptor, and position of the donor in sjDonor
     };
-    superTrSJ.close();
+    superTrSJstream.close();
     
     P.inOut->logMain << "Max number of splice junctions in a superTranscript = " << sjNmax <<endl;
     P.inOut->logMain << "Max number of donor sites in a superTranscript = " << sjDonorNmax <<endl;


=====================================
source/VERSION
=====================================
@@ -1 +1 @@
-#define STAR_VERSION "2.7.5a"
+#define STAR_VERSION "2.7.5a_2020-06-29"



View it on GitLab: https://salsa.debian.org/med-team/rna-star/-/commit/0b30ea3411d55dc874d4945a778afe191b8b8eec

-- 
View it on GitLab: https://salsa.debian.org/med-team/rna-star/-/commit/0b30ea3411d55dc874d4945a778afe191b8b8eec
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200707/36b04ce0/attachment-0001.html>


More information about the debian-med-commit mailing list