[med-svn] [Git][med-team/libgclib][upstream] New upstream version 0.11.8

Michael R. Crusoe gitlab at salsa.debian.org
Fri Apr 17 09:03:53 BST 2020



Michael R. Crusoe pushed to branch upstream at Debian Med / gclib


Commits:
d10eacb5 by Michael R. Crusoe at 2020-04-17T09:39:12+02:00
New upstream version 0.11.8
- - - - -


7 changed files:

- GArgs.cpp
- GBam.cpp
- GBam.h
- GBase.h
- GBitVec.h
- GList.hh
- gff.cpp


Changes:

=====================================
GArgs.cpp
=====================================
@@ -172,6 +172,9 @@ int GArgs::parseArgs(bool nodigitopts) {
                    //value is the next argument
                    if (p+1<_argc && _argv[p+1][0]!=0) {
                       p++;
+					#if defined(__APPLE__) && defined(DEBUG)
+                      dbg_dequote(_argv[p]);
+   	   	   	   	   	#endif
                       args[count].value=Gstrdup(_argv[p]);
                       }
                     else {


=====================================
GBam.cpp
=====================================
@@ -374,6 +374,16 @@ void GBamRecord::setupCoordinates() {
    return 0;
   }
 
+ char GBamRecord::tag_char1(const char tag[2]) { //just the first char from Z type tags
+	uint8_t* s=bam_aux_get(this->b, tag);
+	if (s==NULL) return 0;
+ 	int type;
+ 	type = *s++;
+ 	if (s == 0) return 0;
+ 	if (type == 'A' || type == 'Z') return *(char*)s;
+ 	else return 0;
+ }
+
  int GBamRecord::tag_int(const char tag[2]) { //get the numeric value of tag
    uint8_t *s=find_tag(tag);
    if (s) return ( bam_aux2i(s) );
@@ -393,10 +403,10 @@ void GBamRecord::setupCoordinates() {
    }
 
  char GBamRecord::spliceStrand() { // '+', '-' from the XS tag, or 0 if no XS tag
-   char c=tag_char("XS");
+   char c=tag_char1("XS");
    if (c==0) {
     //try minimap2's "ts" tag
-    char m=tag_char("ts");
+    char m=tag_char1("ts");
     if (m=='+' || m=='-') {
        if ((this->b->core.flag & BAM_FREVERSE) != 0) c=((m=='+') ? '-' : '+');
          else c=m;


=====================================
GBam.h
=====================================
@@ -194,6 +194,7 @@ class GBamRecord: public GSeg {
  int tag_int(const char tag[2]); //return numeric value of tag (for numeric types)
  float tag_float(const char tag[2]); //return float value of tag (for float types)
  char tag_char(const char tag[2]); //return char value of tag (for type 'A')
+ char tag_char1(const char tag[2]); //return char value of tag (for type 'A')
  char spliceStrand(); // '+', '-' from the XS tag, or '.' if no XS tag
 
  char* sequence(); //user should free after use


=====================================
GBase.h
=====================================
@@ -1,6 +1,6 @@
 #ifndef G_BASE_DEFINED
 #define G_BASE_DEFINED
-#define GCLIB_VERSION "0.11.4"
+#define GCLIB_VERSION "0.11.8"
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"


=====================================
GBitVec.h
=====================================
@@ -153,7 +153,7 @@ public:
 
 
   void bitSizeError() {
-    GError("Error at GBitVec: unsupported BitWord size (%d)!\n", 
+    GError("Error at GBitVec: unsupported BitWord size (%d)!\n",
         sizeof(BitWord));
     }
   /// count - Returns the number of bits which are set.
@@ -277,6 +277,9 @@ public:
   }
 
   GBitVec &set(uint Idx) {
+#ifndef NDEBUG
+	  indexCheck(Idx, Size);
+#endif
     fBits[Idx / BITWORD_SIZE] |= 1L << (Idx % BITWORD_SIZE);
     return *this;
   }
@@ -287,6 +290,9 @@ public:
   }
 
   GBitVec &reset(uint Idx) {
+#ifndef NDEBUG
+	  indexCheck(Idx, Size);
+#endif
     fBits[Idx / BITWORD_SIZE] &= ~(1L << (Idx % BITWORD_SIZE));
     return *this;
   }
@@ -299,6 +305,9 @@ public:
   }
 
   GBitVec &flip(uint Idx) {
+#ifndef NDEBUG
+	  indexCheck(Idx, Size);
+#endif
     fBits[Idx / BITWORD_SIZE] ^= 1L << (Idx % BITWORD_SIZE);
     return *this;
   }
@@ -310,7 +319,7 @@ public:
 
   inline static void indexCheck(uint vIdx, uint vSize) {
     if (vIdx >= vSize)
-      GError("Error at GBitVec: index %d out of bounds (size %d)\n", 
+      GError("Error at GBitVec: index %d out of bounds (size %d)\n",
         (int)vIdx, vSize);
    }
 
@@ -439,7 +448,7 @@ private:
 
     //  Then set any stray high bits of the last used word.
     uint ExtraBits = Size % BITWORD_SIZE;
-    
+
     if (ExtraBits) {
       BitWord ExtraBitMask = ~0UL << ExtraBits;
       if (value)


=====================================
GList.hh
=====================================
@@ -481,7 +481,7 @@ template <class OBJ> int GList<OBJ>::Add(OBJ* item) {
  return result;
 }
 
-//by default, it deletes the item if it has an equal in the list!
+//by default, it deletes item if it an equal is found in the list!
 //returns the existing equal (==) object if it's in the list already
 //or returns the item itself if it's unique (and adds it)
 template <class OBJ> OBJ* GList<OBJ>::AddIfNew(OBJ* item,
@@ -490,21 +490,20 @@ template <class OBJ> OBJ* GList<OBJ>::AddIfNew(OBJ* item,
  if (Found(item, r)) {
     if (deleteIfFound && (pointer)item != (pointer)(this->fList[r])) {
        this->deallocate_item(item);
-       }
+    }
     if (fidx!=NULL) *fidx=r;
     return this->fList[r]; //found
-    }
+ }
  //not found:
  if (SORTED) {
    //Found() set result to the position where the item should be inserted:
    sortInsert(r, item);
-   }
-  else {
+ } else {
    r = this->fCount;
    if (r==this->fCapacity) GPVec<OBJ>::Grow();
    this->fList[r]=item;
    this->fCount++;
-   }
+ }
  if (fidx!=NULL) *fidx=r;
  return item;
 }


=====================================
gff.cpp
=====================================
@@ -3201,7 +3201,6 @@ char getOvlCode(GffObj& m, GffObj& r, int& ovlen, bool strictMatch) {
 		}
 		//-- single-exon qry overlaping multi-exon ref
 		//check full pre-mRNA case (all introns retained): code 'm'
-
 		if (m.start<=r.exons[0]->end && m.end>=r.exons[jmax]->start)
 			return 'm';
 
@@ -3272,27 +3271,32 @@ char getOvlCode(GffObj& m, GffObj& r, int& ovlen, bool strictMatch) {
 	//check intron chain overlap (match, containment, intron retention etc.)
 	int i=1; //index of exon to the right of current qry intron
 	int j=1; //index of exon to the right of current ref intron
-	bool intron_conflict=false; //used for checking for retained introns
+	bool intron_conflict=false; //overlapping introns have at least a mismatching splice site
 	//from here on we check all qry introns against ref introns
 	bool junct_match=false; //true if at least a junction match is found
-	bool ichain_match=true; //if there is intron (sub-)chain match, to be updated by any mismatch
+	bool ichain_match=false; //if there is intron (sub-)chain match, to be updated by any mismatch
 	bool intron_ovl=false; //if any intron overlap is found
 	bool intron_retention=false; //if any ref intron is covered by a qry exon
-	int imfirst=0; //index of first intron match in query (valid>0)
-	int jmfirst=0; //index of first intron match in reference (valid>0)
-	int imlast=0;  //index of first intron match in query
-	int jmlast=0;  //index of first intron match in reference
+	//intron chain (partial) match exon-index boundaries:
+	int imfirst=0; //index of exon after first intron match in query (valid>0)
+	int jmfirst=0; //index of exon after first intron match in reference (valid>0)
+	int imlast=0;  //index of exon after last intron match in query
+	int jmlast=0;  //index of  exon after last intron match in reference
+	//--keep track of the last overlapping introns in both qry and ref:
+	//int q_last_iovl=0;
+	//int r_last_iovl=0;
+
 	//check for intron matches
 	while (i<=imax && j<=jmax) {
-		uint mstart=m.exons[i-1]->end;
+		uint mstart=m.exons[i-1]->end; //qry intron start-end
 		uint mend=m.exons[i]->start;
-		uint rstart=r.exons[j-1]->end;
+		uint rstart=r.exons[j-1]->end; //ref intron start-end
 		uint rend=r.exons[j]->start;
 		if (rend<mstart) { //qry intron starts after ref intron ends
 			if (!intron_conflict && r.exons[j]->overlap(mstart+1, mend-1))
-				intron_conflict=true;
-			if (!intron_retention && rstart>=m.exons[i-1]->start)
-				intron_retention=true;
+				intron_conflict=true; //next ref exon overlaps this qry intron
+			if (!intron_retention && rstart>=m.exons[i-1]->start && rend<=m.exons[i-1]->end)
+				intron_retention=true; //this ref intron is covered by previous qry exons[i-1]
 			if (intron_ovl) ichain_match=false;
 			j++;
 			continue;
@@ -3301,64 +3305,136 @@ char getOvlCode(GffObj& m, GffObj& r, int& ovlen, bool strictMatch) {
 			//if qry intron overlaps the exon on the left, we have an intron conflict
 			if (!intron_conflict && r.exons[j-1]->overlap(mstart+1, mend-1))
 				intron_conflict=true;
-			if (!intron_retention && rend<=m.exons[i]->end)
+			if (!intron_retention && rstart>=m.exons[i]->start && rend<=m.exons[i]->end)
 				intron_retention=true;
 			if (intron_ovl) ichain_match=false;
 			i++;
 			continue;
 		} //no intron overlap, skipping qry intron
 		intron_ovl=true;
+		//q_last_iovl=i; //keep track of the last overlapping introns in both qry and ref
+		//r_last_iovl=j;
 		//overlapping introns, test junction matching
 		bool smatch=(mstart==rstart);
 		bool ematch=(mend==rend);
 		if (smatch || ematch) junct_match=true;
 		if (smatch && ematch) {
 			//perfect match for this intron
-			if (ichain_match) { //chain matching still possible
-			  if (jmfirst==0) jmfirst=j;
-			  if (imfirst==0) imfirst=i;
-			  imlast=i;
-			  jmlast=j;
+			if (jmfirst==0) {
+				ichain_match=true;
+				jmfirst=j;
+				imfirst=i;
+			}
+			if (ichain_match) {
+  		       imlast=i;
+			   jmlast=j;
 			}
 			i++; j++;
 			continue;
 		}
-		//intron overlapping but with at least a junction mismatch
+		//intron overlapping but not fully matching
 		intron_conflict=true;
 		ichain_match=false;
 		if (mend>rend) j++; else i++;
 	} //while checking intron overlaps
-	if (ichain_match) { //intron sub-chain match
+	/*** additional checking needed for intron retention when there is no ichain_match or overlap ?
+    if (!intron_retention && r_last_iovl<jmax) {
+	   //-- check the remaining ref introns not checked yet for retention
+       int i=q_last_iovl;
+       for (int j=r_last_iovl+1;j<=jmax && i<=imax;++j) {
+   		uint rstart=r.exons[j-1]->end; //ref intron start-end
+   		uint rend=r.exons[j]->start;
+   		if (rend<m.exons[i]->start) {
+   			i++;
+   			continue;
+   		}
+   		if (rstart>m.exons[i]->end)
+   			continue;
+   		//overlap between ref intron and m.exons[i]
+   		if (rstart>=m.exons[i]->start && rend<=m.exons[i]->end) {
+   			intron_retention=true;
+   			break;
+   		}
+       }
+    }
+    ***/
+	// --- when qry intron chain is contained within ref intron chain
+	//     qry terminal exons may poke (overhang) into ref's other introns
+	int l_iovh=0;   // overhang of q left boundary beyond the end of ref intron on the left
+	int r_iovh=0;   // same type of overhang through the ref intron on the right
+	int qry_intron_poking=0;
+	// --- when ref intron chain is contained within qry intron chain,
+	//     terminal exons of ref may poke (overhang) into qry other introns
+	int l_jovh=0;   // overhang of q left boundary beyond the end of ref intron to the left
+	int r_jovh=0;   // same type of overhang through the ref intron on the right
+	int ref_intron_poking=0;
+	if (ichain_match) { //intron (sub-)chain compatible so far (but there could still be conflicts)
 		if (imfirst==1 && imlast==imax) { // qry full intron chain match
 			if (jmfirst==1 && jmlast==jmax) {//identical intron chains
 				if (strictMatch) return (r.exons[0]->start==m.exons[0]->start &&
 						              r.exons.Last()->end && m.exons.Last()->end) ? '=' : '~';
 				else return '=';
 			}
-			// -- qry intron chain is shorter than ref intron chain --
-			int l_iovh=0;   // overhang of leftmost q exon left boundary beyond the end of ref intron to the left
-			int r_iovh=0;   // same type of overhang through the ref intron on the right
-			if (jmfirst>1 && r.exons[jmfirst-1]->start>m.start)
-				l_iovh = r.exons[jmfirst-1]->start - m.start;
-			if (jmlast<jmax && m.end > r.exons[jmlast]->end)
-				r_iovh = m.end - r.exons[jmlast]->end;
-			if (l_iovh<4 && r_iovh<4) return 'c';
-		} else if ((jmfirst==1 && jmlast==jmax)) {//ref full intron chain match
-			//check if the reference i-chain is contained in qry i-chain
-			int l_jovh=0;   // overhang of leftmost q exon left boundary beyond the end of ref intron to the left
-			int r_jovh=0;   // same type of overhang through the ref intron on the right
-			if (imfirst>1 && m.exons[imfirst-1]->start>r.start)
-				l_jovh = m.exons[imfirst-1]->start - r.start;
-			if (imlast<imax && r.end > m.exons[imlast]->end)
-				r_jovh = r.end - m.exons[imlast]->end;
-			if (l_jovh<4 && r_jovh<4) return 'k'; //reverse containment
+			// -- a partial intron chain match
+			if (jmfirst>1) {
+				//find if m.start falls within any ref intron before jmfirst
+				for (int j=jmfirst-1;j>0;--j)
+					if (m.start<r.exons[j]->start) {
+						if (m.start>r.exons[j-1]->end) { //m.start within this ref intron
+							l_iovh = r.exons[j]->start - m.start;
+							break;
+						}
+						else { intron_retention=true; ichain_match=false; }
+					}
+			}
+			if (jmlast<jmax) {
+				for (int j=jmlast;j<jmax;++j)
+					if (m.end > r.exons[j]->end) {
+						if (m.end < r.exons[j+1]->start) { //m.end within this ref intron
+							r_iovh = m.end - r.exons[j]->end;
+						    break;
+						}
+						else { intron_retention=true; ichain_match=false; }
+					}
+			}
+			if (ichain_match && l_iovh<4 && r_iovh<4) return 'c';
+			qry_intron_poking=GMAX(l_iovh, r_iovh);
+		} else if ((jmfirst==1 && jmlast==jmax)) {//ref intron chain match
+			//check if the reference j-chain is contained in qry i-chain
+			//check for ref ends poking into qry introns
+			if (imfirst>1)  {
+				for (int i=imfirst-1;i>0;--i)
+					if (m.exons[i]->start>r.start) {
+						if (r.start>m.exons[i-1]->end) {
+							l_jovh = m.exons[i]->start - r.start;
+							break;
+						}
+						else { ichain_match = false; }
+					}
+			}
+			if (imlast<imax) {
+				for (int i=imlast;i<imax;++i)
+					if (r.end > m.exons[i]->end) {
+						if (r.end < m.exons[i+1]->start)
+							 { r_jovh = r.end - m.exons[i]->end; break; }
+						else { ichain_match = false; }
+					}
+			}
+			if (ichain_match && l_jovh<4 && r_jovh<4) return 'k'; //reverse containment
+			ref_intron_poking=GMAX(l_jovh, r_jovh);
 		}
 	}
 	//'=', 'c' and 'k' were checked and assigned, check for 'm' and 'n' before falling back to 'j'
-	if (!intron_conflict && (m.start<=r.exons[0]->end && m.end>=r.exons[jmax]->start)) {
-			return 'm';
+	if (intron_retention) {
+			//ref is boundary contained with qry intron chain ? that's not required for 'm'
+		    //GMessage("r_jovh=%d, r_iovh=%d, l_jovh=%d, l_iovh=%d\n", r_jovh, r_iovh, l_jovh, l_iovh);
+		    //GMessage("m.start=%d, r.exons[0]->end=%d, m.end=%d, r.exons[jmax]->start=%d\n",
+		    //		m.start, r.exons[0]->end, m.end, r.exons[jmax]->start);
+		    //if (ref_intron_poking>0 && )
+		//we just need to have no intron poking going on
+		if (!intron_conflict && ref_intron_poking<4 && qry_intron_poking<4) return 'm';
+		else return 'n';
 	}
-	if (intron_retention) return 'n';
 	if (junct_match) return 'j';
 	//we could have 'o' or 'y' here
 	//any real exon overlaps?



View it on GitLab: https://salsa.debian.org/med-team/libgclib/-/commit/d10eacb5710c630525a0a6638b0f81a8bcafbf32

-- 
View it on GitLab: https://salsa.debian.org/med-team/libgclib/-/commit/d10eacb5710c630525a0a6638b0f81a8bcafbf32
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200417/89b9b0fe/attachment-0001.html>


More information about the debian-med-commit mailing list