[med-svn] [Git][med-team/libgclib][master] 4 commits: New upstream version 0.11.10

Steffen Möller gitlab at salsa.debian.org
Sat Aug 8 22:36:30 BST 2020



Steffen Möller pushed to branch master at Debian Med / libgclib


Commits:
0f0a9f2b by Steffen Moeller at 2020-08-08T22:56:32+02:00
New upstream version 0.11.10
- - - - -
584a3430 by Steffen Moeller at 2020-08-08T22:56:32+02:00
routine-update: New upstream version

- - - - -
2693fa6a by Steffen Moeller at 2020-08-08T22:56:33+02:00
Update upstream source from tag 'upstream/0.11.10'

Update to upstream version '0.11.10'
with Debian dir 54cd374df9ed81325da8888a1f7435522d224512
- - - - -
4edf66b9 by Steffen Moeller at 2020-08-08T23:32:11+02:00
Preparing for new upstream version.

- - - - -


13 changed files:

- GBase.cpp
- GBase.h
- GList.hh
- GVec.hh
- Makefile
- debian/changelog
- debian/libgclib2.symbols
- debian/patches/autoconf.patch
- gff.cpp
- gff.h
- htest.cpp
- mdtest.cpp
- tag_git.sh


Changes:

=====================================
GBase.cpp
=====================================
@@ -921,8 +921,8 @@ void writeFasta(FILE *fw, const char* seqid, const char* descr,
  }
 
 char* commaprintnum(uint64 n) {
-  int comma = ',';
   char retbuf[48];
+  int comma = ',';
   char *p = &retbuf[sizeof(retbuf)-1];
   int i = 0;
   *p = '\0';


=====================================
GBase.h
=====================================
@@ -1,6 +1,6 @@
 #ifndef G_BASE_DEFINED
 #define G_BASE_DEFINED
-#define GCLIB_VERSION "0.11.9"
+#define GCLIB_VERSION "0.11.10"
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
@@ -110,21 +110,32 @@ typedef uint64_t uint64;
 
 //-------------------
 
+#define GEXIT(a) { \
+fprintf(stderr, "Error: "); fprintf(stderr, a); \
+GError("Exiting from line %i in file %s\n",__LINE__,__FILE__); \
+}
+
 // Debug helpers
 #ifndef NDEBUG
  #define GASSERT(exp) ((exp)?((void)0):(void)GAssert(#exp,__FILE__,__LINE__))
+ #define GVERIFY(condition) \
+if (!(condition)) { \
+fprintf(stderr, "Assumption \"%s\"\nFailed in file %s: at line:%i\n", \
+#condition,__FILE__,__LINE__); \
+GEXIT(#condition);}
  #ifdef TRACE
-  #define GTRACE(exp)  (GMessage exp)
+  #define GTRACE(exp)  (GMessage(exp))
  #else
-  #define GTRACE(exp)  ((void)0)
+  #define GTRACE(exp)
  #endif
 #else
- #define GASSERT(exp) ((void)0)
- #define GTRACE(exp)  ((void)0)
+ #define GASSERT(exp)
+ #define GTRACE(exp)
+ #define GVERIFY(condition)
 #endif
 
-#define GERROR(exp) (GError exp)
-/**********************************  Macros  ***********************************/
+#define GERROR(exp) (GError(exp))
+
 // Absolute value
 #define GABS(val) (((val)>=0)?(val):-(val))
 


=====================================
GList.hh
=====================================
@@ -50,6 +50,9 @@ template <class OBJ> class GArray:public GVec<OBJ> {
     int Add(OBJ* item); // specific implementation if sorted
     int Add(OBJ& item) { return Add(&item); } //both will CREATE a new OBJ and COPY to it
                        // using OBJ new operator=
+    int AddIfNew(OBJ& item, int* fidx=NULL); //requires == operator
+        //if equal item not found, item is added and return the index of it
+        //otherwise returns -1 and fidx is set to the equal item location
     int cAdd(OBJ item) { return Add(&item); }
     int cPush(OBJ item) { return Add(&item); }
     int Push(OBJ& item) { return Add(&item); }
@@ -229,14 +232,14 @@ template <class OBJ> int GArray<OBJ>::Add(OBJ* item) {
       if (fUnique) return -1; //cannot add a duplicate!
    //Found sets result to the position where the item should be!
    GVec<OBJ>::Insert(result, *item);
-   }
+ }
   else {
-   if (fUnique && Found(*item,result)) return -1; //set behaviour
+   if (fUnique && Found(*item, result)) return -1; //set behaviour
    result = this->fCount;
    if (result==this->fCapacity) GVec<OBJ>::Grow();
    this->fArray[result] = *item; //operator=, copies the item
    this->fCount++;
-   }
+ }
  return result;
 }
 
@@ -255,6 +258,29 @@ template <class OBJ> void GArray<OBJ>::Add(GArray<OBJ>& list) {
     }
 }
 
+//returns -1 if existing equal object exists, sets fidx to that equal item index
+//or returns the index where the item was added/inserted
+template <class OBJ> int GArray<OBJ>::AddIfNew(OBJ& item,
+                                     int* fidx) {
+ int rpos;
+ if (Found(item, rpos)) {
+    if (fidx) *fidx=rpos; //the position of the existing equal item
+    return -1; //found and not added
+ }
+ //not found, let's insert it
+ if (SORTED) {
+   //Found() set rpos to the position where the item should be inserted
+   GVec<OBJ>::Insert(rpos, item);
+ } else { //simply append
+	rpos = this->fCount;
+	if (rpos==this->fCapacity) GVec<OBJ>::Grow();
+	this->fArray[rpos] = item; //operator= copies the item
+	this->fCount++;
+ }
+ if (fidx!=NULL) *fidx=rpos;
+ return rpos;
+}
+
 template <class OBJ> bool GArray<OBJ>::Found(OBJ& item, int& idx) {
  //search the list by using fCompareProc (if defined)
  //or == operator for a non-sortable list
@@ -262,7 +288,7 @@ template <class OBJ> bool GArray<OBJ>::Found(OBJ& item, int& idx) {
  //set to the closest matching object!
  int i;
  idx=-1;
- if (this->fCount==0) { idx=0;return false;}
+ if (this->fCount==0) { idx=0; return false;}
  if (SORTED) { //binary search based on fCompareProc
    //do the simplest tests first:
    if ((*fCompareProc)(&(this->fArray[0]),&item)>0) {


=====================================
GVec.hh
=====================================
@@ -111,13 +111,10 @@ template <class OBJ> class GVec {
     //this will reject identical items in sorted lists only!
     void setCapacity(int NewCapacity);
     int  Count() { return fCount; }
-
     void setCount(int NewCount);         // will trim or expand the array as needed
-    void setCount(int NewCount, OBJ* v); //same as setCount() but new objects are set to v
     void setCount(int NewCount, OBJ v);
     void Resize(int NewCount) { setCount(NewCount); }
-    //void Resize(int NewCount, OBJ* v) { setCount(NewCount, v); }
-    void Resize(int NewCount, OBJ v) { setCount(NewCount, &v); }
+    void Resize(int NewCount, OBJ v) { setCount(NewCount, v); }
 
     //void Move(int curidx, int newidx);
     bool isEmpty() { return fCount==0; }
@@ -290,13 +287,15 @@ template <class OBJ> void GVec<OBJ>::setCapacity(int NewCapacity) {
    else {
       if (IsPrimitiveType<OBJ>::VAL) {
         GREALLOC(fArray, NewCapacity*sizeof(OBJ));
+        //also zero init the new items?
+        memset(fArray+fCount, 0, (NewCapacity-fCount)*sizeof(OBJ));
       } else {
         OBJ* oldArray=fArray;
 		//fArray=new OBJ[NewCapacity]();
 		fArray=new OBJ[NewCapacity];
         for (int i=0;i<this->fCount;i++) {
           fArray[i] = oldArray[i];
-          }// we need operator= here
+        }// we need operator= here
         //wouldn't be faster to use memcpy instead?
         //memcpy(fArray, oldArray, fCount*sizeof(OBJ));
         if (oldArray) delete[] oldArray;
@@ -355,16 +354,16 @@ template <class OBJ> void GVec<OBJ>::Grow(int idx, OBJ& item) {
         newList[idx]=item;
         //copy data after idx
         memmove(&newList[idx+1],&fArray[idx], (fCount-idx)*sizeof(OBJ));
-        //..shouldn't do this:
+        //..shouldn't do this (zero init new unused items in the array)
         memset(&newList[fCount+1], 0, (NewCapacity-fCount-1)*sizeof(OBJ));
         //data copied:
         GFREE(fArray);
    } else {
-        newList=new OBJ[NewCapacity]; //]()
+        newList=new OBJ[NewCapacity];
         // operator= required!
         for (int i=0;i<idx;i++) {
           newList[i]=fArray[i];
-          }
+        }
         newList[idx]=item;
         //copy data after idx
         //memmove(&newList[idx+1],&fArray[idx], (fCount-idx)*sizeof(OBJ));
@@ -499,9 +498,12 @@ template <class OBJ> void GVec<OBJ>::setCount(int NewCount) {
 	   GError(GVEC_COUNT_ERR, NewCount);
 	//if (NewCount > fCapacity) setCapacity(NewCount);
 	while(NewCount > fCapacity) Grow();
+       	if (IsPrimitiveType<OBJ>::VAL && NewCount>fCount) {
+		memset(fArray+fCount, 0, (NewCount-fCount)*sizeof(OBJ));
+	}
 	fCount = NewCount; //new items will be populated by the default object constructor(!)
 }
-
+/*
 template <class OBJ> void GVec<OBJ>::setCount(int NewCount, OBJ* v) {
 	if (NewCount<0 || NewCount > MAXLISTSIZE)
 	  GError(GVEC_COUNT_ERR, NewCount);
@@ -512,7 +514,7 @@ template <class OBJ> void GVec<OBJ>::setCount(int NewCount, OBJ* v) {
 	}
 	fCount = NewCount;
 }
-
+*/
 template <class OBJ> void GVec<OBJ>::setCount(int NewCount, OBJ v) {
 	if (NewCount<0 || NewCount > MAXLISTSIZE)
 	   GError(GVEC_COUNT_ERR, NewCount);
@@ -524,7 +526,6 @@ template <class OBJ> void GVec<OBJ>::setCount(int NewCount, OBJ v) {
 	fCount = NewCount;
 }
 
-
 template <class OBJ> void GVec<OBJ>::qSort(int l, int r, GCompareProc* cmpFunc) {
  int i, j;
  OBJ p,t;


=====================================
Makefile
=====================================
@@ -47,17 +47,17 @@ else
 EXE =
 endif
 
-CC      := g++
+CC      := ${CXX}
 
 BASEFLAGS  := -Wall -Wextra ${INCDIRS} $(MARCH) \
  -D_REENTRANT -fno-exceptions -fno-rtti
 
-GCCVER5 := $(shell expr `g++ -dumpversion | cut -f1 -d.` \>= 5)
+GCCVER5 := $(shell expr `${CXX} -dumpversion | cut -f1 -d.` \>= 5)
 ifeq "$(GCCVER5)" "1"
  BASEFLAGS += -Wno-implicit-fallthrough
 endif
 
-GCCVER8 := $(shell expr `g++ -dumpversion | cut -f1 -d.` \>= 8)
+GCCVER8 := $(shell expr `${CXX} -dumpversion | cut -f1 -d.` \>= 8)
 ifeq "$(GCCVER8)" "1"
   BASEFLAGS += -Wno-class-memaccess
 endif


=====================================
debian/changelog
=====================================
@@ -1,3 +1,9 @@
+libgclib (0.11.10-1) unstable; urgency=medium
+
+  * New upstream version
+
+ -- Steffen Moeller <moeller at debian.org>  Sat, 08 Aug 2020 22:56:32 +0200
+
 libgclib (0.11.9-2) unstable; urgency=medium
 
   * Bump soname. Closes: #962550


=====================================
debian/libgclib2.symbols
=====================================
@@ -25,7 +25,8 @@ libgclib.so.2 libgclib2 #MINVER#
  _Z10g2bit2baseh at Base 0.11.4
  _Z10gcdb_allocj at Base 0.11.4
  _Z10getFileExtPKc at Base 0.11.4
- _Z10getOvlCodeR6GffObjS0_Rib at Base 0.11.4
+ _Z10getOvlCodeR6GffObjS0_Rib at Base 0.11.10-1
+ _Z10getOvlCodeR6GffObjS0_Ribi at Base 0.11.10-1
  _Z10parseFloatRPcRf at Base 0.11.4
  _Z10replaceStrRPcS_ at Base 0.11.4
  _Z10startsWithPKcS0_ at Base 0.11.4
@@ -78,12 +79,15 @@ libgclib.so.2 libgclib2 #MINVER#
  _Z14gfo_cmpRefByIDPvS_ at Base 0.11.4
  _Z14translateCodonPKc at Base 0.11.4
  _Z15printEditScriptP12GXEditScript at Base 0.11.4
- _Z15transcriptMatchR6GffObjS0_Ri at Base 0.11.4
+ _Z15transcriptMatchR6GffObjS0_Ri at Base 0.11.10-1
+ _Z15transcriptMatchR6GffObjS0_Rii at Base 0.11.10-1
  _Z15uint32_pack_bigPcj at Base 0.11.4
  _Z16BED_addAttributeP8_IO_FILERiPKcz at Base 0.11.4
  _Z16DefLTCompareProcI4GSegEiPvS1_ at Base 0.11.4
  _Z16gthreads_errExitiPKc at Base 0.11.4
- _Z16singleExonTMatchR6GffObjS0_Ri at Base 0.11.4
+ _Z16singleExonTMatchR6GffObjS0_Ri at Base 0.11.10-1
+ _Z16singleExonTMatchR6GffObjS0_Rii at Base 0.11.10-1
+ _Z15transcriptMatchR6GffObjS0_Rii at Base 0.11.10-1
  _Z17GreedyAlignRegionPKciiS0_iiP16CGreedyAlignDataP8CAlnTrimb at Base 0.11.4
  _Z17GreedyAlignRegionPKciiS0_iiiiiP16CGreedyAlignDataP8CAlnTrimb at Base 0.11.4
  _Z17gcvt_endian_setupv at Base 0.11.4


=====================================
debian/patches/autoconf.patch
=====================================
@@ -2,6 +2,8 @@ Author: Andreas Tille <tille at debian.org>
 Last-Update: Thu, 18 Apr 2019 12:54:42 +0200
 Descriptions: Build proper libraries via automake
 
+Index: libgclib/Makefile.am
+===================================================================
 --- /dev/null
 +++ libgclib/Makefile.am
 @@ -0,0 +1,38 @@
@@ -43,6 +45,8 @@ Descriptions: Build proper libraries via automake
 +mdtest_SOURCES = mdtest.cpp libgclib.a
 +mdtest_LDADD = libgclib.la
 +bin_PROGRAMS = mdtest
+Index: libgclib/libgclib.pc.in
+===================================================================
 --- /dev/null
 +++ libgclib/libgclib.pc.in
 @@ -0,0 +1,11 @@
@@ -57,6 +61,8 @@ Descriptions: Build proper libraries via automake
 +Description: Genomic C(++) library of reusable code for bioinformatics projects
 +Libs: -L${libdir} -lgclib
 +Cflags: -I${includedir}
+Index: libgclib/configure.ac
+===================================================================
 --- /dev/null
 +++ libgclib/configure.ac
 @@ -0,0 +1,61 @@
@@ -121,123 +127,3 @@ Descriptions: Build proper libraries via automake
 +	])
 +AC_OUTPUT
 +
---- libgclib.orig/Makefile
-+++ /dev/null
-@@ -1,117 +0,0 @@
--INCDIRS := -I. -I${GDIR} -I${BAM}
--
--CXX   := $(if $(CXX),$(CXX),g++)
--
--LINKER  := $(if $(LINKER),$(LINKER),g++)
--
--LDFLAGS := $(if $(LDFLAGS),$(LDFLAGS),-g)
--
--LIBS    := 
--
--
--# A simple hack to check if we are on Windows or not (i.e. are we using mingw32-make?)
--ifeq ($(findstring mingw32, $(MAKE)), mingw32)
--WINDOWS=1
--endif
--
--# Compiler settings
--TLIBS = 
--# Non-windows systems need pthread
--ifndef WINDOWS
--TLIBS += -lpthread
--endif
--
--
--
--DMACH := $(shell ${CXX} -dumpmachine)
--
--# MinGW32 GCC 4.5 link problem fix
--ifdef WINDOWS
--DMACH := windows
--ifeq ($(findstring 4.5.,$(shell g++ -dumpversion)), 4.5.)
--LDFLAGS += -static-libstdc++ -static-libgcc
--endif
--endif
--
--# Misc. system commands
--ifdef WINDOWS
--RM = del /Q
--else
--RM = rm -f
--endif
--
--# File endings
--ifdef WINDOWS
--EXE = .exe
--else
--EXE =
--endif
--
--CC      := g++
--
--BASEFLAGS  := -Wall -Wextra ${INCDIRS} $(MARCH) \
-- -D_REENTRANT -fno-exceptions -fno-rtti
--
--GCCVER5 := $(shell expr `g++ -dumpversion | cut -f1 -d.` \>= 5)
--ifeq "$(GCCVER5)" "1"
-- BASEFLAGS += -Wno-implicit-fallthrough
--endif
--
--GCCVER8 := $(shell expr `g++ -dumpversion | cut -f1 -d.` \>= 8)
--ifeq "$(GCCVER8)" "1"
--  BASEFLAGS += -Wno-class-memaccess
--endif
--
--#add the link-time optimization flag if gcc version > 4.5
--GCC_VERSION:=$(subst ., ,$(shell gcc -dumpversion))
--GCC_MAJOR:=$(word 1,$(GCC_VERSION))
--GCC_MINOR:=$(word 2,$(GCC_VERSION))
--#GCC_SUB:=$(word 3,$(GCC_VERSION))
--GCC_SUB:=x
--
--GCC45OPTS :=
--GCC45OPTMAIN :=
--
--ifneq (,$(filter %release %nodebug, $(MAKECMDGOALS)))
--  # -- release build
--  CXXFLAGS := $(if $(CXXFLAGS),$(CXXFLAGS),-g -O3)
--  CXXFLAGS += -DNDEBUG $(BASEFLAGS)
--  ifeq ($(shell expr $(GCC_MAJOR).$(GCC_MINOR) '>=' 4.5),1)
--    CXXFLAGS += -flto
--    GCC45OPTS := -flto
--    GCC45OPTMAIN := -fwhole-program
--  endif
--else
--  # -- debug build
--  CXXFLAGS := $(if $(CXXFLAGS),$(CXXFLAGS),-g -O0)
--  ifneq (, $(findstring darwin, $(DMACH)))
--      CXXFLAGS += -gdwarf-3
--  endif
--  CXXFLAGS += -DDEBUG -D_DEBUG -DGDEBUG $(BASEFLAGS)
--endif
--
--%.o : %.cpp
--	${CXX} ${CXXFLAGS} -c $< -o $@
--
--
--.PHONY : all
--all:    htest
--#mdtest
--nodebug: all
--release: all
--debug: all
--
--OBJS := GBase.o GStr.o GArgs.o GResUsage.o
--
--version: ; @echo "GCC Version is: "$(GCC_MAJOR)":"$(GCC_MINOR)":"$(GCC_SUB)
--	@echo "> GCC Opt. string is: "$(GCC45OPTS)
--htest:  $(OBJS) htest.o GHash.hh
--	${LINKER} ${LDFLAGS} $(GCC45OPTS) $(GCC45OPTMAIN) -o $@ ${filter-out %.a %.so, $^} ${LIBS}
--mdtest: $(OBJS) mdtest.o
--	${LINKER} ${LDFLAGS} $(GCC45OPTS) $(GCC45OPTMAIN) -o $@ ${filter-out %.a %.so, $^} ${LIBS}
--# target for removing all object files
--
--.PHONY : clean
--clean:: 
--	@${RM} $(OBJS) *.o mdtest$(EXE) htest$(EXE)
--	@${RM} core.*


=====================================
gff.cpp
=====================================
@@ -2912,12 +2912,12 @@ void GffObj::printGxf(FILE* fout, GffPrintMode gffp,
  }
  const char* gseqname=names->gseqs.Get(gseq_id)->name;
  bool gff3 = (gffp>=pgffAny && gffp<=pgffTLF);
- bool showCDS = (gffp==pgtfAny || gffp==pgtfCDS || gffp==pgffCDS || gffp==pgffAny || gffp==pgffBoth);
- bool showExon = (gffp<=pgtfExon || gffp==pgffAny || gffp==pgffExon || gffp==pgffBoth);
+ bool showCDS = (gffp==pgtfAny || gffp==pgtfCDS || gffp==pgtfBoth || gffp==pgffCDS || gffp==pgffAny || gffp==pgffBoth);
+ bool showExon = (gffp<=pgtfExon || gffp==pgtfBoth ||  gffp==pgffAny || gffp==pgffExon || gffp==pgffBoth);
  //if (gscore>0.0) sprintf(dbuf,"%.2f", gscore);
  //       else strcpy(dbuf,".");
  gscore.sprint(dbuf);
- if (gffp<=pgtfCDS && gffp>=pgtfAny) { //GTF output
+ if (gffp<=pgtfBoth && gffp>=pgtfAny) { //GTF output
 	   fprintf(fout,
 	     "%s\t%s\ttranscript\t%d\t%d\t%s\t%c\t.\ttranscript_id \"%s\"",
 	     gseqname, tlabel, start, end, dbuf, strand, gffID);
@@ -3026,7 +3026,7 @@ void GffObj::printGxf(FILE* fout, GffPrintMode gffp,
    fprintf(fout,"\n");
  }// gff3 transcript line
  if (gffp==pgffTLF) return;
- bool is_cds_only = (gffp==pgffBoth) ? false : isCDSOnly();
+ bool is_cds_only = (gffp==pgffBoth || gffp==pgtfBoth) ? false : isCDSOnly();
  if (showExon) {
     //print exons
     for (int i=0;i<exons.Count();i++) {
@@ -3039,7 +3039,7 @@ void GffObj::printGxf(FILE* fout, GffPrintMode gffp,
 	for (int i=0;i<cds.Count();i++) {
 		printGxfExon(fout, tlabel, gseqname, true, &(cds[i]), gff3, cvtChars, dbuf, DBUF_LEN);
 	}
-  } //showCDS
+  } //printing CDSs
 }
 
 void GffObj::updateCDSPhase(GList<GffExon>& segs) {
@@ -3124,15 +3124,16 @@ void GffObj::getCDSegs(GVec<GffExon>& cds) {
 //-- transcript match/overlap classification functions
 
 
-char transcriptMatch(GffObj& a, GffObj& b, int& ovlen) {
-	//return '=' if exact exon match, '~' if intron-chain match (or 80% overlap for single-exon)
+char transcriptMatch(GffObj& a, GffObj& b, int& ovlen, int trange) {
+	//return '=' if exact exon match or transcript ends are within trange distance
+	// '~' if intron-chain match (or 80% overlap, for single-exon)
 	// or 0 otherwise
 	int imax=a.exons.Count()-1;
 	int jmax=b.exons.Count()-1;
 	ovlen=0;
 	if (imax!=jmax) return false; //different number of exons, cannot match
 	if (imax==0) //single-exon mRNAs
-	    return (singleExonTMatch(a,b,ovlen));
+	    return (singleExonTMatch(a,b,ovlen, trange));
 	if ( a.exons[imax]->start<b.exons[0]->end ||
 		b.exons[jmax]->start<a.exons[0]->end )
 		return 0; //intron chains do not overlap at all
@@ -3146,16 +3147,17 @@ char transcriptMatch(GffObj& a, GffObj& b, int& ovlen) {
 			return 0; //intron mismatch
 		}
 	}
-	//--- full intron chain match:
-	if (a.exons[0]->start==b.exons[0]->start &&
-		a.exons.Last()->end==b.exons.Last()->end)
+	//--- full intron chain match
+	//check if it's an exact match (both transcript ends within trange)
+	if (abs((int)a.exons[0]->start-(int)b.exons[0]->start)<=trange &&
+		abs((int)a.exons.Last()->end-(int)b.exons.Last()->end)<=trange)
 		   return '=';
 	return '~';
 }
 
-
-char singleExonTMatch(GffObj& m, GffObj& r, int& ovlen) {
- //return '=' if exact match, '~' if the overlap is >=80% of the longer sequence length
+char singleExonTMatch(GffObj& m, GffObj& r, int& ovlen, int trange) {
+ //return '=' if boundaries match within trange distance,
+ //    or '~' if the overlap is >=80% of the longer sequence length
  // return 0 if there is no overlap
  GSeg mseg(m.start, m.end);
  ovlen=mseg.overlapLen(r.start,r.end);
@@ -3165,7 +3167,9 @@ char singleExonTMatch(GffObj& m, GffObj& r, int& ovlen) {
  // *OR* in case of reverse containment (reference contained in m)
  //   it's also considered "matching" if the overlap is at least 80% of
  //   the reference len AND at least 70% of the query len
- if (m.start==r.start && m.end==r.end) return '=';
+ if (abs((int)m.start-(int)r.start)<=trange
+	 && abs((int)m.end-(int)r.end)<=trange)
+	  return '=';
  if (m.covlen>r.covlen) {
    if ( (ovlen >= m.covlen*0.8) ||
 		   (ovlen >= r.covlen*0.8 && ovlen >= m.covlen* 0.7 ) )
@@ -3178,19 +3182,18 @@ char singleExonTMatch(GffObj& m, GffObj& r, int& ovlen) {
 }
 
 //formerly in gffcompare
-char getOvlCode(GffObj& m, GffObj& r, int& ovlen, bool strictMatch) {
+char getOvlCode(GffObj& m, GffObj& r, int& ovlen, bool stricterMatch, int trange) {
 	ovlen=0; //total actual exonic overlap
 	if (!m.overlap(r.start,r.end)) return 0;
 	int jmax=r.exons.Count()-1;
-	//int iovlen=0; //total m.exons overlap with ref introns
 	char rcode=0;
 	if (m.exons.Count()==1) { //single-exon transfrag
 		GSeg mseg(m.start, m.end);
 		if (jmax==0) { //also single-exon ref
 			//ovlen=mseg.overlapLen(r.start,r.end);
 			char eqcode=0;
-			if ((eqcode=singleExonTMatch(m, r, ovlen))>0) {
-				if (strictMatch) return eqcode;
+			if ((eqcode=singleExonTMatch(m, r, ovlen, trange))>0) {
+				if (stricterMatch) return eqcode;
 				            else return '=';
 			}
 			if (m.covlen<r.covlen)
@@ -3337,27 +3340,6 @@ char getOvlCode(GffObj& m, GffObj& r, int& ovlen, bool strictMatch) {
 		ichain_match=false;
 		if (mend>rend) j++; else i++;
 	} //while checking intron overlaps
-	/*** additional checking needed for intron retention when there is no ichain_match or overlap ?
-    if (!intron_retention && r_last_iovl<jmax) {
-	   //-- check the remaining ref introns not checked yet for retention
-       int i=q_last_iovl;
-       for (int j=r_last_iovl+1;j<=jmax && i<=imax;++j) {
-   		uint rstart=r.exons[j-1]->end; //ref intron start-end
-   		uint rend=r.exons[j]->start;
-   		if (rend<m.exons[i]->start) {
-   			i++;
-   			continue;
-   		}
-   		if (rstart>m.exons[i]->end)
-   			continue;
-   		//overlap between ref intron and m.exons[i]
-   		if (rstart>=m.exons[i]->start && rend<=m.exons[i]->end) {
-   			intron_retention=true;
-   			break;
-   		}
-       }
-    }
-    ***/
 	// --- when qry intron chain is contained within ref intron chain
 	//     qry terminal exons may poke (overhang) into ref's other introns
 	int l_iovh=0;   // overhang of q left boundary beyond the end of ref intron on the left
@@ -3371,9 +3353,9 @@ char getOvlCode(GffObj& m, GffObj& r, int& ovlen, bool strictMatch) {
 	if (ichain_match) { //intron (sub-)chain compatible so far (but there could still be conflicts)
 		if (imfirst==1 && imlast==imax) { // qry full intron chain match
 			if (jmfirst==1 && jmlast==jmax) {//identical intron chains
-				if (strictMatch) return (r.exons[0]->start==m.exons[0]->start &&
-						              r.exons.Last()->end && m.exons.Last()->end) ? '=' : '~';
-				else return '=';
+				if (stricterMatch) return (abs((int)r.exons[0]->start-(int)m.exons[0]->start)<=trange &&
+						              abs((int)r.exons.Last()->end-(int)m.exons.Last()->end)<=trange) ? '=' : '~';
+				return '=';
 			}
 			// -- a partial intron chain match
 			if (jmfirst>1) {
@@ -3432,8 +3414,9 @@ char getOvlCode(GffObj& m, GffObj& r, int& ovlen, bool strictMatch) {
 		    //		m.start, r.exons[0]->end, m.end, r.exons[jmax]->start);
 		    //if (ref_intron_poking>0 && )
 		//we just need to have no intron poking going on
-		if (!intron_conflict && ref_intron_poking<4 && qry_intron_poking<4) return 'm';
-		else return 'n';
+		if (!intron_conflict && ref_intron_poking<4
+								&& qry_intron_poking<4) return 'm';
+		return 'n';
 	}
 	if (junct_match) return 'j';
 	//we could have 'o' or 'y' here


=====================================
gff.h
=====================================
@@ -58,11 +58,12 @@ class GffObj;
 //---transcript overlapping - utility functions:
 int classcode_rank(char c); //returns priority value for class codes
 
-char getOvlCode(GffObj& m, GffObj& r, int& ovlen, bool strictMatch=false); //returns: class code
+char getOvlCode(GffObj& m, GffObj& r, int& ovlen, bool stricterMatch=false, int trange=0); //returns: class code
 
-char transcriptMatch(GffObj& a, GffObj& b, int& ovlen); //generic transcript match test
+char transcriptMatch(GffObj& a, GffObj& b, int& ovlen, int trange=0); //generic transcript match test
 // -- return '=', '~'  or 0
-char singleExonTMatch(GffObj& m, GffObj& r, int& ovlen); //single-exon transcript match test
+char singleExonTMatch(GffObj& m, GffObj& r, int& ovlen, int trange=0);
+//single-exon transcript match test - returning '=', '~'  or 0
 
 //---
 // -- tracking exon/CDS segments from local mRNA to genome coordinates
@@ -484,8 +485,9 @@ void gffnames_unref(GffNames* &n);
 
 enum GffPrintMode {
   pgtfAny, //print record as read, if GTF
-  pgtfExon, //print exon only features
-  pgtfCDS,  //print CDS and exon features
+  pgtfExon, //print only exon features (CDS converted to exon if exons are missing)
+  pgtfCDS,  //print only CDS features
+  pgtfBoth,  //print both CDS and exon features
   pgffAny, //print record as read (if isCDSonly() prints only CDS)
   pgffExon,
   pgffCDS,


=====================================
htest.cpp
=====================================
@@ -4,6 +4,11 @@
 #include "GVec.hh"
 #include "GHash.hh"
 #include "GResUsage.h"
+#include <cstdint>
+#include <iostream>
+#include <string>
+//#include <tsl/hopscotch_map.h>
+//#include <tsl/hopscotch_set.h>
 
 #define USAGE "Usage:\n\
   htest textfile.. \n\
@@ -19,7 +24,7 @@ struct HStrData {
 	HStrData(char* s=NULL, int c=0):cmd(c), str(s) { }
 };
 
-int loadStrings(FILE* f, GPVec<HStrData>& strgsuf, GPVec<HStrData>& strgs) {
+int loadStrings(FILE* f, GPVec<HStrData>& strgsuf, GPVec<HStrData>& strgs, int toLoad=0) {
   int num=0;
   GLineReader lr(f);
   char* line=NULL;
@@ -41,15 +46,60 @@ int loadStrings(FILE* f, GPVec<HStrData>& strgsuf, GPVec<HStrData>& strgs) {
       line[len-3]=0;
       strgs.Add(new HStrData(line));
 	  num++;
+	  if (toLoad && num>=toLoad) break;
   } //while line
   return num;
 }
 
+void showTimings(GResUsage swatch) {
+ char *wtime=commaprintnum((uint64_t)swatch.elapsed());
+ char *utime=commaprintnum((uint64_t)swatch.u_elapsed());
+ char *stime=commaprintnum((uint64_t)swatch.s_elapsed());
+ GMessage("Elapsed time (microseconds): %12s us\n", wtime);
+ GMessage("                  user time: %12s us\n", utime);
+ GMessage("                system time: %12s us\n", stime);
+ GFREE(wtime);GFREE(utime);GFREE(stime);
+}
+
+
+void run_GHash(GResUsage& swatch, GPVec<HStrData> & hstrs, const char* label) {
+	GHash<int> ghash;
+	int num_add=0, num_rm=0, num_clr=0;
+	GMessage("----------------- %s ----------------\n", label);
+	ghash.Clear();
+	swatch.start();
+	for (int i=0;i<hstrs.Count();i++) {
+			  switch (hstrs[i]->cmd) {
+				case 0:ghash.fAdd(hstrs[i]->str.chars(), new int(1)); num_add++; break;
+				case 1:ghash.Remove(hstrs[i]->str.chars()); num_rm++; break;
+				case 2:ghash.Clear(); num_clr++; break;
+			  }
+	}
+	ghash.Clear();
+	swatch.stop();
+	GMessage("  (%d inserts, %d deletions, %d clears)\n", num_add, num_rm, num_clr);
+}
+/*
+void run_Hopscotch(GResUsage& swatch, GPVec<HStrData> & hstrs, const char* label) {
+  int num_add=0, num_rm=0, num_clr=0;
+  tsl::hopscotch_map<std::string, int> hsmap;
+  GMessage("----------------- %s ----------------\n", label);
+  swatch.start();
+  for (int i=0;i<hstrs.Count();i++) {
+	  switch (hstrs[i]->cmd) {
+		case 0:hsmap.insert({hstrs[i]->str.chars(), 1}); num_add++; break;
+		case 1:hsmap.erase(hstrs[i]->str.chars()); num_rm++; break;
+		case 2:hsmap.clear(); num_clr++; break;
+	  }
+  }
+  hsmap.clear();
+  swatch.stop();
+  GMessage("  (%d inserts, %d deletions, %d clears)\n", num_add, num_rm, num_clr);
+}
+*/
 int main(int argc, char* argv[]) {
  GPVec<HStrData> strs;
  GPVec<HStrData> sufstrs;
- GHash<int> thash;
- GHash<int> sufthash;
  strs.setFreeItem(strFreeProc);
  sufstrs.setFreeItem(strFreeProc);
  //GArgs args(argc, argv, "hg:c:s:t:o:p:help;genomic-fasta=COV=PID=seq=out=disable-flag;test=");
@@ -59,46 +109,39 @@ int main(int argc, char* argv[]) {
  args.printError(USAGE, true);
  if (args.getOpt('h') || args.getOpt("help")) GMessage(USAGE);
  int numargs=args.startNonOpt();
- if (numargs>0) {
-   char* a=NULL;
-   FILE* f=NULL;
-   int total=0;
-   while ((a=args.nextNonOpt())) {
-	   f=fopen(a, "r");
-	   if (f==NULL) GError("Error: could not open file %s !\n", a);
-	   int num=loadStrings(f, sufstrs, strs);
-	   total+=num;
-   }
+ const char* a=NULL;
+ FILE* f=NULL;
+ int total=0;
+
+ if (numargs==0) {
+	 a="htest_data.lst";
+	 f=fopen(a, "r");
+	 if (f==NULL) GError("Error: could not open file %s !\n", a);
+	 int num=loadStrings(f, sufstrs, strs, 600000);
+	 total+=num;
+	 GMessage("..loaded %d strings from file %s\n", total, a);
+ }
+ else {
+	   while ((a=args.nextNonOpt())) {
+		   f=fopen(a, "r");
+		   if (f==NULL) GError("Error: could not open file %s !\n", a);
+		   int num=loadStrings(f, sufstrs, strs, 600000);
+		   total+=num;
+	   }
+  }
    GResUsage swatch;
-   //timing starts here
-   GMessage("----------------- loading no-suffix strings ----------------\n");
-   swatch.start();
-   for (int i=0;i<strs.Count();i++) {
-	  switch (strs[i]->cmd) {
-	    case 0:thash.fAdd(strs[i]->str.chars(), new int(1)); break;
-	    case 1:thash.Remove(strs[i]->str.chars()); break;
-	    case 2:thash.Clear(); break;
-	  }
-   }
-   swatch.stop();
-   GMessage("Elapsed time (microseconds): %.0f us\n", swatch.elapsed());
-   GMessage("                  user time: %.0f us\n", swatch.u_elapsed());
-   GMessage("                system time: %.0f us\n", swatch.s_elapsed());
-   // timing here
 
-   GMessage("----------------- loading suffix strings ----------------\n");
-   swatch.start();
-   for (int i=0;i<sufstrs.Count();i++) {
-		  switch (sufstrs[i]->cmd) {
-		    case 0:sufthash.fAdd(sufstrs[i]->str.chars(), new int(1)); break;
-		    case 1:sufthash.Remove(sufstrs[i]->str.chars()); break;
-		    case 2:sufthash.Clear(); break;
-		  }
-   }
-   swatch.stop();
-   GMessage("Elapsed time (microseconds): %.0f us\n", swatch.elapsed());
-   GMessage("                  user time: %.0f us\n", swatch.u_elapsed());
-   GMessage("                system time: %.0f us\n", swatch.s_elapsed());
 
- }
+   run_GHash(swatch, strs, "GHash no suffix");
+   showTimings(swatch);
+
+   run_GHash(swatch, sufstrs, "GHash w/ suffix");
+   showTimings(swatch);
+   /*
+   run_Hopscotch(swatch, strs, "hopscotch no suffix");
+   showTimings(swatch);
+
+   run_Hopscotch(swatch, sufstrs, "hopscotch w/ suffix");
+   showTimings(swatch);
+   */
 }


=====================================
mdtest.cpp
=====================================


=====================================
tag_git.sh
=====================================
@@ -3,5 +3,12 @@ git checkout master
 ver=$(fgrep '#define GCLIB_VERSION ' GBase.h)
 ver=${ver#*\"}
 ver=${ver%%\"*}
+git fetch --tags
+if [[ "$1" == "delete" || "$1" == "del" ]]; then
+  echo "Deleting tag v$ver .."
+  git tag -d v$ver
+  git push origin :refs/tags/v$ver
+  exit
+fi
 git tag -a "v$ver" -m "release $ver"
 git push --tags



View it on GitLab: https://salsa.debian.org/med-team/libgclib/-/compare/4d3c01db3d9b5cff05c876f6ca56a6171fef3a91...4edf66b9d830bf5cdc09295ca48e66ed66be1344

-- 
View it on GitLab: https://salsa.debian.org/med-team/libgclib/-/compare/4d3c01db3d9b5cff05c876f6ca56a6171fef3a91...4edf66b9d830bf5cdc09295ca48e66ed66be1344
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200808/403f9c7a/attachment-0001.html>


More information about the debian-med-commit mailing list