[med-svn] [Git][med-team/libgclib][master] 4 commits: New upstream version 0.11.10
Steffen Möller
gitlab at salsa.debian.org
Sat Aug 8 22:36:30 BST 2020
Steffen Möller pushed to branch master at Debian Med / libgclib
Commits:
0f0a9f2b by Steffen Moeller at 2020-08-08T22:56:32+02:00
New upstream version 0.11.10
- - - - -
584a3430 by Steffen Moeller at 2020-08-08T22:56:32+02:00
routine-update: New upstream version
- - - - -
2693fa6a by Steffen Moeller at 2020-08-08T22:56:33+02:00
Update upstream source from tag 'upstream/0.11.10'
Update to upstream version '0.11.10'
with Debian dir 54cd374df9ed81325da8888a1f7435522d224512
- - - - -
4edf66b9 by Steffen Moeller at 2020-08-08T23:32:11+02:00
Preparing for new upstream version.
- - - - -
13 changed files:
- GBase.cpp
- GBase.h
- GList.hh
- GVec.hh
- Makefile
- debian/changelog
- debian/libgclib2.symbols
- debian/patches/autoconf.patch
- gff.cpp
- gff.h
- htest.cpp
- mdtest.cpp
- tag_git.sh
Changes:
=====================================
GBase.cpp
=====================================
@@ -921,8 +921,8 @@ void writeFasta(FILE *fw, const char* seqid, const char* descr,
}
char* commaprintnum(uint64 n) {
- int comma = ',';
char retbuf[48];
+ int comma = ',';
char *p = &retbuf[sizeof(retbuf)-1];
int i = 0;
*p = '\0';
=====================================
GBase.h
=====================================
@@ -1,6 +1,6 @@
#ifndef G_BASE_DEFINED
#define G_BASE_DEFINED
-#define GCLIB_VERSION "0.11.9"
+#define GCLIB_VERSION "0.11.10"
#ifdef HAVE_CONFIG_H
#include "config.h"
@@ -110,21 +110,32 @@ typedef uint64_t uint64;
//-------------------
+// GEXIT(a): writes "Error: " followed by the C string `a` to stderr, then
+// calls GError() with the line number and file name of the call site.
+// `a` is printed through "%s" rather than used as the format string itself:
+// GVERIFY passes a stringified condition here, and a '%' inside it (e.g. the
+// modulo operator) must not be interpreted as a conversion specification.
+#define GEXIT(a) { \
+fprintf(stderr, "Error: "); fprintf(stderr, "%s", a); \
+GError("Exiting from line %i in file %s\n",__LINE__,__FILE__); \
+}
+
// Debug helpers
#ifndef NDEBUG
#define GASSERT(exp) ((exp)?((void)0):(void)GAssert(#exp,__FILE__,__LINE__))
+ // GVERIFY(condition): debug-build check. When `condition` evaluates to false
+ // it prints the stringified condition with the file and line of the call site
+ // to stderr and then invokes GEXIT() with the same condition text.
+ // The body is wrapped in do { } while (0) so the macro expands to exactly one
+ // statement: `if (x) GVERIFY(c); else ...` parses as written and a following
+ // `else` can never attach to the macro's internal `if`.
+ // Call sites must end with ';', like a function call.
+ #define GVERIFY(condition) \
+do { if (!(condition)) { \
+fprintf(stderr, "Assumption \"%s\"\nFailed in file %s: at line:%i\n", \
+#condition,__FILE__,__LINE__); \
+GEXIT(#condition);} } while (0)
#ifdef TRACE
- #define GTRACE(exp) (GMessage exp)
+ #define GTRACE(exp) (GMessage(exp))
#else
- #define GTRACE(exp) ((void)0)
+ #define GTRACE(exp)
#endif
#else
- #define GASSERT(exp) ((void)0)
- #define GTRACE(exp) ((void)0)
+ #define GASSERT(exp)
+ #define GTRACE(exp)
+ #define GVERIFY(condition)
#endif
-#define GERROR(exp) (GError exp)
-/********************************** Macros ***********************************/
+#define GERROR(exp) (GError(exp))
+
// Absolute value
#define GABS(val) (((val)>=0)?(val):-(val))
=====================================
GList.hh
=====================================
@@ -50,6 +50,9 @@ template <class OBJ> class GArray:public GVec<OBJ> {
int Add(OBJ* item); // specific implementation if sorted
int Add(OBJ& item) { return Add(&item); } //both will CREATE a new OBJ and COPY to it
// using OBJ new operator=
+ int AddIfNew(OBJ& item, int* fidx=NULL); //requires == operator
+ //if no equal item is found, item is added and its index is returned;
+ //otherwise returns -1 and *fidx is set to the location of the equal item
int cAdd(OBJ item) { return Add(&item); }
int cPush(OBJ item) { return Add(&item); }
int Push(OBJ& item) { return Add(&item); }
@@ -229,14 +232,14 @@ template <class OBJ> int GArray<OBJ>::Add(OBJ* item) {
if (fUnique) return -1; //cannot add a duplicate!
//Found sets result to the position where the item should be!
GVec<OBJ>::Insert(result, *item);
- }
+ }
else {
- if (fUnique && Found(*item,result)) return -1; //set behaviour
+ if (fUnique && Found(*item, result)) return -1; //set behaviour
result = this->fCount;
if (result==this->fCapacity) GVec<OBJ>::Grow();
this->fArray[result] = *item; //operator=, copies the item
this->fCount++;
- }
+ }
return result;
}
@@ -255,6 +258,29 @@ template <class OBJ> void GArray<OBJ>::Add(GArray<OBJ>& list) {
}
}
+// Adds item only when Found() reports no equal item in the array.
+//  - equal item present: nothing is added and -1 is returned
+//  - otherwise: item is copied into the array and its index is returned
+// In both cases, when fidx is not NULL, *fidx receives the index involved
+// (the existing equal item, or the newly stored one).
+template <class OBJ> int GArray<OBJ>::AddIfNew(OBJ& item,
+        int* fidx) {
+  int pos;
+  const bool present = Found(item, pos);
+  if (!present) {
+    if (SORTED) {
+      // Found() left pos at the slot where item should be inserted
+      GVec<OBJ>::Insert(pos, item);
+    } else {
+      // unsorted array: append at the end, calling Grow() first when full
+      pos = this->fCount;
+      if (this->fCapacity == pos) GVec<OBJ>::Grow();
+      this->fArray[pos] = item; // copied with OBJ's operator=
+      this->fCount++;
+    }
+  }
+  if (fidx != NULL) *fidx = pos;
+  return present ? -1 : pos;
+}
+
template <class OBJ> bool GArray<OBJ>::Found(OBJ& item, int& idx) {
//search the list by using fCompareProc (if defined)
//or == operator for a non-sortable list
@@ -262,7 +288,7 @@ template <class OBJ> bool GArray<OBJ>::Found(OBJ& item, int& idx) {
//set to the closest matching object!
int i;
idx=-1;
- if (this->fCount==0) { idx=0;return false;}
+ if (this->fCount==0) { idx=0; return false;}
if (SORTED) { //binary search based on fCompareProc
//do the simplest tests first:
if ((*fCompareProc)(&(this->fArray[0]),&item)>0) {
=====================================
GVec.hh
=====================================
@@ -111,13 +111,10 @@ template <class OBJ> class GVec {
//this will reject identical items in sorted lists only!
void setCapacity(int NewCapacity);
int Count() { return fCount; }
-
void setCount(int NewCount); // will trim or expand the array as needed
- void setCount(int NewCount, OBJ* v); //same as setCount() but new objects are set to v
void setCount(int NewCount, OBJ v);
void Resize(int NewCount) { setCount(NewCount); }
- //void Resize(int NewCount, OBJ* v) { setCount(NewCount, v); }
- void Resize(int NewCount, OBJ v) { setCount(NewCount, &v); }
+ void Resize(int NewCount, OBJ v) { setCount(NewCount, v); }
//void Move(int curidx, int newidx);
bool isEmpty() { return fCount==0; }
@@ -290,13 +287,15 @@ template <class OBJ> void GVec<OBJ>::setCapacity(int NewCapacity) {
else {
if (IsPrimitiveType<OBJ>::VAL) {
GREALLOC(fArray, NewCapacity*sizeof(OBJ));
+ //also zero init the new items?
+ memset(fArray+fCount, 0, (NewCapacity-fCount)*sizeof(OBJ));
} else {
OBJ* oldArray=fArray;
//fArray=new OBJ[NewCapacity]();
fArray=new OBJ[NewCapacity];
for (int i=0;i<this->fCount;i++) {
fArray[i] = oldArray[i];
- }// we need operator= here
+ }// we need operator= here
//wouldn't be faster to use memcpy instead?
//memcpy(fArray, oldArray, fCount*sizeof(OBJ));
if (oldArray) delete[] oldArray;
@@ -355,16 +354,16 @@ template <class OBJ> void GVec<OBJ>::Grow(int idx, OBJ& item) {
newList[idx]=item;
//copy data after idx
memmove(&newList[idx+1],&fArray[idx], (fCount-idx)*sizeof(OBJ));
- //..shouldn't do this:
+ //..shouldn't do this (zero init new unused items in the array)
memset(&newList[fCount+1], 0, (NewCapacity-fCount-1)*sizeof(OBJ));
//data copied:
GFREE(fArray);
} else {
- newList=new OBJ[NewCapacity]; //]()
+ newList=new OBJ[NewCapacity];
// operator= required!
for (int i=0;i<idx;i++) {
newList[i]=fArray[i];
- }
+ }
newList[idx]=item;
//copy data after idx
//memmove(&newList[idx+1],&fArray[idx], (fCount-idx)*sizeof(OBJ));
@@ -499,9 +498,12 @@ template <class OBJ> void GVec<OBJ>::setCount(int NewCount) {
GError(GVEC_COUNT_ERR, NewCount);
//if (NewCount > fCapacity) setCapacity(NewCount);
while(NewCount > fCapacity) Grow();
+ if (IsPrimitiveType<OBJ>::VAL && NewCount>fCount) {
+ memset(fArray+fCount, 0, (NewCount-fCount)*sizeof(OBJ));
+ }
fCount = NewCount; //new items will be populated by the default object constructor(!)
}
-
+/*
template <class OBJ> void GVec<OBJ>::setCount(int NewCount, OBJ* v) {
if (NewCount<0 || NewCount > MAXLISTSIZE)
GError(GVEC_COUNT_ERR, NewCount);
@@ -512,7 +514,7 @@ template <class OBJ> void GVec<OBJ>::setCount(int NewCount, OBJ* v) {
}
fCount = NewCount;
}
-
+*/
template <class OBJ> void GVec<OBJ>::setCount(int NewCount, OBJ v) {
if (NewCount<0 || NewCount > MAXLISTSIZE)
GError(GVEC_COUNT_ERR, NewCount);
@@ -524,7 +526,6 @@ template <class OBJ> void GVec<OBJ>::setCount(int NewCount, OBJ v) {
fCount = NewCount;
}
-
template <class OBJ> void GVec<OBJ>::qSort(int l, int r, GCompareProc* cmpFunc) {
int i, j;
OBJ p,t;
=====================================
Makefile
=====================================
@@ -47,17 +47,17 @@ else
EXE =
endif
-CC := g++
+CC := ${CXX}
BASEFLAGS := -Wall -Wextra ${INCDIRS} $(MARCH) \
-D_REENTRANT -fno-exceptions -fno-rtti
-GCCVER5 := $(shell expr `g++ -dumpversion | cut -f1 -d.` \>= 5)
+GCCVER5 := $(shell expr `${CXX} -dumpversion | cut -f1 -d.` \>= 5)
ifeq "$(GCCVER5)" "1"
BASEFLAGS += -Wno-implicit-fallthrough
endif
-GCCVER8 := $(shell expr `g++ -dumpversion | cut -f1 -d.` \>= 8)
+GCCVER8 := $(shell expr `${CXX} -dumpversion | cut -f1 -d.` \>= 8)
ifeq "$(GCCVER8)" "1"
BASEFLAGS += -Wno-class-memaccess
endif
=====================================
debian/changelog
=====================================
@@ -1,3 +1,9 @@
+libgclib (0.11.10-1) unstable; urgency=medium
+
+ * New upstream version
+
+ -- Steffen Moeller <moeller at debian.org> Sat, 08 Aug 2020 22:56:32 +0200
+
libgclib (0.11.9-2) unstable; urgency=medium
* Bump soname. Closes: #962550
=====================================
debian/libgclib2.symbols
=====================================
@@ -25,7 +25,8 @@ libgclib.so.2 libgclib2 #MINVER#
_Z10g2bit2baseh at Base 0.11.4
_Z10gcdb_allocj at Base 0.11.4
_Z10getFileExtPKc at Base 0.11.4
- _Z10getOvlCodeR6GffObjS0_Rib at Base 0.11.4
+ _Z10getOvlCodeR6GffObjS0_Rib at Base 0.11.10-1
+ _Z10getOvlCodeR6GffObjS0_Ribi at Base 0.11.10-1
_Z10parseFloatRPcRf at Base 0.11.4
_Z10replaceStrRPcS_ at Base 0.11.4
_Z10startsWithPKcS0_ at Base 0.11.4
@@ -78,12 +79,15 @@ libgclib.so.2 libgclib2 #MINVER#
_Z14gfo_cmpRefByIDPvS_ at Base 0.11.4
_Z14translateCodonPKc at Base 0.11.4
_Z15printEditScriptP12GXEditScript at Base 0.11.4
- _Z15transcriptMatchR6GffObjS0_Ri at Base 0.11.4
+ _Z15transcriptMatchR6GffObjS0_Ri at Base 0.11.10-1
+ _Z15transcriptMatchR6GffObjS0_Rii at Base 0.11.10-1
_Z15uint32_pack_bigPcj at Base 0.11.4
_Z16BED_addAttributeP8_IO_FILERiPKcz at Base 0.11.4
_Z16DefLTCompareProcI4GSegEiPvS1_ at Base 0.11.4
_Z16gthreads_errExitiPKc at Base 0.11.4
- _Z16singleExonTMatchR6GffObjS0_Ri at Base 0.11.4
+ _Z16singleExonTMatchR6GffObjS0_Ri at Base 0.11.10-1
+ _Z16singleExonTMatchR6GffObjS0_Rii at Base 0.11.10-1
+ _Z15transcriptMatchR6GffObjS0_Rii at Base 0.11.10-1
_Z17GreedyAlignRegionPKciiS0_iiP16CGreedyAlignDataP8CAlnTrimb at Base 0.11.4
_Z17GreedyAlignRegionPKciiS0_iiiiiP16CGreedyAlignDataP8CAlnTrimb at Base 0.11.4
_Z17gcvt_endian_setupv at Base 0.11.4
=====================================
debian/patches/autoconf.patch
=====================================
@@ -2,6 +2,8 @@ Author: Andreas Tille <tille at debian.org>
Last-Update: Thu, 18 Apr 2019 12:54:42 +0200
Descriptions: Build proper libraries via automake
+Index: libgclib/Makefile.am
+===================================================================
--- /dev/null
+++ libgclib/Makefile.am
@@ -0,0 +1,38 @@
@@ -43,6 +45,8 @@ Descriptions: Build proper libraries via automake
+mdtest_SOURCES = mdtest.cpp libgclib.a
+mdtest_LDADD = libgclib.la
+bin_PROGRAMS = mdtest
+Index: libgclib/libgclib.pc.in
+===================================================================
--- /dev/null
+++ libgclib/libgclib.pc.in
@@ -0,0 +1,11 @@
@@ -57,6 +61,8 @@ Descriptions: Build proper libraries via automake
+Description: Genomic C(++) library of reusable code for bioinformatics projects
+Libs: -L${libdir} -lgclib
+Cflags: -I${includedir}
+Index: libgclib/configure.ac
+===================================================================
--- /dev/null
+++ libgclib/configure.ac
@@ -0,0 +1,61 @@
@@ -121,123 +127,3 @@ Descriptions: Build proper libraries via automake
+ ])
+AC_OUTPUT
+
---- libgclib.orig/Makefile
-+++ /dev/null
-@@ -1,117 +0,0 @@
--INCDIRS := -I. -I${GDIR} -I${BAM}
--
--CXX := $(if $(CXX),$(CXX),g++)
--
--LINKER := $(if $(LINKER),$(LINKER),g++)
--
--LDFLAGS := $(if $(LDFLAGS),$(LDFLAGS),-g)
--
--LIBS :=
--
--
--# A simple hack to check if we are on Windows or not (i.e. are we using mingw32-make?)
--ifeq ($(findstring mingw32, $(MAKE)), mingw32)
--WINDOWS=1
--endif
--
--# Compiler settings
--TLIBS =
--# Non-windows systems need pthread
--ifndef WINDOWS
--TLIBS += -lpthread
--endif
--
--
--
--DMACH := $(shell ${CXX} -dumpmachine)
--
--# MinGW32 GCC 4.5 link problem fix
--ifdef WINDOWS
--DMACH := windows
--ifeq ($(findstring 4.5.,$(shell g++ -dumpversion)), 4.5.)
--LDFLAGS += -static-libstdc++ -static-libgcc
--endif
--endif
--
--# Misc. system commands
--ifdef WINDOWS
--RM = del /Q
--else
--RM = rm -f
--endif
--
--# File endings
--ifdef WINDOWS
--EXE = .exe
--else
--EXE =
--endif
--
--CC := g++
--
--BASEFLAGS := -Wall -Wextra ${INCDIRS} $(MARCH) \
-- -D_REENTRANT -fno-exceptions -fno-rtti
--
--GCCVER5 := $(shell expr `g++ -dumpversion | cut -f1 -d.` \>= 5)
--ifeq "$(GCCVER5)" "1"
-- BASEFLAGS += -Wno-implicit-fallthrough
--endif
--
--GCCVER8 := $(shell expr `g++ -dumpversion | cut -f1 -d.` \>= 8)
--ifeq "$(GCCVER8)" "1"
-- BASEFLAGS += -Wno-class-memaccess
--endif
--
--#add the link-time optimization flag if gcc version > 4.5
--GCC_VERSION:=$(subst ., ,$(shell gcc -dumpversion))
--GCC_MAJOR:=$(word 1,$(GCC_VERSION))
--GCC_MINOR:=$(word 2,$(GCC_VERSION))
--#GCC_SUB:=$(word 3,$(GCC_VERSION))
--GCC_SUB:=x
--
--GCC45OPTS :=
--GCC45OPTMAIN :=
--
--ifneq (,$(filter %release %nodebug, $(MAKECMDGOALS)))
-- # -- release build
-- CXXFLAGS := $(if $(CXXFLAGS),$(CXXFLAGS),-g -O3)
-- CXXFLAGS += -DNDEBUG $(BASEFLAGS)
-- ifeq ($(shell expr $(GCC_MAJOR).$(GCC_MINOR) '>=' 4.5),1)
-- CXXFLAGS += -flto
-- GCC45OPTS := -flto
-- GCC45OPTMAIN := -fwhole-program
-- endif
--else
-- # -- debug build
-- CXXFLAGS := $(if $(CXXFLAGS),$(CXXFLAGS),-g -O0)
-- ifneq (, $(findstring darwin, $(DMACH)))
-- CXXFLAGS += -gdwarf-3
-- endif
-- CXXFLAGS += -DDEBUG -D_DEBUG -DGDEBUG $(BASEFLAGS)
--endif
--
--%.o : %.cpp
-- ${CXX} ${CXXFLAGS} -c $< -o $@
--
--
--.PHONY : all
--all: htest
--#mdtest
--nodebug: all
--release: all
--debug: all
--
--OBJS := GBase.o GStr.o GArgs.o GResUsage.o
--
--version: ; @echo "GCC Version is: "$(GCC_MAJOR)":"$(GCC_MINOR)":"$(GCC_SUB)
-- @echo "> GCC Opt. string is: "$(GCC45OPTS)
--htest: $(OBJS) htest.o GHash.hh
-- ${LINKER} ${LDFLAGS} $(GCC45OPTS) $(GCC45OPTMAIN) -o $@ ${filter-out %.a %.so, $^} ${LIBS}
--mdtest: $(OBJS) mdtest.o
-- ${LINKER} ${LDFLAGS} $(GCC45OPTS) $(GCC45OPTMAIN) -o $@ ${filter-out %.a %.so, $^} ${LIBS}
--# target for removing all object files
--
--.PHONY : clean
--clean::
-- @${RM} $(OBJS) *.o mdtest$(EXE) htest$(EXE)
-- @${RM} core.*
=====================================
gff.cpp
=====================================
@@ -2912,12 +2912,12 @@ void GffObj::printGxf(FILE* fout, GffPrintMode gffp,
}
const char* gseqname=names->gseqs.Get(gseq_id)->name;
bool gff3 = (gffp>=pgffAny && gffp<=pgffTLF);
- bool showCDS = (gffp==pgtfAny || gffp==pgtfCDS || gffp==pgffCDS || gffp==pgffAny || gffp==pgffBoth);
- bool showExon = (gffp<=pgtfExon || gffp==pgffAny || gffp==pgffExon || gffp==pgffBoth);
+ bool showCDS = (gffp==pgtfAny || gffp==pgtfCDS || gffp==pgtfBoth || gffp==pgffCDS || gffp==pgffAny || gffp==pgffBoth);
+ bool showExon = (gffp<=pgtfExon || gffp==pgtfBoth || gffp==pgffAny || gffp==pgffExon || gffp==pgffBoth);
//if (gscore>0.0) sprintf(dbuf,"%.2f", gscore);
// else strcpy(dbuf,".");
gscore.sprint(dbuf);
- if (gffp<=pgtfCDS && gffp>=pgtfAny) { //GTF output
+ if (gffp<=pgtfBoth && gffp>=pgtfAny) { //GTF output
fprintf(fout,
"%s\t%s\ttranscript\t%d\t%d\t%s\t%c\t.\ttranscript_id \"%s\"",
gseqname, tlabel, start, end, dbuf, strand, gffID);
@@ -3026,7 +3026,7 @@ void GffObj::printGxf(FILE* fout, GffPrintMode gffp,
fprintf(fout,"\n");
}// gff3 transcript line
if (gffp==pgffTLF) return;
- bool is_cds_only = (gffp==pgffBoth) ? false : isCDSOnly();
+ bool is_cds_only = (gffp==pgffBoth || gffp==pgtfBoth) ? false : isCDSOnly();
if (showExon) {
//print exons
for (int i=0;i<exons.Count();i++) {
@@ -3039,7 +3039,7 @@ void GffObj::printGxf(FILE* fout, GffPrintMode gffp,
for (int i=0;i<cds.Count();i++) {
printGxfExon(fout, tlabel, gseqname, true, &(cds[i]), gff3, cvtChars, dbuf, DBUF_LEN);
}
- } //showCDS
+ } //printing CDSs
}
void GffObj::updateCDSPhase(GList<GffExon>& segs) {
@@ -3124,15 +3124,16 @@ void GffObj::getCDSegs(GVec<GffExon>& cds) {
//-- transcript match/overlap classification functions
-char transcriptMatch(GffObj& a, GffObj& b, int& ovlen) {
- //return '=' if exact exon match, '~' if intron-chain match (or 80% overlap for single-exon)
+char transcriptMatch(GffObj& a, GffObj& b, int& ovlen, int trange) {
+ //return '=' if exact exon match or transcript ends are within trange distance
+ // '~' if intron-chain match (or 80% overlap, for single-exon)
// or 0 otherwise
int imax=a.exons.Count()-1;
int jmax=b.exons.Count()-1;
ovlen=0;
if (imax!=jmax) return false; //different number of exons, cannot match
if (imax==0) //single-exon mRNAs
- return (singleExonTMatch(a,b,ovlen));
+ return (singleExonTMatch(a,b,ovlen, trange));
if ( a.exons[imax]->start<b.exons[0]->end ||
b.exons[jmax]->start<a.exons[0]->end )
return 0; //intron chains do not overlap at all
@@ -3146,16 +3147,17 @@ char transcriptMatch(GffObj& a, GffObj& b, int& ovlen) {
return 0; //intron mismatch
}
}
- //--- full intron chain match:
- if (a.exons[0]->start==b.exons[0]->start &&
- a.exons.Last()->end==b.exons.Last()->end)
+ //--- full intron chain match
+ //check if it's an exact match (both transcript ends within trange)
+ if (abs((int)a.exons[0]->start-(int)b.exons[0]->start)<=trange &&
+ abs((int)a.exons.Last()->end-(int)b.exons.Last()->end)<=trange)
return '=';
return '~';
}
-
-char singleExonTMatch(GffObj& m, GffObj& r, int& ovlen) {
- //return '=' if exact match, '~' if the overlap is >=80% of the longer sequence length
+char singleExonTMatch(GffObj& m, GffObj& r, int& ovlen, int trange) {
+ //return '=' if boundaries match within trange distance,
+ // or '~' if the overlap is >=80% of the longer sequence length
// return 0 if there is no overlap
GSeg mseg(m.start, m.end);
ovlen=mseg.overlapLen(r.start,r.end);
@@ -3165,7 +3167,9 @@ char singleExonTMatch(GffObj& m, GffObj& r, int& ovlen) {
// *OR* in case of reverse containment (reference contained in m)
// it's also considered "matching" if the overlap is at least 80% of
// the reference len AND at least 70% of the query len
- if (m.start==r.start && m.end==r.end) return '=';
+ if (abs((int)m.start-(int)r.start)<=trange
+ && abs((int)m.end-(int)r.end)<=trange)
+ return '=';
if (m.covlen>r.covlen) {
if ( (ovlen >= m.covlen*0.8) ||
(ovlen >= r.covlen*0.8 && ovlen >= m.covlen* 0.7 ) )
@@ -3178,19 +3182,18 @@ char singleExonTMatch(GffObj& m, GffObj& r, int& ovlen) {
}
//formerly in gffcompare
-char getOvlCode(GffObj& m, GffObj& r, int& ovlen, bool strictMatch) {
+char getOvlCode(GffObj& m, GffObj& r, int& ovlen, bool stricterMatch, int trange) {
ovlen=0; //total actual exonic overlap
if (!m.overlap(r.start,r.end)) return 0;
int jmax=r.exons.Count()-1;
- //int iovlen=0; //total m.exons overlap with ref introns
char rcode=0;
if (m.exons.Count()==1) { //single-exon transfrag
GSeg mseg(m.start, m.end);
if (jmax==0) { //also single-exon ref
//ovlen=mseg.overlapLen(r.start,r.end);
char eqcode=0;
- if ((eqcode=singleExonTMatch(m, r, ovlen))>0) {
- if (strictMatch) return eqcode;
+ if ((eqcode=singleExonTMatch(m, r, ovlen, trange))>0) {
+ if (stricterMatch) return eqcode;
else return '=';
}
if (m.covlen<r.covlen)
@@ -3337,27 +3340,6 @@ char getOvlCode(GffObj& m, GffObj& r, int& ovlen, bool strictMatch) {
ichain_match=false;
if (mend>rend) j++; else i++;
} //while checking intron overlaps
- /*** additional checking needed for intron retention when there is no ichain_match or overlap ?
- if (!intron_retention && r_last_iovl<jmax) {
- //-- check the remaining ref introns not checked yet for retention
- int i=q_last_iovl;
- for (int j=r_last_iovl+1;j<=jmax && i<=imax;++j) {
- uint rstart=r.exons[j-1]->end; //ref intron start-end
- uint rend=r.exons[j]->start;
- if (rend<m.exons[i]->start) {
- i++;
- continue;
- }
- if (rstart>m.exons[i]->end)
- continue;
- //overlap between ref intron and m.exons[i]
- if (rstart>=m.exons[i]->start && rend<=m.exons[i]->end) {
- intron_retention=true;
- break;
- }
- }
- }
- ***/
// --- when qry intron chain is contained within ref intron chain
// qry terminal exons may poke (overhang) into ref's other introns
int l_iovh=0; // overhang of q left boundary beyond the end of ref intron on the left
@@ -3371,9 +3353,9 @@ char getOvlCode(GffObj& m, GffObj& r, int& ovlen, bool strictMatch) {
if (ichain_match) { //intron (sub-)chain compatible so far (but there could still be conflicts)
if (imfirst==1 && imlast==imax) { // qry full intron chain match
if (jmfirst==1 && jmlast==jmax) {//identical intron chains
- if (strictMatch) return (r.exons[0]->start==m.exons[0]->start &&
- r.exons.Last()->end && m.exons.Last()->end) ? '=' : '~';
- else return '=';
+ if (stricterMatch) return (abs((int)r.exons[0]->start-(int)m.exons[0]->start)<=trange &&
+ abs((int)r.exons.Last()->end-(int)m.exons.Last()->end)<=trange) ? '=' : '~';
+ return '=';
}
// -- a partial intron chain match
if (jmfirst>1) {
@@ -3432,8 +3414,9 @@ char getOvlCode(GffObj& m, GffObj& r, int& ovlen, bool strictMatch) {
// m.start, r.exons[0]->end, m.end, r.exons[jmax]->start);
//if (ref_intron_poking>0 && )
//we just need to have no intron poking going on
- if (!intron_conflict && ref_intron_poking<4 && qry_intron_poking<4) return 'm';
- else return 'n';
+ if (!intron_conflict && ref_intron_poking<4
+ && qry_intron_poking<4) return 'm';
+ return 'n';
}
if (junct_match) return 'j';
//we could have 'o' or 'y' here
=====================================
gff.h
=====================================
@@ -58,11 +58,12 @@ class GffObj;
//---transcript overlapping - utility functions:
int classcode_rank(char c); //returns priority value for class codes
-char getOvlCode(GffObj& m, GffObj& r, int& ovlen, bool strictMatch=false); //returns: class code
+char getOvlCode(GffObj& m, GffObj& r, int& ovlen, bool stricterMatch=false, int trange=0); //returns: class code
-char transcriptMatch(GffObj& a, GffObj& b, int& ovlen); //generic transcript match test
+char transcriptMatch(GffObj& a, GffObj& b, int& ovlen, int trange=0); //generic transcript match test
// -- return '=', '~' or 0
-char singleExonTMatch(GffObj& m, GffObj& r, int& ovlen); //single-exon transcript match test
+char singleExonTMatch(GffObj& m, GffObj& r, int& ovlen, int trange=0);
+//single-exon transcript match test - returning '=', '~' or 0
//---
// -- tracking exon/CDS segments from local mRNA to genome coordinates
@@ -484,8 +485,9 @@ void gffnames_unref(GffNames* &n);
enum GffPrintMode {
pgtfAny, //print record as read, if GTF
- pgtfExon, //print exon only features
- pgtfCDS, //print CDS and exon features
+ pgtfExon, //print only exon features (CDS converted to exon if exons are missing)
+ pgtfCDS, //print only CDS features
+ pgtfBoth, //print both CDS and exon features
pgffAny, //print record as read (if isCDSonly() prints only CDS)
pgffExon,
pgffCDS,
=====================================
htest.cpp
=====================================
@@ -4,6 +4,11 @@
#include "GVec.hh"
#include "GHash.hh"
#include "GResUsage.h"
+#include <cstdint>
+#include <iostream>
+#include <string>
+//#include <tsl/hopscotch_map.h>
+//#include <tsl/hopscotch_set.h>
#define USAGE "Usage:\n\
htest textfile.. \n\
@@ -19,7 +24,7 @@ struct HStrData {
HStrData(char* s=NULL, int c=0):cmd(c), str(s) { }
};
-int loadStrings(FILE* f, GPVec<HStrData>& strgsuf, GPVec<HStrData>& strgs) {
+int loadStrings(FILE* f, GPVec<HStrData>& strgsuf, GPVec<HStrData>& strgs, int toLoad=0) {
int num=0;
GLineReader lr(f);
char* line=NULL;
@@ -41,15 +46,60 @@ int loadStrings(FILE* f, GPVec<HStrData>& strgsuf, GPVec<HStrData>& strgs) {
line[len-3]=0;
strgs.Add(new HStrData(line));
num++;
+ if (toLoad && num>=toLoad) break;
} //while line
return num;
}
+// Reports the three timings held by swatch (elapsed, user, system) through
+// GMessage(). Each value is converted to uint64_t, turned into text by
+// commaprintnum(), printed right-aligned in a 12-character field, and the
+// returned strings are released with GFREE afterwards.
+void showTimings(GResUsage swatch) {
+  char* shown[3];
+  shown[0] = commaprintnum(static_cast<uint64_t>(swatch.elapsed()));
+  shown[1] = commaprintnum(static_cast<uint64_t>(swatch.u_elapsed()));
+  shown[2] = commaprintnum(static_cast<uint64_t>(swatch.s_elapsed()));
+  GMessage("Elapsed time (microseconds): %12s us\n", shown[0]);
+  GMessage(" user time: %12s us\n", shown[1]);
+  GMessage(" system time: %12s us\n", shown[2]);
+  for (int k = 0; k < 3; ++k) {
+    GFREE(shown[k]);
+  }
+}
+
+
+// Replays the commands stored in hstrs against a local GHash<int> while
+// swatch is running, then prints how many commands of each kind were seen.
+//   cmd 0: fAdd() the entry's string as key with a new int(1) value
+//   cmd 1: Remove() the entry's string
+//   cmd 2: Clear() the whole hash
+// Entries with any other cmd value are skipped. label only appears in the
+// banner line printed before the run.
+void run_GHash(GResUsage& swatch, GPVec<HStrData> & hstrs, const char* label) {
+  GHash<int> ghash;
+  int inserts = 0;
+  int deletions = 0;
+  int clears = 0;
+  GMessage("----------------- %s ----------------\n", label);
+  ghash.Clear();
+  swatch.start();
+  for (int k = 0; k < hstrs.Count(); k++) {
+    HStrData* rec = hstrs[k];
+    if (rec->cmd == 0) {
+      ghash.fAdd(rec->str.chars(), new int(1));
+      inserts++;
+    } else if (rec->cmd == 1) {
+      ghash.Remove(rec->str.chars());
+      deletions++;
+    } else if (rec->cmd == 2) {
+      ghash.Clear();
+      clears++;
+    }
+  }
+  ghash.Clear();
+  swatch.stop();
+  GMessage(" (%d inserts, %d deletions, %d clears)\n", inserts, deletions, clears);
+}
+/*
+void run_Hopscotch(GResUsage& swatch, GPVec<HStrData> & hstrs, const char* label) {
+ int num_add=0, num_rm=0, num_clr=0;
+ tsl::hopscotch_map<std::string, int> hsmap;
+ GMessage("----------------- %s ----------------\n", label);
+ swatch.start();
+ for (int i=0;i<hstrs.Count();i++) {
+ switch (hstrs[i]->cmd) {
+ case 0:hsmap.insert({hstrs[i]->str.chars(), 1}); num_add++; break;
+ case 1:hsmap.erase(hstrs[i]->str.chars()); num_rm++; break;
+ case 2:hsmap.clear(); num_clr++; break;
+ }
+ }
+ hsmap.clear();
+ swatch.stop();
+ GMessage(" (%d inserts, %d deletions, %d clears)\n", num_add, num_rm, num_clr);
+}
+*/
int main(int argc, char* argv[]) {
GPVec<HStrData> strs;
GPVec<HStrData> sufstrs;
- GHash<int> thash;
- GHash<int> sufthash;
strs.setFreeItem(strFreeProc);
sufstrs.setFreeItem(strFreeProc);
//GArgs args(argc, argv, "hg:c:s:t:o:p:help;genomic-fasta=COV=PID=seq=out=disable-flag;test=");
@@ -59,46 +109,39 @@ int main(int argc, char* argv[]) {
args.printError(USAGE, true);
if (args.getOpt('h') || args.getOpt("help")) GMessage(USAGE);
int numargs=args.startNonOpt();
- if (numargs>0) {
- char* a=NULL;
- FILE* f=NULL;
- int total=0;
- while ((a=args.nextNonOpt())) {
- f=fopen(a, "r");
- if (f==NULL) GError("Error: could not open file %s !\n", a);
- int num=loadStrings(f, sufstrs, strs);
- total+=num;
- }
+ const char* a=NULL;
+ FILE* f=NULL;
+ int total=0;
+
+ if (numargs==0) {
+ a="htest_data.lst";
+ f=fopen(a, "r");
+ if (f==NULL) GError("Error: could not open file %s !\n", a);
+ int num=loadStrings(f, sufstrs, strs, 600000);
+ total+=num;
+ GMessage("..loaded %d strings from file %s\n", total, a);
+ }
+ else {
+ while ((a=args.nextNonOpt())) {
+ f=fopen(a, "r");
+ if (f==NULL) GError("Error: could not open file %s !\n", a);
+ int num=loadStrings(f, sufstrs, strs, 600000);
+ total+=num;
+ }
+ }
GResUsage swatch;
- //timing starts here
- GMessage("----------------- loading no-suffix strings ----------------\n");
- swatch.start();
- for (int i=0;i<strs.Count();i++) {
- switch (strs[i]->cmd) {
- case 0:thash.fAdd(strs[i]->str.chars(), new int(1)); break;
- case 1:thash.Remove(strs[i]->str.chars()); break;
- case 2:thash.Clear(); break;
- }
- }
- swatch.stop();
- GMessage("Elapsed time (microseconds): %.0f us\n", swatch.elapsed());
- GMessage(" user time: %.0f us\n", swatch.u_elapsed());
- GMessage(" system time: %.0f us\n", swatch.s_elapsed());
- // timing here
- GMessage("----------------- loading suffix strings ----------------\n");
- swatch.start();
- for (int i=0;i<sufstrs.Count();i++) {
- switch (sufstrs[i]->cmd) {
- case 0:sufthash.fAdd(sufstrs[i]->str.chars(), new int(1)); break;
- case 1:sufthash.Remove(sufstrs[i]->str.chars()); break;
- case 2:sufthash.Clear(); break;
- }
- }
- swatch.stop();
- GMessage("Elapsed time (microseconds): %.0f us\n", swatch.elapsed());
- GMessage(" user time: %.0f us\n", swatch.u_elapsed());
- GMessage(" system time: %.0f us\n", swatch.s_elapsed());
- }
+ run_GHash(swatch, strs, "GHash no suffix");
+ showTimings(swatch);
+
+ run_GHash(swatch, sufstrs, "GHash w/ suffix");
+ showTimings(swatch);
+ /*
+ run_Hopscotch(swatch, strs, "hopscotch no suffix");
+ showTimings(swatch);
+
+ run_Hopscotch(swatch, sufstrs, "hopscotch w/ suffix");
+ showTimings(swatch);
+ */
}
=====================================
mdtest.cpp
=====================================
=====================================
tag_git.sh
=====================================
@@ -3,5 +3,12 @@ git checkout master
ver=$(fgrep '#define GCLIB_VERSION ' GBase.h)
ver=${ver#*\"}
ver=${ver%%\"*}
+# Refresh the local view of remote tags before creating or deleting one.
+git fetch --tags
+# "delete" / "del" as first argument: remove tag v$ver locally and on origin
+# instead of creating it, then stop (exit status is that of the last command).
+if [[ "$1" == "delete" || "$1" == "del" ]]; then
+ echo "Deleting tag v$ver .."
+ # tag name is quoted, like the "v$ver" used for tag creation below
+ git tag -d "v$ver"
+ git push origin ":refs/tags/v$ver"
+ exit
+fi
git tag -a "v$ver" -m "release $ver"
git push --tags
View it on GitLab: https://salsa.debian.org/med-team/libgclib/-/compare/4d3c01db3d9b5cff05c876f6ca56a6171fef3a91...4edf66b9d830bf5cdc09295ca48e66ed66be1344
--
View it on GitLab: https://salsa.debian.org/med-team/libgclib/-/compare/4d3c01db3d9b5cff05c876f6ca56a6171fef3a91...4edf66b9d830bf5cdc09295ca48e66ed66be1344
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200808/403f9c7a/attachment-0001.html>
More information about the debian-med-commit
mailing list