[med-svn] [Git][med-team/libgclib][upstream] New upstream version 0.12.8+ds
Étienne Mollier (@emollier)
gitlab at salsa.debian.org
Thu Nov 27 21:41:49 GMT 2025
Étienne Mollier pushed to branch upstream at Debian Med / libgclib
Commits:
d83ce938 by Étienne Mollier at 2025-11-27T22:41:06+01:00
New upstream version 0.12.8+ds
- - - - -
8 changed files:
- GBase.cpp
- GBase.h
- GBitVec.h
- GFaSeqGet.h
- GList.hh
- GVec.hh
- gff.cpp
- gff.h
Changes:
=====================================
GBase.cpp
=====================================
@@ -46,7 +46,7 @@ void GError(const char* format,...){
va_end(arguments);
#ifdef DEBUG
// comment this if you do NOT want a core dump
- abort();
+ //abort();
#endif
#endif
exit(1);
@@ -119,6 +119,13 @@ int G_mkdir(const char* path, int perms = (S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH
#endif
}
+int Grmdir(const char *path) {
+#ifdef _WIN32
+ return !RemoveDirectoryA(path);
+#else
+ return rmdir(path);
+#endif
+}
void Gmktempdir(char* templ) {
#ifdef _WIN32
@@ -840,8 +847,9 @@ int fileExists(const char* fname) {
int64 fileSize(const char* fpath) {
#ifdef _WIN32
WIN32_FILE_ATTRIBUTE_DATA fad;
+
if (!GetFileAttributesEx(fpath, GetFileExInfoStandard, &fad))
- return -1; // error condition, could call GetLastError to find out more
+ return -1; // error condition, could call GetLastError to find out more
LARGE_INTEGER size;
size.HighPart = fad.nFileSizeHigh;
size.LowPart = fad.nFileSizeLow;
=====================================
GBase.h
=====================================
@@ -1,6 +1,6 @@
#ifndef G_BASE_DEFINED
#define G_BASE_DEFINED
-#define GCLIB_VERSION "0.12.7"
+#define GCLIB_VERSION "0.12.8"
#ifdef HAVE_CONFIG_H
#include "config.h"
@@ -197,6 +197,7 @@ inline int iround(double x) {
char* Grealpath(const char *path, char *resolved_path);
int Gmkdir(const char *path, bool recursive=true, int perms = (S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH));
+int Grmdir(const char *path);
void Gmktempdir(char* templ);
@@ -249,9 +250,10 @@ inline void GFree(pointer* ptr){
inline bool GMalloc(pointer* ptr,unsigned long size){
//GASSERT(ptr);
+ *ptr=nullptr;
if (size!=0)
*ptr=malloc(size);
- return *ptr!=NULL;
+ return *ptr!=nullptr;
}
// Allocate 0-filled memory
@@ -415,14 +417,18 @@ struct GSeg {
return (r->end<end)? r->end-start+1 : end-start+1;
}
}
- int overlapLen(uint rstart, uint rend) {
+ // refstart = overlap start in ref coordinate space (1-based)
+ int overlapLen(uint rstart, uint rend, int* refstart=NULL) {
if (rstart>rend) { Gswap(rstart,rend); }
+ if (refstart) *refstart=0;
if (start<rstart) {
if (rstart>end) return 0;
+ if (refstart) *refstart=1;
return (rend>end) ? end-rstart+1 : rend-rstart+1;
}
else { //rstart<=start
if (start>rend) return 0;
+ if (refstart) *refstart=(start-rstart)+1;
return (rend<end)? rend-start+1 : end-start+1;
}
}
=====================================
GBitVec.h
=====================================
@@ -109,8 +109,8 @@ public:
/// GBitVec default ctor - Creates an empty GBitVec.
- GBitVec() : Size(0), Capacity(0) {
- fBits = 0;
+ GBitVec() : fBits(0), Size(0), Capacity(0) {
+
}
/// GBitVec ctor - Creates a GBitVec of specified number of bits. All
@@ -128,13 +128,20 @@ public:
if (value)
clear_unused_bits();
}
+
unsigned long getMemorySize() const {
unsigned long r = ((unsigned long) Capacity) * sizeof(BitWord);
return r;
}
+ //move constructor
+ GBitVec(GBitVec&& o):fBits(o.fBits), Size(o.Size), Capacity(o.Capacity)
+ {
+ o.fBits=nullptr;
+ o.Size=0;o.Capacity=0;
+ }
GBitVec(const GBitVec* RHS) {
- if (RHS==NULL) {
+ if (RHS==NULL || RHS->size()==0) {
Size = 0;
fBits = 0;
Capacity = 0;
@@ -142,6 +149,7 @@ public:
}
Capacity = NumBitWords(RHS->size());
GMALLOC(fBits, Capacity * sizeof(BitWord));
+ Size = RHS->size();
memcpy(fBits, RHS->fBits, Capacity * sizeof(BitWord));
}
=====================================
GFaSeqGet.h
=====================================
@@ -178,7 +178,7 @@ class GFastaDb {
//GMessage("creating GFastaIndex with fastaPath=%s, fainame=%s\n", fastaPath, fainame.chars());
faIdx=new GFastaIndex(fastaPath, fainame);
char* fainamecwd=fainame; //will hold just the file name without the path
- char* plast=strrchr(fainamecwd, '/'); //CHPATHSEP
+ char* plast=strrchr(fainamecwd, '/');
if (plast!=NULL) {
fainamecwd=plast+1; //point to the file name only
}
=====================================
GList.hh
=====================================
@@ -11,11 +11,11 @@ Sortable collections of objects and object pointers
#define GLIST_UNSORTED_ERR "Operation not allowed on an unsorted list!\n"
//------ useful macros:
-#define BE_UNSORTED if (fCompareProc!=NULL) { GError(GLIST_SORTED_ERR); return; }
-#define BE_SORTED if (fCompareProc==NULL) { GError(GLIST_UNSORTED_ERR); return; }
+#define BE_UNSORTED if (fCompareProc!=nullptr) { GError(GLIST_SORTED_ERR); return; }
+#define BE_SORTED if (fCompareProc==nullptr) { GError(GLIST_UNSORTED_ERR); return; }
-#define SORTED (fCompareProc!=NULL)
-#define UNSORTED (fCompareProc==NULL)
+#define SORTED (fCompareProc!=nullptr)
+#define UNSORTED (fCompareProc==nullptr)
// GArray is the sortable array type, requires the comparison operator < to be defined
template <class OBJ> class GArray:public GVec<OBJ> {
@@ -29,7 +29,7 @@ template <class OBJ> class GArray:public GVec<OBJ> {
}
GCompareProc* fCompareProc;
public:
- GArray(GCompareProc* cmpFunc=NULL);
+ GArray(GCompareProc* cmpFunc=nullptr);
GArray(bool sorted, bool unique=false);
GArray(int init_capacity, bool sorted, bool unique=false);
GArray(const GArray<OBJ>& array); //copy constructor
@@ -44,13 +44,13 @@ template <class OBJ> class GArray:public GVec<OBJ> {
Sort();
}
}
- else fCompareProc=NULL;
+ else fCompareProc=nullptr;
}
- //sort the array if cmpFunc not NULL or changes
+ //sort the array if cmpFunc not nullptr or changes
int Add(OBJ* item); // specific implementation if sorted
int Add(OBJ& item) { return Add(&item); } //both will CREATE a new OBJ and COPY to it
// using OBJ new operator=
- int AddIfNew(OBJ& item, int* fidx=NULL); //requires == operator
+ int AddIfNew(OBJ& item, int* fidx=nullptr); //requires == operator
//if equal item not found, item is added and return the index of it
//otherwise returns -1 and fidx is set to the equal item location
int cAdd(OBJ item) { return Add(&item); }
@@ -61,7 +61,7 @@ template <class OBJ> class GArray:public GVec<OBJ> {
//this will reject identical items in sorted lists only!
void setUnique(bool beUnique) { fUnique = beUnique; };
void Sort(); //explicit sort may be requested
- bool Sorted() { return fCompareProc!=NULL; }
+ bool Sorted() { return fCompareProc!=nullptr; }
void Replace(int idx, OBJ& item); //Put, use operator= to copy
int Unique() { return fUnique; }
int IndexOf(OBJ& item);
@@ -91,7 +91,7 @@ template <class OBJ> class GList:public GPVec<OBJ> {
public:
void sortInsert(int idx, OBJ* item); //special insert in sorted lists
//WARNING: the caller must know the insert index such that the sort order is preserved!
- GList(GCompareProc* compareProc=NULL); //free by default
+ GList(GCompareProc* compareProc=nullptr); //free by default
GList(GCompareProc* compareProc, //unsorted by default
GFreeProc *freeProc,
bool beUnique=false);
@@ -105,8 +105,8 @@ template <class OBJ> class GList:public GPVec<OBJ> {
//void Clear();
//~GList();
void setSorted(GCompareProc* compareProc);
- //sorted if compareProc not NULL; sort the list if compareProc changes !
- bool Sorted() { return fCompareProc!=NULL; }
+ //sorted if compareProc not nullptr; sort the list if compareProc changes !
+ bool Sorted() { return fCompareProc!=nullptr; }
void setSorted(bool sorted) {
if (sorted) {
if (fCompareProc!=&DefaultCompareProc) {
@@ -114,12 +114,12 @@ template <class OBJ> class GList:public GPVec<OBJ> {
Sort();
}
}
- else fCompareProc=NULL;
+ else fCompareProc=nullptr;
}
int Add(OBJ* item); //-- specific implementation if sorted - may become an Insert()
void Add(GList<OBJ>& list); //add all pointers from another list
- OBJ* AddIfNew(OBJ* item, bool deleteIfFound=true, int* fidx=NULL);
+ OBJ* AddIfNew(OBJ* item, bool deleteIfFound=true, int* fidx=nullptr);
// default: delete item if Found() (and pointers are not equal)!
//returns the equal (==) object if it's in the list already
//or the item itself if it is unique and actually added
@@ -159,7 +159,7 @@ template <class OBJ> class GList:public GPVec<OBJ> {
template <class OBJ> GArray<OBJ>::GArray(const GArray<OBJ>& array):GVec<OBJ>(0) { //copy constructor
this->fCount=array.fCount;
this->fCapacity=array.fCapacity;
- this->fArray=NULL;
+ this->fArray=nullptr;
if (this->fCapacity>0) {
this->fArray=new OBJ[this->fCapacity];
}
@@ -196,19 +196,19 @@ template <class OBJ> GArray<OBJ>::GArray(GCompareProc* cmpFunc):GVec<OBJ>(0) {
template <class OBJ> GArray<OBJ>::GArray(bool sorted, bool unique):GVec<OBJ>(0) {
fUnique=unique;
- fCompareProc = sorted ? DefaultCompareProc : NULL;
+ fCompareProc = sorted ? DefaultCompareProc : nullptr;
}
template <class OBJ> GArray<OBJ>::GArray(int init_capacity,
bool sorted, bool unique):GVec<OBJ>(init_capacity) {
fUnique=unique;
- fCompareProc=sorted ? DefaultCompareProc : NULL;
+ fCompareProc=sorted ? DefaultCompareProc : nullptr;
}
template <class OBJ> void GArray<OBJ>::setSorted(GCompareProc* cmpFunc) {
GCompareProc* old_proc=fCompareProc;
fCompareProc=cmpFunc;
- if (fCompareProc!=old_proc && fCompareProc!=NULL)
+ if (fCompareProc!=old_proc && fCompareProc!=nullptr)
Sort(); //new compare method
}
@@ -226,7 +226,7 @@ template <class OBJ> bool GArray<OBJ>::Exists(OBJ& item) {
template <class OBJ> int GArray<OBJ>::Add(OBJ* item) {
- if (item==NULL) return -1;
+ if (item==nullptr) return -1;
int result;
if (SORTED) {
if (Found(*item, result))
@@ -278,7 +278,7 @@ template <class OBJ> int GArray<OBJ>::AddIfNew(OBJ& item,
this->fArray[rpos] = item; //operator= copies the item
this->fCount++;
}
- if (fidx!=NULL) *fidx=rpos;
+ if (fidx!=nullptr) *fidx=rpos;
return rpos;
}
@@ -358,8 +358,8 @@ template <class OBJ> void GArray<OBJ>::Replace(int idx, OBJ& item) {
}
template <class OBJ> void GArray<OBJ>::Sort() {
- if (fCompareProc==NULL) { fCompareProc=DefaultCompareProc; }
- if (this->fArray!=NULL && this->fCount>0)
+ if (fCompareProc==nullptr) { fCompareProc=DefaultCompareProc; }
+ if (this->fArray!=nullptr && this->fCount>0)
this->qSort(0, this->fCount-1, fCompareProc);
}
@@ -371,9 +371,11 @@ template <class OBJ> GList<OBJ>::GList(const GList<OBJ>& list):GPVec<OBJ>(list)
fCompareProc=list.fCompareProc;
}
-template <class OBJ> GList<OBJ>::GList(GList<OBJ>&& list):GPVec<OBJ>(list) { //move constructor
- fUnique=list.fUnique;
- fCompareProc=list.fCompareProc;
+//move constructor
+template <class OBJ> GList<OBJ>::GList(GList<OBJ>&& other) noexcept:
+ GPVec<OBJ>(std::move(other)), fUnique(other.fUnique), fCompareProc(other.fCompareProc){
+ //other.fUnique = false;
+ //other.fCompareProc = nullptr;
}
@@ -410,19 +412,19 @@ template <class OBJ> GList<OBJ>::GList(bool sorted,
}
else {
fCompareProc=&DefaultCompareProc;
- this->fFreeProc=NULL;
+ this->fFreeProc=nullptr;
fUnique=beUnique;
}
}
else {
if (free_elements) {
- fCompareProc=NULL;
+ fCompareProc=nullptr;
this->fFreeProc=GPVec<OBJ>::DefaultFreeProc;
fUnique=beUnique;
}
else {
- fCompareProc=NULL;
- this->fFreeProc=NULL;
+ fCompareProc=nullptr;
+ this->fFreeProc=nullptr;
fUnique=beUnique;
}
}
@@ -436,7 +438,7 @@ template <class OBJ> GList<OBJ>::GList(int init_capacity, bool sorted,
fUnique=beUnique;
}
else {
- fCompareProc=NULL;
+ fCompareProc=nullptr;
fUnique=beUnique;
}
}
@@ -453,16 +455,14 @@ template <class OBJ> GList<OBJ>& GList<OBJ>::operator=(GList& list) {
return *this;
}
+// move operator
template <class OBJ> GList<OBJ>& GList<OBJ>::operator=(GList&& list) {
if (&list!=this) {
- GPVec<OBJ>::Clear();
+ GPVec<OBJ>::operator=(std::move(list));
fCompareProc=list.fCompareProc;
- this->fCount=list.fCount;
- this->fFreeProc=list.fFreeProc;
- this->fList=list.fList;
- list.fList=NULL;
- list.fCount=0;
- list.fCapacity=0;
+ fUnique=list.fUnique;
+ //list.fUnique=false;
+ //list.fCompareProc=nullptr;
}
return *this;
}
@@ -470,7 +470,7 @@ template <class OBJ> GList<OBJ>& GList<OBJ>::operator=(GList&& list) {
template <class OBJ> void GList<OBJ>::setSorted(GCompareProc* compareProc) {
GCompareProc* old_proc=fCompareProc;
fCompareProc=compareProc;
- if (fCompareProc!=old_proc && fCompareProc!=NULL)
+ if (fCompareProc!=old_proc && fCompareProc!=nullptr)
Sort(); //new compare method
}
@@ -494,15 +494,15 @@ template <class OBJ> bool GList<OBJ>::Exists(OBJ* item) {
template <class OBJ> int GList<OBJ>::Add(OBJ* item) {
int result;
- if (item==NULL) return -1;
+ if (item==nullptr) return -1;
if (SORTED) {
if (Found(item, result))
if (fUnique) return -1; //duplicates forbidden
//Found sets result to the position where the item should be!
sortInsert(result, item);
}
- else {
- if (fUnique && Found(item,result)) return -1; //set behaviour
+ else { //not a sorted list
+ if (fUnique && Found(item,result)) return -1; //cannot accept duplicates
result = this->fCount;
if (result==this->fCapacity) GPVec<OBJ>::Grow();
this->fList[result]=item;
@@ -521,7 +521,7 @@ template <class OBJ> OBJ* GList<OBJ>::AddIfNew(OBJ* item,
if (deleteIfFound && (pointer)item != (pointer)(this->fList[r])) {
this->deallocate_item(item);
}
- if (fidx!=NULL) *fidx=r;
+ if (fidx!=nullptr) *fidx=r;
return this->fList[r]; //found
}
//not found:
@@ -534,7 +534,7 @@ template <class OBJ> OBJ* GList<OBJ>::AddIfNew(OBJ* item,
this->fList[r]=item;
this->fCount++;
}
- if (fidx!=NULL) *fidx=r;
+ if (fidx!=nullptr) *fidx=r;
return item;
}
@@ -567,7 +567,7 @@ template <class OBJ> bool GList<OBJ>::Found(OBJ* item, int& idx) {
//search the list by using fCompareProc (if defined)
//or == operator for a non-sortable list
//for sorted lists, even when the result is false, the idx is
- //set to the closest matching object!
+ //set to the insert index!
int i;
idx=-1;
if (this->fCount==0) { idx=0;return false;}
@@ -611,6 +611,7 @@ template <class OBJ> bool GList<OBJ>::Found(OBJ* item, int& idx) {
}
i++;
}
+ idx=i;
return false;
}
}
@@ -648,7 +649,7 @@ template <class OBJ> void GList<OBJ>::Put(int idx, OBJ* item, bool re_sort) {
// this may BREAK the sort order unless the "re_sort" parameter is given
if (idx<0 || idx>this->fCount) GError(GVEC_INDEX_ERR, idx);
this->fList[idx]=item;
- if (SORTED && item!=NULL && re_sort) Sort(); //re-sort
+ if (SORTED && item!=nullptr && re_sort) Sort(); //re-sort
}
template <class OBJ> int GList<OBJ>::Remove(OBJ* item) {
@@ -659,8 +660,8 @@ template <class OBJ> int GList<OBJ>::Remove(OBJ* item) {
}
template <class OBJ> void GList<OBJ>::Sort() {
- if (fCompareProc==NULL) fCompareProc = DefaultCompareProc;
- if (this->fList!=NULL && this->fCount>0)
+ if (fCompareProc==nullptr) fCompareProc = DefaultCompareProc;
+ if (this->fList!=nullptr && this->fCount>0)
this->qSort(0, this->fCount-1, fCompareProc);
}
=====================================
GVec.hh
=====================================
@@ -86,7 +86,8 @@ template <class OBJ> class GVec {
return fArray[0];
}
void Clear();
- void Delete(int index);
+ void Delete(int index); //delete item at index
+ void Delete(int index, int delcount); //delete delcount items starting at index
void Replace(int idx, OBJ& item); //Put, use operator= to copy
void Exchange(int idx1, int idx2);
void Swap(int idx1, int idx2) { Exchange(idx1, idx2); }
@@ -182,7 +183,7 @@ template <class OBJ> class GPVec {
GPVec(GPVec<OBJ>&& list); //move construstor
GPVec(GPVec<OBJ>* list); //similar to a copy constructor
GPVec<OBJ>& operator=(const GPVec<OBJ>& list);
- GPVec<OBJ>& operator=(GPVec<OBJ>&& list);//move assignment operator
+ GPVec<OBJ>& operator=(GPVec<OBJ>&& list);//move operator
inline OBJ* Get(int i) {
TEST_INDEX(i);
return fList[i];
@@ -355,9 +356,9 @@ template <class OBJ> void GVec<OBJ>::Grow(int idx, OBJ& item) {
//expands and inserts item at idx at the same time
//insertGrow(NewCapacity, idx, item);
OBJ* newList=new OBJ[NewCapacity];
- // operator= required!
+ // move operator= required!
std::move(& fArray[0], & fArray[idx], & newList[0]);
- newList[idx]=item;
+ newList[idx]=item; //operator=
//copy data after idx
std::move(& fArray[idx], & fArray[fCount], & newList[idx+1]);
delete[] fArray;
@@ -465,19 +466,15 @@ template <class OBJ> void GVec<OBJ>::Delete(int idx) {
TEST_INDEX(idx);
std::move(& fArray[idx+1], & fArray[fCount], & fArray[idx]);
fCount--;
- /*
- if (std::is_trivial<OBJ>::value) {
- if (index<fCount)
- //move higher elements if any (shift down)
- memmove(&fArray[index], &fArray[index+1], (fCount-index)*sizeof(OBJ));
- }
- else {
- while (index<fCount) {
- fArray[index]=fArray[index+1];
- index++;
- }
- }
- */
+}
+
+template <class OBJ> void GVec<OBJ>::Delete(int idx, int delcount) {
+ if (delcount<1 || delcount>fCount-idx)
+ GError("GVec::Delete error: cannot delete %d items from %d-long GVec at pos %d !\n",
+ delcount, fCount, idx);
+ TEST_INDEX(idx);
+ std::move(& fArray[idx+delcount], & fArray[fCount], & fArray[idx]);
+ fCount-=delcount;
}
template <class OBJ> void GVec<OBJ>::setCount(int NewCount) {
@@ -559,7 +556,7 @@ template <class OBJ> GPVec<OBJ>::GPVec(const GPVec& list) { //copy constructor
if (fCapacity>0) {
//GMALLOC(fList, fCapacity*sizeof(OBJ*));
fList=new OBJ*[fCapacity];
- std::move(&list.fList[0], &list.fList[fCount], &fList[0]);
+ std::copy(&list.fList[0], &list.fList[fCount], &fList[0]);
//memcpy(fList, list.fList, fCount*sizeof(OBJ*));
}
}
@@ -583,7 +580,7 @@ template <class OBJ> GPVec<OBJ>::GPVec(GPVec* plist) { //another copy constructo
if (fCapacity>0) {
//GMALLOC(fList, fCapacity*sizeof(OBJ*));
fList=new OBJ*[fCapacity];
- std::move(& (plist->fList[0]), & (plist->fList[fCount]), &fList[0]);
+ std::copy(& (plist->fList[0]), & (plist->fList[fCount]), &fList[0]);
//memcpy(fList, plist->fList, fCount*sizeof(OBJ*));
}
}
@@ -600,7 +597,7 @@ template <class OBJ> GPVec<OBJ>& GPVec<OBJ>::operator=(const GPVec& list) {
//GMALLOC(fList, fCapacity*sizeof(OBJ*));
//memcpy(fList, list.fList, fCount*sizeof(OBJ*));
fList=new OBJ*[fCapacity];
- std::move(&list.fList[0], &list.fList[fCount], &fList[0]);
+ std::copy(&list.fList[0], &list.fList[fCount], &fList[0]);
}
}
return *this;
=====================================
gff.cpp
=====================================
@@ -38,9 +38,11 @@ const byte CLASSCODE_J_RANK = 6; // all junctional based overlaps
byte classcode_rank(char c) {
switch (c) {
- case '=': return 0; //intron chain match or full exon chain match if strict matching is enabled
- case '~': return 1; //intron chain match when strict matching is enabled
- case 'c': return 4; //containment, perfect partial match (transfrag contained in reference)
+ case '=': return 0; // intron chain match or full exon chain match if strict matching is enabled
+ case '~': return 1; // intron chain match when strict matching is enabled
+ case ':': return 2; // intron chain match with no CDS match when cds matching is enabled
+ case '_': return 3; // intron chain match with no CDS match when strict matching and cds matching is enabled
+ case 'c': return 4; // containment, perfect partial match (transfrag contained in reference)
case 'k': return 4; // reverse containment (reference contained in transfrag)
case 'm': return 6; // full span overlap with all reference introns either matching or retained
case 'n': return 6; // partial overlap transfrag with at least one intron retention
@@ -52,9 +54,9 @@ byte classcode_rank(char c) {
case 'x': return 18; // generic overlap on opposite strand (usually wrong strand mapping)
case 'i': return 20; // intra-intron (transfrag fully contained within a reference intron)
case 'y': return 30; // no exon overlap: ref exons fall within transfrag introns! (reverse of i)
- case 'p': return 90; //polymerase run
- case 'r': return 92; //repeats
- case 'u': return 94; //intergenic
+ case 'p': return 90; // polymerase run
+ case 'r': return 92; // repeats
+ case 'u': return 94; // intergenic
case 0 : return 100;
default: return 96;
}
@@ -101,6 +103,86 @@ int gfo_cmpRefByID(const pointer p1, const pointer p2) {
else return (g1.gseq_id-g2.gseq_id); // sort refs by their id# order
}
+//only for multi-exon transcripts: comparison/ordering function
+// based on intron chain comparison and matching only!
+int txCmpByIntrons(const pointer p1, const pointer p2) {
+ //const GffObj* a = static_cast<const GffObj*>(p1);
+ //const GffObj* b = static_cast<const GffObj*>(p2);
+ GffObj* a = (GffObj*)p1;
+ GffObj* b = (GffObj*)p2;
+ // check chromosome and strand
+ if (a->gseq_id != b->gseq_id) return a->gseq_id < b->gseq_id ? -1 : 1;
+ if (a->strand != b->strand) return a->strand < b->strand ? -1 : 1;
+ // compare intron chains by their start and end, regardless of exon count
+ int min_exon_count = GMIN(a->exons.Count(), b->exons.Count());
+ for (int i = 0; i < min_exon_count-1; ++i) {
+ int a_istart = a->exons[i]->end+1;
+ int a_iend = a->exons[i+1]->start-1;
+ int b_istart = b->exons[i]->end+1;
+ int b_iend = b->exons[i+1]->start-1;
+
+ if (a_istart != b_istart) return a_istart < b_istart ? -1 : 1;
+ if (a_iend != b_iend) return a_iend < b_iend ? -1 : 1;
+ }
+ // after comparing shared introns, tx with more exons is "greater"
+ if (a->exons.Count() != b->exons.Count()) {
+ return a->exons.Count() < b->exons.Count() ? -1 : 1;
+ }
+ // intron chains are identical, they are considered "equal"
+ return 0;
+}
+
+// compare transcripts with strict ordering by their exon positions
+// function returns 0 (equal) if both transcripts have exact same exons
+int txCmpByExons(const pointer p1, const pointer p2) {
+ GffObj* t1 = (GffObj*)p1;
+ GffObj* t2 = (GffObj*)p2;
+ // check chromosome and strand
+ if (t1->gseq_id != t2->gseq_id) return t1->gseq_id - t2->gseq_id;
+ if (t1->strand != t2->strand) return t1->strand < t2->strand ? -1 : 1;
+ int64_t t1_start = t1->start;
+ int64_t t1_end = t1->end;
+ int64_t t2_start = t2->start;
+ int64_t t2_end = t2->end;
+
+ if (t1_start != t2_start) return (int)(t1_start - t2_start);
+ if (t1_end != t2_end) return (int)(t1_end - t2_end);
+
+
+ int minExonCount = GMIN(t1->exons.Count(), t2->exons.Count());
+ for (int i = 0; i < minExonCount; ++i) {
+ int64_t e1_start = t1->exons[i]->start;
+ int64_t e1_end = t1->exons[i]->end;
+ int64_t e2_start = t2->exons[i]->start;
+ int64_t e2_end = t2->exons[i]->end;
+
+ if (e1_start != e2_start) return (int)(e1_start - e2_start);
+ if (e1_end != e2_end) return (int)(e1_end - e2_end);
+ }
+
+ if (t1->exons.Count() != t2->exons.Count()) return t1->exons.Count()-t2->exons.Count();
+
+ return 0; // Equal if they have the same number and position of exons
+}
+
+//single-exon transcripts basic comparison/ordering function
+int seTxCompareProc(pointer* p1, pointer* p2) {
+ // check chromosome and strand
+ GffObj* a = (GffObj*)p1;
+ GffObj* b = (GffObj*)p2;
+ if (a->gseq_id != b->gseq_id) return a->gseq_id < b->gseq_id ? -1 : 1;
+ if (a->strand != b->strand) return a->strand < b->strand ? -1 : 1;
+ //first by start positions
+ if (a->start != b->start) return a->start < b->start ? -1 : 1;
+ // then by end positions
+ if (a->end != b->end) return a->end < b->end ? -1 : 1;
+ return 0; // they are equal (both start and end match)
+}
+
+
+
+
+
char* GffLine::extractGFFAttr(char* & infostr, const char* oline, const char* attr, bool caseStrict,
bool enforce_GTF2, int* rlen, bool deleteAttr) {
//parse a key attribute and remove it from the info string
@@ -1010,6 +1092,10 @@ int GffObj::addExon(uint segstart, uint segend, int8_t exontype, char phase, Gff
} //exon overlap/adjacent check
//new exon/CDS, not merged in a previous one
GffExon* enew=new GffExon(segstart, segend, exontype, phase, exon_score.score, exon_score.precision);
+ if (segs==&exons && exons.Count()==0) {
+ start=segstart;
+ end=segend;
+ }
int eidx=segs->Add(enew);
if (eidx<0) {
//this would actually be possible if the object is a "Gene" and "exons" are in fact isoforms
@@ -2953,10 +3039,23 @@ bool GffObj::printAttrs(FILE* fout, const char* sep, bool GTFstyle, bool cvtCha
if (strcmp(attrname, "gene_id")==0) continue;
if (cvtChars) {
decodeHexChars(dbuf, attrval, DBUF_LEN-1);
- fprintf(fout,"%s%s \"%s\"", prsep, attrname, dbuf);
+ int dl=0;
+ char* av=dbuf;
+ if ( (dl=strlen(av))>0) {
+ if (av[dl-1]=='"') av[dl-1]=0;
+ if (av[0]=='"') ++av;
+ }
+ fprintf(fout,"%s%s \"%s\"", prsep, attrname, av);
+ }
+ else {
+ int dl=0;
+ char* av=attrs->Get(i)->attr_val;
+ if ( (dl=strlen(av))>0) {
+ if (av[dl-1]=='"') av[dl-1]=0;
+ if (av[0]=='"') ++av;
+ }
+ fprintf(fout,"%s%s \"%s\"", prsep, attrname, av);
}
- else
- fprintf(fout,"%s%s \"%s\"", prsep, attrname, attrs->Get(i)->attr_val);
prsep=sep;
pr=true;
}
@@ -3156,361 +3255,578 @@ void GffObj::getCDSegs(GVec<GffExon>& cds) {
//-- transcript match/overlap classification functions
-char transcriptMatch(GffObj& a, GffObj& b, int& ovlen, int trange) {
- //return '=' if exact exon match or transcripts ends are within tdelta distance
- // '~' if intron-chain match (or 80% overlap, for single-exon)
- // or 0 otherwise
- int imax=a.exons.Count()-1;
- int jmax=b.exons.Count()-1;
- ovlen=0;
- if (imax!=jmax) return false; //different number of exons, cannot match
- if (imax==0) //single-exon mRNAs
- return (singleExonTMatch(a,b,ovlen, trange));
- if ( a.exons[imax]->start<b.exons[0]->end ||
- b.exons[jmax]->start<a.exons[0]->end )
- return 0; //intron chains do not overlap at all
- //check intron overlaps
- ovlen=a.exons[0]->end-(GMAX(a.start,b.start))+1;
- ovlen+=(GMIN(a.end,b.end))-a.exons.Last()->start;
- for (int i=1;i<=imax;i++) {
- if (i<imax) ovlen+=a.exons[i]->len();
- if ((a.exons[i-1]->end!=b.exons[i-1]->end) ||
- (a.exons[i]->start!=b.exons[i]->start)) {
- return 0; //intron mismatch
+char transcriptMatch(GffObj &a, GffObj &b, int &ovlen, int trange)
+{
+ // return '=' if exact exon match or transcripts ends are within tdelta distance
+ // '~' if intron-chain match (or 80% overlap, for single-exon)
+ // '-' if CDS chains do not match but otherwise would be '=' or '~'
+ // or 0 otherwise
+ int imax = a.exons.Count() - 1;
+ int jmax = b.exons.Count() - 1;
+ ovlen = 0;
+ if (imax != jmax)
+ return false; // different number of exons, cannot match
+ if (imax == 0) // single-exon mRNAs
+ return (singleExonTMatch(a, b, ovlen, trange));
+ if (a.exons[imax]->start < b.exons[0]->end ||
+ b.exons[jmax]->start < a.exons[0]->end)
+ return 0; // intron chains do not overlap at all
+ // check intron overlaps
+ ovlen = a.exons[0]->end - (GMAX(a.start, b.start)) + 1;
+ ovlen += (GMIN(a.end, b.end)) - a.exons.Last()->start;
+ for (int i = 1; i <= imax; i++)
+ {
+ if (i < imax)
+ ovlen += a.exons[i]->len();
+ if ((a.exons[i - 1]->end != b.exons[i - 1]->end) ||
+ (a.exons[i]->start != b.exons[i]->start))
+ {
+ return 0; // intron mismatch
}
}
//--- full intron chain match
- //check if it's an exact
- if (abs((int)a.exons[0]->start-(int)b.exons[0]->start)<=trange &&
- abs((int)a.exons.Last()->end-(int)b.exons.Last()->end)<=trange)
- return '=';
- return '~';
-}
-
-char singleExonTMatch(GffObj& m, GffObj& r, int& ovlen, int trange) {
- //return '=' if boundaries match within tdelta distance,
- // or '~' if the overlap is >=80% of the longer sequence length
- // return 0 if there is no overlap
- GSeg mseg(m.start, m.end);
- ovlen=mseg.overlapLen(r.start,r.end);
- if (ovlen<=0) return 0;
- // fuzzy matching for single-exon transcripts:
- // matching = overlap is at least 80% of the length of the longer transcript
- // *OR* in case of reverse containment (reference contained in m)
- // it's also considered "matching" if the overlap is at least 80% of
- // the reference len AND at least 70% of the query len
- if (abs((int)m.start-(int)r.start)<=trange
- && abs((int)m.end-(int)r.end)<=trange)
- return '=';
- if (m.covlen>r.covlen) {
- if ( (ovlen >= m.covlen*0.8) ||
- (ovlen >= r.covlen*0.8 && ovlen >= m.covlen* 0.7 ) )
- //allow also some fuzzy reverse containment
- return '~';
- } else {
- if (ovlen >= r.covlen*0.8) return '~';
- }
- return 0;
+ char result = 0;
+ // check if it's an exact
+ if (abs((int)a.exons[0]->start - (int)b.exons[0]->start) <= trange &&
+ abs((int)a.exons.Last()->end - (int)b.exons.Last()->end) <= trange)
+ result = '=';
+ else
+ result = '~';
+
+ bool cdsEndsMatch = a.CDstart == b.CDstart && a.CDend == b.CDend;
+ if (!cdsEndsMatch)
+ {
+ if (result == '=')
+ {
+ result = ':';
+ }
+ else if (result == '~')
+ {
+ result = '_';
+ }
+ }
+
+ return result;
+}
+
+bool txStructureMatch(GffObj& a, GffObj& b, double SE_tolerance, int ME_range) {
+ // Check if transcripts are on the same genomic sequence and overlap
+ if (a.gseq_id != b.gseq_id || !a.overlap(b.start, b.end)) {
+ return false; // No match if they are on different sequences or do not overlap
+ }
+ int exc = a.exons.Count();
+ if (exc != b.exons.Count())
+ return false; // No match if they have different exon counts
+ // Check for multi-exon transcripts with identical intron chain structure
+ if (exc > 1) {
+ for (int i = 0; i < exc-1; ++i) { // Compare intron coordinates
+ if (a.exons[i]->end != b.exons[i]->end ||
+ a.exons[i + 1]->start != b.exons[i + 1]->start) {
+ return false; // Intron chains do not match
+ }
+ }
+ if (abs((int)a.start - (int)b.start) > ME_range || abs((int)a.end - (int)b.end) > ME_range)
+ return false; // Transcript ends are too far apart
+ return true; // All intron chains match
+ } else {
+ // Calculate overlap for single-exon transcripts
+ int ov_start = GMAX(a.start, b.start);
+ int ov_end = GMIN(a.end, b.end);
+ int ovlen = ov_end - ov_start + 1;
+ //int shortest_span = GMIN(a.len(), b.len());
+ int longer_span = GMAX(a.len(), b.len());
+ if (static_cast<double>(ovlen) / longer_span >= SE_tolerance) {
+ return true; // Overlap meets or exceeds tolerance
+ }
+ }
+ return false; // Default case: no match
+}
+
+char singleExonTMatch(GffObj &m, GffObj &r, int &ovlen, int trange, int *ovlrefstart)
+{
+ // return '=' if boundaries match within tdelta distance,
+ // or '~' if the overlap is >=80% of the longer sequence length
+ // ':' and '_' respectively if CDS chains do not match but otherwise would be '=' or '~'
+ // return 0 if there is no overlap
+ GSeg mseg(m.start, m.end);
+ ovlen = mseg.overlapLen(r.start, r.end, ovlrefstart);
+ if (ovlen <= 0)
+ return 0;
+ // fuzzy matching for single-exon transcripts:
+ // matching = overlap is at least 80% of the length of the longer transcript
+ // *OR* in case of reverse containment (reference contained in m)
+ // it's also considered "matching" if the overlap is at least 80% of
+ // the reference len AND at least 70% of the query len
+ char result = 0;
+ if (abs((int)m.start - (int)r.start) <= trange && abs((int)m.end - (int)r.end) <= trange)
+ result = '=';
+ if (m.covlen > r.covlen)
+ {
+ if ((ovlen >= m.covlen * 0.8) ||
+ (ovlen >= r.covlen * 0.8 && ovlen >= m.covlen * 0.7))
+ // allow also some fuzzy reverse containment
+ result = '~';
+ }
+ else
+ {
+ if (ovlen >= r.covlen * 0.8)
+ result = '~';
+ }
+
+ // check if CDS chains match and modify result accordingly
+ bool cdsEndsMatch = r.CDstart == m.CDstart && r.CDend == m.CDend;
+ if (!cdsEndsMatch)
+ {
+ if (result == '=')
+ {
+ result = ':';
+ }
+ else if (result == '~')
+ {
+ result = '_';
+ }
+ }
+
+ return 0;
}
-TOvlData getOvlData(GffObj& m, GffObj& r, bool stricterMatch, int trange) {
+//NOTE: getOvlData() does not check the strands of the transcripts
+TOvlData getOvlData(GffObj &m, GffObj &r, bool stricterMatch, int trange, bool cdsMatch)
+{
TOvlData odta;
- if (!m.overlap(r.start,r.end)) return odta;
- int jmax=r.exons.Count()-1;
- //char rcode=0;
- if (m.exons.Count()==1) { //single-exon transfrag
+ if (!m.overlap(r.start, r.end))
+ return odta;
+ int jmax = r.exons.Count() - 1;
+ // char rcode=0;
+ if (m.exons.Count() == 1)
+ { // single-exon transfrag
GSeg mseg(m.start, m.end);
- if (jmax==0) { //also single-exon ref
- char eqcode=0;
- if ((eqcode=singleExonTMatch(m, r, odta.ovlen, trange))>0) {
- odta.ovlcode=(stricterMatch) ? eqcode : '=';
+ if (jmax == 0)
+ { // also single-exon ref
+ char eqcode = 0;
+ if ((eqcode = singleExonTMatch(m, r, odta.ovlen, trange, &odta.ovlRefstart)) > 0)
+ {
+ odta.ovlcode = (stricterMatch) ? eqcode : '=';
return odta;
}
- if (m.covlen<r.covlen)
- { if (odta.ovlen >= m.covlen*0.8) { odta.ovlcode='c'; return odta; } } // fuzzy containment
- else
- if (odta.ovlen >= r.covlen*0.8 ) { odta.ovlcode='k'; return odta; } // fuzzy reverse containment
- odta.ovlcode='o';
+ // singleExonTMatch set odta.ovlen and odta.ovlRefstart anyway
+ if (m.covlen < r.covlen)
+ {
+ if (odta.ovlen >= m.covlen * 0.8)
+ {
+ odta.ovlcode = 'c';
+ return odta;
+ }
+ } // fuzzy containment
+ else if (odta.ovlen >= r.covlen * 0.8)
+ {
+ odta.ovlcode = 'k';
+ return odta;
+ } // fuzzy reverse containment
+ odta.ovlcode = 'o';
return odta;
- //just plain overlapping
+ // just plain overlapping
}
//-- single-exon qry overlaping multi-exon ref
- //check full pre-mRNA case (all introns retained): code 'm'
- if (m.start<=r.exons[0]->end && m.end>=r.exons[jmax]->start)
- { odta.ovlcode='m'; return odta; }
-
- for (int j=0;j<=jmax;j++) {
- //check if it's ~contained by an exon
- int exovlen=mseg.overlapLen(r.exons[j]);
- if (exovlen>0) {
- odta.ovlen+=exovlen;
- if (m.start>r.exons[j]->start-4 && m.end<r.exons[j]->end+4) {
- odta.ovlcode='c';
- return odta; //close enough to be considered contained in this exon
+ // check full pre-mRNA case (all introns retained): code 'm'
+ if (m.start <= r.exons[0]->end && m.end >= r.exons[jmax]->start)
+ {
+ odta.ovlcode = 'm';
+ odta.ovlRefstart = ((r.exons[0]->start > m.start) ? 1 : m.start - r.exons[0]->start + 1);
+ int rovlend = (r.exons[jmax]->end < m.end) ? r.covlen : r.exons[jmax]->end - m.end + 1;
+ odta.ovlen = rovlend - odta.ovlRefstart + 1;
+ return odta;
+ }
+ int refxpos = 0;
+ for (int j = 0; j <= jmax; j++)
+ {
+ int ovlst = 0; // check if it's ~contained by an exon
+ int exovlen = mseg.overlapLen(r.exons[j]->start, r.exons[j]->end, &ovlst);
+ if (exovlen > 0)
+ {
+ if (odta.ovlen == 0)
+ { // first exon being overlapped
+ odta.ovlRefstart = refxpos + ovlst;
+ }
+ odta.ovlen += exovlen;
+
+ if (m.start > r.exons[j]->start - 4 && m.end < r.exons[j]->end + 4)
+ {
+ odta.ovlcode = 'c';
+ return odta; // close enough to be considered contained in this exon
}
}
- if (j==jmax) break; //last exon here, no intron to check
- //check if it fully covers an intron (retained intron)
- if (m.start<r.exons[j]->end && m.end>r.exons[j+1]->start)
- { odta.ovlcode='n'; return odta; }
- //check if it's fully contained by an intron
- if (m.end<r.exons[j+1]->start && m.start>r.exons[j]->end)
- { odta.ovlcode='i'; return odta; }
+ refxpos += r.exons[j]->len();
+ if (j == jmax)
+ break; // last exon here, no intron to check
+ // check if it fully covers an intron (retained intron)
+ if (m.start < r.exons[j]->end && m.end > r.exons[j + 1]->start)
+ {
+ odta.ovlcode = 'n';
+ return odta;
+ }
+ // check if it's fully contained by an intron
+ if (m.end < r.exons[j + 1]->start && m.start > r.exons[j]->end)
+ {
+ odta.ovlcode = 'i';
+ return odta;
+ }
// check if it's a potential pre-mRNA transcript
// (if overlaps this intron at least 10 bases)
- uint introvl=mseg.overlapLen(r.exons[j]->end+1, r.exons[j+1]->start-1);
- //iovlen+=introvl;
- if (introvl>=10 && mseg.len()>introvl+10) { odta.ovlcode='e'; }
- } //for each ref exon
- if (odta.ovlcode>0) return odta;
- odta.ovlcode='o'; //plain overlap, uncategorized
+ uint introvl = mseg.overlapLen(r.exons[j]->end + 1, r.exons[j + 1]->start - 1);
+ // iovlen+=introvl;
+ if (introvl >= 10 && mseg.len() > introvl + 10)
+ {
+ odta.ovlcode = 'e';
+ }
+ } // for each ref exon
+ if (odta.ovlcode > 0)
+ return odta;
+ odta.ovlcode = 'o'; // plain overlap, uncategorized
return odta;
- } //single-exon transfrag
+ } // single-exon transfrag
//-- multi-exon transfrag --
- int imax=m.exons.Count()-1;// imax>0 here
- odta.jbits.resize(imax << 1); //num_junctions = 2 * num_introns
- if (jmax==0) { //single-exon reference overlap
- //any exon overlap?
+ int imax = m.exons.Count() - 1; // imax>0 here
+ odta.jbits.resize(imax << 1); // num_junctions = 2 * num_introns
+ odta.inbits.resize(imax); // num_introns
+ if (jmax == 0)
+ { // single-exon reference overlap
+ // any exon overlap?
GSeg rseg(r.start, r.end);
- for (int i=0;i<=imax;i++) {
- //check if it's ~contained by an exon
- int exovlen=rseg.overlapLen(m.exons[i]);
- if (exovlen>0) {
- odta.ovlen+=exovlen;
- if (r.start>m.exons[i]->start-4 && r.end<m.exons[i]->end+4) {
- odta.ovlcode='k';
- return odta; //reference contained in this assembled exon
+ for (int i = 0; i <= imax; i++)
+ {
+ // check if it's ~contained by an exon
+ int rxpos = 0;
+ int exovlen = m.exons[i]->overlapLen(r.start, r.end, &rxpos);
+ if (exovlen > 0)
+ {
+ if (odta.ovlRefstart == 0)
+ odta.ovlRefstart = rxpos;
+ odta.ovlen += exovlen;
+ if (r.start > m.exons[i]->start - 4 && r.end < m.exons[i]->end + 4)
+ {
+ odta.ovlcode = 'k';
+ return odta; // reference contained in this assembled exon
}
}
- if (i==imax) break;
- if (r.end<m.exons[i+1]->start && r.start>m.exons[i]->end) {
- odta.ovlcode='y'; //ref contained in this transfrag intron
- return odta;
+ if (i == imax)
+ break;
+ if (r.end < m.exons[i + 1]->start && r.start > m.exons[i]->end)
+ {
+ odta.ovlcode = 'y'; // ref contained in this transfrag intron
+ return odta;
}
}
- odta.ovlcode='o';
+ odta.ovlcode = 'o'; // ref is just partially overlapped by this transfrag
return odta;
} // SET reference
// -- MET comparison ---
+ odta.rint.resize(jmax); // initialize bitvector of reference introns (1=intron matched)
// check if qry contained by a ref intron
- for (int j=0;j<jmax;j++) {
- if (m.end<r.exons[j+1]->start && m.start>r.exons[j]->end)
- { odta.ovlcode='i'; return odta; }
- }
- if (m.exons[imax]->start<r.exons[0]->end) {
- //qry intron chain ends before ref intron chain starts
- //check if last qry exon plugs the 1st ref intron
- if (m.exons[imax]->start<=r.exons[0]->end &&
- m.exons[imax]->end>=r.exons[1]->start) {
- odta.ovlen=m.exonOverlapLen(r);
- odta.ovlcode='n';
+ for (int j = 0; j < jmax; j++)
+ {
+ if (m.end < r.exons[j + 1]->start && m.start > r.exons[j]->end)
+ {
+ odta.ovlcode = 'i';
return odta;
}
- odta.ovlen=m.exons[imax]->overlapLen(r.exons[0]);
- odta.ovlcode='o'; //only terminal exons overlap
+ }
+ if (m.exons[imax]->start < r.exons[0]->end)
+ {
+ // qry intron chain ends before ref intron chain starts
+ // check if last qry exon plugs the 1st ref intron
+ if (m.exons[imax]->start <= r.exons[0]->end &&
+ m.exons[imax]->end >= r.exons[1]->start)
+ {
+ odta.ovlen = m.exonOverlapLen(r, &odta.ovlRefstart);
+ odta.ovlcode = 'n';
+ return odta;
+ }
+ odta.ovlen = m.exons[imax]->overlapLen(r.exons[0]->start, r.exons[0]->end, &odta.ovlRefstart);
+ odta.ovlcode = 'o'; // only terminal exons overlap
return odta;
}
- else if (r.exons[jmax]->start<m.exons[0]->end) {
- //qry intron chain starts after ref intron chain ends
- //check if first qry exon plugs the last ref intron
- if (m.exons[0]->start<=r.exons[jmax-1]->end &&
- m.exons[0]->end>=r.exons[jmax]->start) {
- odta.ovlen=m.exonOverlapLen(r);
- odta.ovlcode='n';
+ else if (r.exons[jmax]->start < m.exons[0]->end)
+ {
+ // qry intron chain starts after ref intron chain ends
+ // check if first qry exon plugs the last ref intron
+ if (m.exons[0]->start <= r.exons[jmax - 1]->end &&
+ m.exons[0]->end >= r.exons[jmax]->start)
+ {
+ odta.ovlen = m.exonOverlapLen(r, &odta.ovlRefstart);
+ odta.ovlcode = 'n';
return odta;
}
- odta.ovlen=m.exons[0]->overlapLen(r.exons[jmax]);
- odta.ovlcode='o'; //only terminal exons overlap
+ odta.ovlen = m.exons[0]->overlapLen(r.exons[jmax]->start, r.exons[jmax]->end, &odta.ovlRefstart);
+ odta.ovlRefstart += r.covlen - r.exons[jmax]->len();
+ odta.ovlcode = 'o'; // only terminal exons overlap
return odta;
}
- //check intron chain overlap (match, containment, intron retention etc.)
- int i=1; //index of exon to the right of current qry intron
- int j=1; //index of exon to the right of current ref intron
- bool intron_conflict=false; //overlapping introns have at least a mismatching splice site
- //from here on we check all qry introns against ref introns
- bool junct_match=false; //true if at least a junction match is found
- bool ichain_match=false; //if there is intron (sub-)chain match, to be updated by any mismatch
- bool intron_ovl=false; //if any intron overlap is found
- bool intron_retention=false; //if any ref intron is covered by a qry exon
- //intron chain (partial) match exon-index boundaries:
- int imfirst=0; //index of exon after first intron match in query (valid>0)
- int jmfirst=0; //index of exon after first intron match in reference (valid>0)
- int imlast=0; //index of exon after last intron match in query
- int jmlast=0; //index of exon after last intron match in reference
+ // check intron chain overlap (match, containment, intron retention etc.)
+ int i = 1; // index of exon to the right of current qry intron
+ int j = 1; // index of exon to the right of current ref intron
+ bool intron_conflict = false; // overlapping introns have at least a mismatching splice site
+ // from here on we check all qry introns against ref introns
+ bool junct_match = false; // true if at least a junction match is found
+ bool ichain_match = false; // if there is intron (sub-)chain match, to be updated by any mismatch
+ bool intron_ovl = false; // if any intron overlap is found
+ bool intron_retention = false; // if any ref intron is covered by a qry exon
+ // intron chain (partial) match exon-index boundaries:
+ int imfirst = 0; // index of exon after first intron match in query (valid>0)
+ int jmfirst = 0; // index of exon after first intron match in reference (valid>0)
+ int imlast = 0; // index of exon after last intron match in query
+ int jmlast = 0; // index of exon after last intron match in reference
//--keep track of the last overlapping introns in both qry and ref:
- odta.ovlen=m.exonOverlapLen(r);
- //int q_first_iovl=-1, r_first_iovl=-1, q_last_iovl=-1, r_last_iovl=-1;
- //check for intron matches
- while (i<=imax && j<=jmax) {
- uint mstart=m.exons[i-1]->end; //qry intron start-end
- uint mend=m.exons[i]->start;
- uint rstart=r.exons[j-1]->end; //ref intron start-end
- uint rend=r.exons[j]->start;
- if (rend<mstart) { //qry intron starts after ref intron ends
- if (!intron_conflict && r.exons[j]->overlap(mstart+1, mend-1))
- intron_conflict=true; //next ref exon overlaps this qry intron
- if (!intron_retention && rstart>=m.exons[i-1]->start && rend<=m.exons[i-1]->end)
- intron_retention=true; //this ref intron is covered by previous qry exons[i-1]
- if (intron_ovl) ichain_match=false;
+ odta.ovlen = m.exonOverlapLen(r, &odta.ovlRefstart);
+ // int q_first_iovl=-1, r_first_iovl=-1, q_last_iovl=-1, r_last_iovl=-1;
+ // check for intron matches
+ while (i <= imax && j <= jmax)
+ {
+ uint mstart = m.exons[i - 1]->end; // qry intron start-end
+ uint mend = m.exons[i]->start;
+ uint rstart = r.exons[j - 1]->end; // ref intron start-end
+ uint rend = r.exons[j]->start;
+ if (rend < mstart)
+ { // qry intron starts after ref intron ends
+ if (!intron_conflict && r.exons[j]->overlap(mstart + 1, mend - 1))
+ intron_conflict = true; // next ref exon overlaps this qry intron
+ if (!intron_retention && rstart >= m.exons[i - 1]->start && rend <= m.exons[i - 1]->end)
+ intron_retention = true; // this ref intron is covered by previous qry exons[i-1]
+ if (intron_ovl)
+ ichain_match = false;
j++;
continue;
- } //no overlap with this ref intron, skipping it
- if (rstart>mend) { //qry intron ends before ref intron starts
- //if qry intron overlaps the exon on the left, we have an intron conflict
- if (!intron_conflict && r.exons[j-1]->overlap(mstart+1, mend-1))
- intron_conflict=true;
- if (!intron_retention && rstart>=m.exons[i]->start && rend<=m.exons[i]->end)
- intron_retention=true;
- if (intron_ovl) ichain_match=false;
+ } // no overlap with this ref intron, skipping it
+ if (rstart > mend)
+ { // qry intron ends before ref intron starts
+ // if qry intron overlaps the exon on the left, we have an intron conflict
+ if (!intron_conflict && r.exons[j - 1]->overlap(mstart + 1, mend - 1))
+ intron_conflict = true;
+ if (!intron_retention && rstart >= m.exons[i]->start && rend <= m.exons[i]->end)
+ intron_retention = true;
+ if (intron_ovl)
+ ichain_match = false;
i++;
continue;
- } //no intron overlap, skipping qry intron
- intron_ovl=true;
-
- //q_last_iovl=i; //keep track of the last overlapping introns in both qry and ref
- //r_last_iovl=j;
- //overlapping introns, test junction matching
- bool smatch=false;
- if (mstart==rstart) {
- smatch=true;
- odta.jbits.set( (i-1)<<1 );
+ } // no intron overlap, skipping qry intron
+ intron_ovl = true;
+
+ // q_last_iovl=i; //keep track of the last overlapping introns in both qry and ref
+ // r_last_iovl=j;
+ // overlapping introns, test junction matching
+ bool smatch = false;
+ if (mstart == rstart)
+ {
+ smatch = true;
+ odta.jbits.set((i - 1) << 1);
odta.numJmatch++;
- junct_match=true;
+ junct_match = true;
}
- bool ematch=false;
- if (mend==rend) {
- ematch=true;
- odta.jbits.set( ((i-1)<<1)+1 );
- odta.numJmatch++;
- junct_match=true;
+ bool ematch = false;
+ if (mend == rend)
+ {
+ ematch = true;
+ odta.jbits.set(((i - 1) << 1) + 1);
+ odta.numJmatch++;
+ junct_match = true;
}
- if (smatch && ematch) {
- //perfect match of this intron
- if (jmfirst==0) {
- ichain_match=true;
- jmfirst=j;
- imfirst=i;
+ if (smatch && ematch)
+ {
+ // full match of this reference intron
+ odta.inbits.set(i - 1);
+ odta.rint.set(j - 1);
+ if (jmfirst == 0)
+ {
+ ichain_match = true;
+ jmfirst = j;
+ imfirst = i;
}
- if (ichain_match) {
- imlast=i;
- jmlast=j;
+ if (ichain_match)
+ {
+ imlast = i;
+ jmlast = j;
}
- i++; j++;
+ i++;
+ j++;
continue;
}
- //intron overlapping but not fully matching
- intron_conflict=true;
- ichain_match=false;
- if (mend>rend) j++; else i++;
- } //while checking intron overlaps
+ // intron overlapping but not fully matching
+ intron_conflict = true;
+ ichain_match = false;
+ if (mend > rend)
+ j++;
+ else
+ i++;
+ } // while checking intron overlaps
// --- when qry intron chain is contained within ref intron chain
// qry terminal exons may poke (overhang) into ref's other introns
- int l_iovh=0; // overhang of q left boundary beyond the end of ref intron on the left
- int r_iovh=0; // same type of overhang through the ref intron on the right
- int qry_intron_poking=0;
+ int l_iovh = 0; // overhang of q left boundary beyond the end of ref intron on the left
+ int r_iovh = 0; // same type of overhang through the ref intron on the right
+ int qry_intron_poking = 0;
// --- when ref intron chain is contained within qry intron chain,
// terminal exons of ref may poke (overhang) into qry other introns
- int l_jovh=0; // overhang of q left boundary beyond the end of ref intron to the left
- int r_jovh=0; // same type of overhang through the ref intron on the right
- int ref_intron_poking=0;
- if (ichain_match) { //intron (sub-)chain compatible so far (but there could still be conflicts)
- if (imfirst==1 && imlast==imax) { // qry full intron chain match
- if (jmfirst==1 && jmlast==jmax) {//identical intron chains
- if (stricterMatch) {
- odta.ovlcode= (abs((int)r.exons[0]->start-(int)m.exons[0]->start)<=trange &&
- abs((int)r.exons.Last()->end-(int)m.exons.Last()->end)<=trange) ? '=' : '~';
- return odta;
+ int l_jovh = 0; // overhang of q left boundary beyond the end of ref intron to the left
+ int r_jovh = 0; // same type of overhang through the ref intron on the right
+ int ref_intron_poking = 0;
+ if (ichain_match)
+ { // intron (sub-)chain compatible so far (but there could still be conflicts)
+ if (imfirst == 1 && imlast == imax)
+ { // qry full intron chain match
+ if (jmfirst == 1 && jmlast == jmax)
+ { // identical intron chains
+ if (stricterMatch)
+ {
+ odta.ovlcode = (abs((int)r.exons[0]->start - (int)m.exons[0]->start) <= trange &&
+ abs((int)r.exons.Last()->end - (int)m.exons.Last()->end) <= trange)
+ ? '='
+ : '~';
+ }
+ else
+ {
+ odta.ovlcode = '=';
+ }
+
+ if (cdsMatch)
+ {
+ bool cdsEndsMatch = r.CDstart == m.CDstart && r.CDend == m.CDend;
+ if (!cdsEndsMatch)
+ {
+ if (odta.ovlcode == '=')
+ {
+ odta.ovlcode = ':';
+ }
+ else if (odta.ovlcode == '~')
+ {
+ odta.ovlcode = '_';
+ }
+ }
}
- odta.ovlcode='=';
+
return odta;
}
// -- a partial intron chain match
- if (jmfirst>1) {
- //find if m.start falls within any ref intron before jmfirst
- for (int j=jmfirst-1;j>0;--j)
- if (m.start<r.exons[j]->start) {
- if (m.start>r.exons[j-1]->end) { //m.start within this ref intron
+ if (jmfirst > 1)
+ {
+ // find if m.start falls within any ref intron before jmfirst
+ for (int j = jmfirst - 1; j > 0; --j)
+ if (m.start < r.exons[j]->start)
+ {
+ if (m.start > r.exons[j - 1]->end)
+ { // m.start within this ref intron
l_iovh = r.exons[j]->start - m.start;
break;
}
- else { intron_retention=true; ichain_match=false; }
+ else
+ {
+ intron_retention = true;
+ ichain_match = false;
+ }
}
}
- if (jmlast<jmax) {
- for (int j=jmlast;j<jmax;++j)
- if (m.end > r.exons[j]->end) {
- if (m.end < r.exons[j+1]->start) { //m.end within this ref intron
+ if (jmlast < jmax)
+ {
+ for (int j = jmlast; j < jmax; ++j)
+ if (m.end > r.exons[j]->end)
+ {
+ if (m.end < r.exons[j + 1]->start)
+ { // m.end within this ref intron
r_iovh = m.end - r.exons[j]->end;
- break;
+ break;
+ }
+ else
+ {
+ intron_retention = true;
+ ichain_match = false;
}
- else { intron_retention=true; ichain_match=false; }
}
}
- if (ichain_match && l_iovh<4 && r_iovh<4) {
- odta.ovlcode='c';
+ if (ichain_match && l_iovh < 4 && r_iovh < 4)
+ {
+ odta.ovlcode = 'c';
return odta;
}
- qry_intron_poking=GMAX(l_iovh, r_iovh);
- } else if ((jmfirst==1 && jmlast==jmax)) {//ref intron chain match
- //check if the reference j-chain is contained in qry i-chain
- //check for ref ends poking into qry introns
- if (imfirst>1) {
- for (int i=imfirst-1;i>0;--i)
- if (m.exons[i]->start>r.start) {
- if (r.start>m.exons[i-1]->end) {
+ qry_intron_poking = GMAX(l_iovh, r_iovh);
+ }
+ else if ((jmfirst == 1 && jmlast == jmax))
+ { // ref intron chain match
+ // check if the reference j-chain is contained in qry i-chain
+ // check for ref ends poking into qry introns
+ if (imfirst > 1)
+ {
+ for (int i = imfirst - 1; i > 0; --i)
+ if (m.exons[i]->start > r.start)
+ {
+ if (r.start > m.exons[i - 1]->end)
+ {
l_jovh = m.exons[i]->start - r.start;
break;
}
- else { ichain_match = false; }
+ else
+ {
+ ichain_match = false;
+ }
}
}
- if (imlast<imax) {
- for (int i=imlast;i<imax;++i)
- if (r.end > m.exons[i]->end) {
- if (r.end < m.exons[i+1]->start)
- { r_jovh = r.end - m.exons[i]->end; break; }
- else { ichain_match = false; }
+ if (imlast < imax)
+ {
+ for (int i = imlast; i < imax; ++i)
+ if (r.end > m.exons[i]->end)
+ {
+ if (r.end < m.exons[i + 1]->start)
+ {
+ r_jovh = r.end - m.exons[i]->end;
+ break;
+ }
+ else
+ {
+ ichain_match = false;
+ }
}
}
- if (ichain_match && l_jovh<4 && r_jovh<4) {
- odta.ovlcode='k'; //reverse containment
+ if (ichain_match && l_jovh < 4 && r_jovh < 4)
+ {
+ odta.ovlcode = 'k'; // reverse containment
return odta;
}
- ref_intron_poking=GMAX(l_jovh, r_jovh);
+ ref_intron_poking = GMAX(l_jovh, r_jovh);
}
}
//'=', 'c' and 'k' were checked and assigned, check for 'm' and 'n' before falling back to 'j'
- if (intron_retention) {
- //ref is boundary contained with qry intron chain ? that's not required for 'm'
- //GMessage("r_jovh=%d, r_iovh=%d, l_jovh=%d, l_iovh=%d\n", r_jovh, r_iovh, l_jovh, l_iovh);
- //GMessage("m.start=%d, r.exons[0]->end=%d, m.end=%d, r.exons[jmax]->start=%d\n",
- // m.start, r.exons[0]->end, m.end, r.exons[jmax]->start);
- //if (ref_intron_poking>0 && )
- //we just need to have no intron poking going on
- if (!intron_conflict && ref_intron_poking<4
- && qry_intron_poking<4) {
- odta.ovlcode='m';
+ if (intron_retention)
+ {
+ // ref is boundary contained with qry intron chain ? that's not required for 'm'
+ // GMessage("r_jovh=%d, r_iovh=%d, l_jovh=%d, l_iovh=%d\n", r_jovh, r_iovh, l_jovh, l_iovh);
+ // GMessage("m.start=%d, r.exons[0]->end=%d, m.end=%d, r.exons[jmax]->start=%d\n",
+ // m.start, r.exons[0]->end, m.end, r.exons[jmax]->start);
+ // if (ref_intron_poking>0 && )
+ // we just need to have no intron poking going on
+ if (!intron_conflict && ref_intron_poking < 4 && qry_intron_poking < 4)
+ {
+ odta.ovlcode = 'm';
return odta;
}
- odta.ovlcode='n';
+ odta.ovlcode = 'n';
return odta;
}
- //if (intron_ovl) { ?
- if (junct_match) {
- odta.ovlcode='j';
+ // if (intron_ovl) { ?
+ if (junct_match)
+ {
+ odta.ovlcode = 'j';
return odta;
}
- //what's left could be intron overlap but with no junction match = 'o'
- if (odta.ovlen>4) {
- odta.ovlcode='o';
+ // what's left could be intron overlap but with no junction match: 'o'
+ if (odta.ovlen > 4)
+ {
+ odta.ovlcode = 'o';
return odta;
}
- //but if there is no exon overlap, we have 'i' or 'y'
- //exons are within the introns of the other!
- if (m.start>r.start && r.end > m.end) {
- odta.ovlcode='i';
+ // but if there is no exon overlap, we can have 'i' or 'y'
+ // exons are within the introns of the other transcript
+ if (m.start > r.start && r.end > m.end)
+ {
+ odta.ovlcode = 'i';
return odta;
}
- odta.ovlcode='y';
- return odta; //all reference exons are within transfrag introns!
+ odta.ovlcode = 'y';
+ return odta; // all reference exons are within transfrag introns!
}
=====================================
gff.h
=====================================
@@ -1,6 +1,9 @@
#ifndef GFF_H
#define GFF_H
+#define GFF_VERSION 129
+//^^could be used for gffcompare/gffread builds to check for min version required
+
//#define CUFFLINKS 1
#include "GBase.h"
@@ -62,22 +65,47 @@ extern const byte CLASSCODE_OVL_RANK; //rank value just above 'o' class code
byte classcode_rank(char c); //returns priority value for class codes
-struct TOvlData {
- char ovlcode;
- int ovlen;
- int16_t numJmatch; //number of matching junctions (not introns)
- GBitVec jbits; //bit array with 1 bit for each junctions (total = 2 * num_introns)
- TOvlData(char oc=0, int olen=0, int16_t nmj=0, GBitVec* jb=NULL):ovlcode(oc),
- ovlen(olen),numJmatch(nmj),jbits(jb) { }
+struct TOvlData { //describe overlap with a ref transcript
+ char ovlcode=0;
+ int ovlen=0;
+ int ovlRefstart=0; //start coordinate of the overlap on reference (1-based)
+ int numJmatch=0; //number of matching splice sites (not introns!)
+ GBitVec jbits; //bit array with 1 bit for each junction (total = 2 * num_introns)
+ //a junction match is 1, otherwise 0
+ GBitVec inbits; // bit array with 1 bit per intron; ref-matching introns are set
+
+ GBitVec rint; //reference introns matched: bit array with one bit per reference intron
+ // (len = ref num_introns); bit is set if the ref intron was matched
+
+ //Note: skipped exons have 2 consecutive jbits set (starting at even index)
+ // with NO corresponding bit set in inbits
+ TOvlData(char oc=0, int olen=0, int nmj=0):ovlcode(oc),
+ ovlen(olen),ovlRefstart(0),numJmatch(nmj) { }
+ TOvlData(const TOvlData& o):ovlcode(o.ovlcode), ovlen(o.ovlen),
+ ovlRefstart(o.ovlRefstart), numJmatch(o.numJmatch),
+ jbits(o.jbits), inbits(o.inbits), rint(o.rint) {}
+ TOvlData(TOvlData&& o):ovlcode(o.ovlcode), ovlen(o.ovlen),
+ ovlRefstart(o.ovlRefstart), numJmatch(o.numJmatch) {
+ jbits=std::move(o.jbits);
+ inbits=std::move(o.inbits);
+ rint=std::move(o.rint);
+ }
+ //TODO: need move operator?
};
-TOvlData getOvlData(GffObj& m, GffObj& r, bool stricterMatch=false, int trange=0);
+TOvlData getOvlData(GffObj& m, GffObj& r, bool stricterMatch=false, int trange=0, bool cdsMatch=false);
char transcriptMatch(GffObj& a, GffObj& b, int& ovlen, int trange=0); //generic transcript match test
// -- return '=', '~' or 0
-char singleExonTMatch(GffObj& m, GffObj& r, int& ovlen, int trange=0);
+char singleExonTMatch(GffObj& m, GffObj& r, int& ovlen, int trange=0, int* ovlrefstart=0);
//single-exon transcript match test - returning '=', '~' or 0
+
+bool txStructureMatch(GffObj& a, GffObj& b, double SE_tolerance=0.8, int ME_range=10000);
+// generic transcript match test: for MET: intron chain match only
+// for single-exon tx: overlap >=80% of the longer transcript (SE_tolerance)
+// for multi-exon tx: terminal exons can differ <=10000 bases (ME_range)
+
//---
// -- tracking exon/CDS segments from local mRNA to genome coordinates
class GMapSeg:public GSeg {
@@ -724,6 +752,7 @@ class GffObj:public GSeg {
//deallocated when GffReader is destroyed
bool flag_FINALIZED :1; //if finalize() was already called for this GffObj
unsigned int gff_level :4; //hierarchical level (0..15)
+ unsigned int flag_USER_FLAGS :8; //user flags for setUserFlag() and getUserFlag()
};
};
//-- friends:
@@ -776,6 +805,10 @@ public:
void isGeneSegment(bool v) {flag_GENE_SEGMENT=v; }
bool promotedChildren() { return flag_CHILDREN_PROMOTED; }
void promotedChildren(bool v) { flag_CHILDREN_PROMOTED=v; }
+ // user flags are given as 8 bits bitmasks
+ void setUserFlags(byte fmask) {flag_USER_FLAGS |= fmask; }
+ void clearUserFlags(byte fmask) {flag_USER_FLAGS &= ~fmask; }
+ byte getUserFlags(byte fmask) { return (flag_USER_FLAGS & fmask); }
void setLevel(byte v) { gff_level=v; }
byte getLevel() { return gff_level; }
byte incLevel() { gff_level++; return gff_level; }
@@ -821,7 +854,7 @@ public:
if (sharedattrs) exons[0]->attrs=NULL;
}
}
- GffObj(char* anid=NULL):GSeg(0,0), exons(true,true,false), cdss(NULL), children(1,false), gscore() {
+ GffObj(const char* anid=NULL):GSeg(0,0), exons(true,true,false), cdss(NULL), children(1,false), gscore() {
//exons: sorted, free, non-unique
gffID=NULL;
uptr=NULL;
@@ -844,6 +877,42 @@ public:
geneID=NULL;
gene_name=NULL;
}
+ //constructor to use when programatically creating a new transcript
+ // (not when loading from GFF/BED/GTF input!)
+ GffObj(bool newTranscript, const char* _id=nullptr,
+ int refseq_id=-1, char _strand='+'):GSeg(0,0),
+ exons(true,true,false), cdss(NULL), children(1,false), gscore() {
+ //exons: sorted, free, non-unique
+ gffID=NULL;
+ uptr=NULL;
+ ulink=NULL;
+ flags=0;
+ udata=0;
+ parent=NULL;
+ if (newTranscript) {
+ flag_IS_TRANSCRIPT=true;
+ ftype_id=gff_fid_transcript;
+ subftype_id=gff_fid_exon;
+ flag_FINALIZED=true;
+
+ } else {
+ ftype_id=-1;
+ subftype_id=-1;
+ }
+ if (_id!=NULL) gffID=Gstrdup(_id);
+ gffnames_ref(names);
+ CDstart=0; // hasCDS <=> CDstart>0
+ CDend=0;
+ CDphase=0;
+ gseq_id=refseq_id;
+ track_id=-1;
+ strand=_strand;
+ attrs=NULL;
+ covlen=0;
+ geneID=NULL;
+ gene_name=NULL;
+ }
+
~GffObj() {
GFREE(gffID);
GFREE(gene_name);
@@ -951,28 +1020,38 @@ public:
int exonOverlapIdx(GList<GffExon>& segs, uint s, uint e, int* ovlen=NULL, int start_idx=0);
- int exonOverlapLen(GffObj& m) {
- if (start>m.end || m.start>end) return 0;
+ int exonOverlapLen(GffObj& r, int *rovlstart=NULL) {
+ if (rovlstart) *rovlstart=0;
+ if (start>r.end || r.start>end) return 0;
int i=0;
int j=0;
int ovlen=0;
- while (i<exons.Count() && j<m.exons.Count()) {
+ int rxpos=0;
+ while (i<exons.Count() && j<r.exons.Count()) {
uint istart=exons[i]->start;
uint iend=exons[i]->end;
- uint jstart=m.exons[j]->start;
- uint jend=m.exons[j]->end;
- if (istart>jend) { j++; continue; }
+ uint jstart=r.exons[j]->start;
+ uint jend=r.exons[j]->end;
+ if (istart>jend) { j++; rxpos+=jend-jstart+1; continue; }
if (jstart>iend) { i++; continue; }
//exon overlap
- uint ovstart=GMAX(istart,jstart);
+ uint ovstart=0;
+ if (istart>jstart) {
+ ovstart=istart;
+ if (rovlstart && *rovlstart==0) *rovlstart=rxpos+istart-jstart+1;
+ } else {
+ ovstart=jstart;
+ if (rovlstart && *rovlstart==0) *rovlstart=rxpos+1;
+ }
if (iend<jend) {
ovlen+=iend-ovstart+1;
i++;
}
else {
ovlen+=jend-ovstart+1;
- j++;
+ j++; rxpos+=jend-jstart+1;
}
+
}//while comparing exons
return ovlen;
}
@@ -1015,6 +1094,12 @@ public:
GFREE(geneID);
if (gene_id) geneID=Gstrdup(gene_id);
}
+ void setID(const char* tid) {
+ if (tid) {
+ GFREE(gffID);
+ gffID=Gstrdup(tid);
+ }
+ }
int addSeg(GffLine* gfline);
int addSeg(int fnid, GffLine* gfline);
void getCDSegs(GVec<GffExon>& cds);
@@ -1041,7 +1126,7 @@ public:
void printExonList(FILE* fout); //print comma delimited list of exon intervals
void printCDSList(FILE* fout); //print comma delimited list of CDS intervals
- void printBED(FILE* fout, bool cvtChars);
+ void printBED(FILE* fout, bool cvtChars=false);
//print a BED-12 line + GFF3 attributes in 13th field
void printSummary(FILE* fout=NULL);
@@ -1100,6 +1185,15 @@ class GSeqStat {
int gfo_cmpByLoc(const pointer p1, const pointer p2);
int gfo_cmpRefByID(const pointer p1, const pointer p2);
+//multi-exon transcripts basic comparison/ordering function
+// based on intron chain comparison - use only for lists with multiple multi-exon transcripts!
+int txCmpByIntrons(const pointer p1, const pointer p2);
+int txCmpByExons(const pointer p1, const pointer p2);
+
+//single-exon transcripts basic comparison/ordering function
+int seTxCompareProc(pointer* p1, pointer* p2);
+
+
class GfList: public GList<GffObj> {
public:
@@ -1224,7 +1318,9 @@ class GffReader {
//GffObj* replaceGffRec(GffLine* gffline, bool keepAttr, bool noExonAttr, int replaceidx);
GffObj* updateGffRec(GffObj* prevgfo, GffLine* gffline);
GffObj* updateParent(GffObj* newgfh, GffObj* parent);
- bool readExonFeature(GffObj* prevgfo, GffLine* gffline, GHash<CNonExon*>* pex = NULL);
+
+ bool readExonFeature(GffObj* prevgfo, GffLine* gffline, GHash<CNonExon*>* pex=NULL);
+
GPVec<GSeqStat> gseqStats; //populated after finalize() with only the ref seqs in this file
GffReader(FILE* f=NULL, bool t_only=false, bool sort=false):linebuf(NULL), fpos(0),
buflen(0), flags(0), fh(f), fname(NULL), commentParser(NULL), gffline(NULL),
View it on GitLab: https://salsa.debian.org/med-team/libgclib/-/commit/d83ce93848b2fa6b3151c98438132d10febd00d8
--
View it on GitLab: https://salsa.debian.org/med-team/libgclib/-/commit/d83ce93848b2fa6b3151c98438132d10febd00d8
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20251127/6783b00a/attachment-0001.htm>
More information about the debian-med-commit
mailing list