[med-svn] [Git][med-team/libgclib][upstream] New upstream version 0.12.8+ds

Thu Nov 27 21:41:49 GMT 2025


Étienne Mollier pushed to branch upstream at Debian Med / libgclib


Commits:
d83ce938 by Étienne Mollier at 2025-11-27T22:41:06+01:00
New upstream version 0.12.8+ds
- - - - -


8 changed files:

- GBase.cpp
- GBase.h
- GBitVec.h
- GFaSeqGet.h
- GList.hh
- GVec.hh
- gff.cpp
- gff.h


Changes:

=====================================
GBase.cpp
=====================================
@@ -46,7 +46,7 @@ void GError(const char* format,...){
     va_end(arguments);
     #ifdef DEBUG
      // comment this if you do NOT want a core dump
-     abort();
+     //abort();
     #endif
   #endif
     exit(1);
@@ -119,6 +119,13 @@ int G_mkdir(const char* path, int perms = (S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH
  #endif
 }
 
+int Grmdir(const char *path) {
+#ifdef _WIN32
+  return !RemoveDirectoryA(path);
+#else
+  return rmdir(path);
+#endif
+}
 
 void Gmktempdir(char* templ) {
 #ifdef _WIN32
@@ -840,8 +847,9 @@ int fileExists(const char* fname) {
 int64 fileSize(const char* fpath) {
 #ifdef _WIN32
     WIN32_FILE_ATTRIBUTE_DATA fad;
+    
     if (!GetFileAttributesEx(fpath, GetFileExInfoStandard, &fad))
-        return -1; // error condition, could call GetLastError to find out more
+      return -1; // error condition, could call GetLastError to find out more
     LARGE_INTEGER size;
     size.HighPart = fad.nFileSizeHigh;
     size.LowPart = fad.nFileSizeLow;


=====================================
GBase.h
=====================================
@@ -1,6 +1,6 @@
 #ifndef G_BASE_DEFINED
 #define G_BASE_DEFINED
-#define GCLIB_VERSION "0.12.7"
+#define GCLIB_VERSION "0.12.8"
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
@@ -197,6 +197,7 @@ inline int iround(double x) {
 char* Grealpath(const char *path, char *resolved_path);
 
 int Gmkdir(const char *path, bool recursive=true, int perms = (S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH));
+int Grmdir(const char *path);
 
 void Gmktempdir(char* templ);
 
@@ -249,9 +250,10 @@ inline void GFree(pointer* ptr){
 
 inline bool GMalloc(pointer* ptr,unsigned long size){
     //GASSERT(ptr);
+    *ptr=nullptr;
     if (size!=0)
   	  *ptr=malloc(size);
-    return *ptr!=NULL;
+    return *ptr!=nullptr;
  }
 
 // Allocate 0-filled memory
@@ -415,14 +417,18 @@ struct GSeg {
         return (r->end<end)? r->end-start+1 : end-start+1;
         }
      }
-  int overlapLen(uint rstart, uint rend) {
+  // refstart = overlap start in ref coordinate space (1-based)
+  int overlapLen(uint rstart, uint rend, int* refstart=NULL) {
      if (rstart>rend) { Gswap(rstart,rend); }
+     if (refstart) *refstart=0;
      if (start<rstart) {
         if (rstart>end) return 0;
+        if (refstart) *refstart=1;
         return (rend>end) ? end-rstart+1 : rend-rstart+1;
         }
        else { //rstart<=start
         if (start>rend) return 0;
+        if (refstart) *refstart=(start-rstart)+1;
         return (rend<end)? rend-start+1 : end-start+1;
         }
   }


=====================================
GBitVec.h
=====================================
@@ -109,8 +109,8 @@ public:
 
 
   /// GBitVec default ctor - Creates an empty GBitVec.
-  GBitVec() : Size(0), Capacity(0) {
-    fBits = 0;
+  GBitVec() : fBits(0), Size(0), Capacity(0) {
+
   }
 
   /// GBitVec ctor - Creates a GBitVec of specified number of bits. All
@@ -128,13 +128,20 @@ public:
     if (value)
       clear_unused_bits();
   }
+
   unsigned long getMemorySize() const {
 	   unsigned long r = ((unsigned long) Capacity) * sizeof(BitWord);
 	   return r;
   }
+  //move constructor
+  GBitVec(GBitVec&& o):fBits(o.fBits), Size(o.Size), Capacity(o.Capacity)
+  {
+	o.fBits=nullptr;
+	o.Size=0;o.Capacity=0;
+  }
 
   GBitVec(const GBitVec* RHS) {
-    if (RHS==NULL) {
+    if (RHS==NULL || RHS->size()==0) {
       Size = 0;
       fBits = 0;
       Capacity = 0;
@@ -142,6 +149,7 @@ public:
     }
     Capacity = NumBitWords(RHS->size());
     GMALLOC(fBits, Capacity * sizeof(BitWord));
+    Size = RHS->size();
     memcpy(fBits, RHS->fBits, Capacity * sizeof(BitWord));
   }
 


=====================================
GFaSeqGet.h
=====================================
@@ -178,7 +178,7 @@ class GFastaDb {
             //GMessage("creating GFastaIndex with fastaPath=%s, fainame=%s\n", fastaPath, fainame.chars());
             faIdx=new GFastaIndex(fastaPath, fainame);
             char* fainamecwd=fainame; //will hold just the file name without the path
-            char* plast=strrchr(fainamecwd, '/'); //CHPATHSEP
+            char* plast=strrchr(fainamecwd, '/');
             if (plast!=NULL) {
               fainamecwd=plast+1; //point to the file name only
             }


=====================================
GList.hh
=====================================
@@ -11,11 +11,11 @@ Sortable collections of objects and object pointers
 #define GLIST_UNSORTED_ERR "Operation not allowed on an unsorted list!\n"
 
 //------ useful macros:
-#define BE_UNSORTED if (fCompareProc!=NULL) { GError(GLIST_SORTED_ERR); return; }
-#define BE_SORTED if (fCompareProc==NULL) { GError(GLIST_UNSORTED_ERR); return; }
+#define BE_UNSORTED if (fCompareProc!=nullptr) { GError(GLIST_SORTED_ERR); return; }
+#define BE_SORTED if (fCompareProc==nullptr) { GError(GLIST_UNSORTED_ERR); return; }
 
-#define SORTED (fCompareProc!=NULL)
-#define UNSORTED (fCompareProc==NULL)
+#define SORTED (fCompareProc!=nullptr)
+#define UNSORTED (fCompareProc==nullptr)
 
 // GArray is the sortable array type, requires the comparison operator < to be defined
 template <class OBJ> class GArray:public GVec<OBJ> {
@@ -29,7 +29,7 @@ template <class OBJ> class GArray:public GVec<OBJ> {
       }
     GCompareProc* fCompareProc;
   public:
-    GArray(GCompareProc* cmpFunc=NULL);
+    GArray(GCompareProc* cmpFunc=nullptr);
     GArray(bool sorted, bool unique=false);
     GArray(int init_capacity, bool sorted, bool unique=false);
     GArray(const GArray<OBJ>& array); //copy constructor
@@ -44,13 +44,13 @@ template <class OBJ> class GArray:public GVec<OBJ> {
              Sort();
              }
           }
-      else fCompareProc=NULL;
+      else fCompareProc=nullptr;
       }
-    //sort the array if cmpFunc not NULL or changes
+    //sort the array if cmpFunc not nullptr or changes
     int Add(OBJ* item); // specific implementation if sorted
     int Add(OBJ& item) { return Add(&item); } //both will CREATE a new OBJ and COPY to it
                        // using OBJ new operator=
-    int AddIfNew(OBJ& item, int* fidx=NULL); //requires == operator
+    int AddIfNew(OBJ& item, int* fidx=nullptr); //requires == operator
         //if equal item not found, item is added and return the index of it
         //otherwise returns -1 and fidx is set to the equal item location
     int cAdd(OBJ item) { return Add(&item); }
@@ -61,7 +61,7 @@ template <class OBJ> class GArray:public GVec<OBJ> {
     //this will reject identical items in sorted lists only!
     void setUnique(bool beUnique) { fUnique = beUnique; };
     void Sort(); //explicit sort may be requested
-    bool Sorted() { return fCompareProc!=NULL; }
+    bool Sorted() { return fCompareProc!=nullptr; }
     void Replace(int idx, OBJ& item); //Put, use operator= to copy
     int  Unique() { return fUnique; }
     int IndexOf(OBJ& item);
@@ -91,7 +91,7 @@ template <class OBJ> class GList:public GPVec<OBJ> {
   public:
     void sortInsert(int idx, OBJ* item); //special insert in sorted lists
          //WARNING: the caller must know the insert index such that the sort order is preserved!
-    GList(GCompareProc* compareProc=NULL); //free by default
+    GList(GCompareProc* compareProc=nullptr); //free by default
     GList(GCompareProc* compareProc, //unsorted by default
         GFreeProc *freeProc,
         bool beUnique=false);
@@ -105,8 +105,8 @@ template <class OBJ> class GList:public GPVec<OBJ> {
     //void Clear();
     //~GList();
     void setSorted(GCompareProc* compareProc);
-       //sorted if compareProc not NULL; sort the list if compareProc changes !
-    bool Sorted() { return fCompareProc!=NULL; }
+       //sorted if compareProc not nullptr; sort the list if compareProc changes !
+    bool Sorted() { return fCompareProc!=nullptr; }
     void setSorted(bool sorted) {
      if (sorted) {
          if (fCompareProc!=&DefaultCompareProc) {
@@ -114,12 +114,12 @@ template <class OBJ> class GList:public GPVec<OBJ> {
              Sort();
              }
           }
-      else fCompareProc=NULL;
+      else fCompareProc=nullptr;
       }
     int Add(OBJ* item); //-- specific implementation if sorted - may become an Insert()
     void Add(GList<OBJ>& list); //add all pointers from another list
 
-    OBJ* AddIfNew(OBJ* item, bool deleteIfFound=true, int* fidx=NULL);
+    OBJ* AddIfNew(OBJ* item, bool deleteIfFound=true, int* fidx=nullptr);
     // default: delete item if Found() (and pointers are not equal)!
     //returns the equal (==) object if it's in the list already
     //or the item itself if it is unique and actually added
@@ -159,7 +159,7 @@ template <class OBJ> class GList:public GPVec<OBJ> {
 template <class OBJ> GArray<OBJ>::GArray(const GArray<OBJ>& array):GVec<OBJ>(0) { //copy constructor
  this->fCount=array.fCount;
  this->fCapacity=array.fCapacity;
- this->fArray=NULL;
+ this->fArray=nullptr;
  if (this->fCapacity>0) {
     this->fArray=new OBJ[this->fCapacity];
     }
@@ -196,19 +196,19 @@ template <class OBJ> GArray<OBJ>::GArray(GCompareProc* cmpFunc):GVec<OBJ>(0) {
 
 template <class OBJ> GArray<OBJ>::GArray(bool sorted, bool unique):GVec<OBJ>(0) {
   fUnique=unique;
-  fCompareProc = sorted ? DefaultCompareProc : NULL;
+  fCompareProc = sorted ? DefaultCompareProc : nullptr;
 }
 
 template <class OBJ> GArray<OBJ>::GArray(int init_capacity,
                         bool sorted, bool unique):GVec<OBJ>(init_capacity) {
   fUnique=unique;
-  fCompareProc=sorted ? DefaultCompareProc : NULL;
+  fCompareProc=sorted ? DefaultCompareProc : nullptr;
 }
 
 template <class OBJ> void GArray<OBJ>::setSorted(GCompareProc* cmpFunc) {
   GCompareProc* old_proc=fCompareProc;
   fCompareProc=cmpFunc;
-  if (fCompareProc!=old_proc && fCompareProc!=NULL)
+  if (fCompareProc!=old_proc && fCompareProc!=nullptr)
        Sort(); //new compare method
 }
 
@@ -226,7 +226,7 @@ template <class OBJ> bool GArray<OBJ>::Exists(OBJ& item) {
 
 
 template <class OBJ> int GArray<OBJ>::Add(OBJ* item) {
- if (item==NULL) return -1;
+ if (item==nullptr) return -1;
  int result;
  if (SORTED) {
    if (Found(*item, result))
@@ -278,7 +278,7 @@ template <class OBJ> int GArray<OBJ>::AddIfNew(OBJ& item,
 	this->fArray[rpos] = item; //operator= copies the item
 	this->fCount++;
  }
- if (fidx!=NULL) *fidx=rpos;
+ if (fidx!=nullptr) *fidx=rpos;
  return rpos;
 }
 
@@ -358,8 +358,8 @@ template <class OBJ> void GArray<OBJ>::Replace(int idx, OBJ& item) {
 }
 
 template <class OBJ> void GArray<OBJ>::Sort() {
- if (fCompareProc==NULL) { fCompareProc=DefaultCompareProc; }
- if (this->fArray!=NULL && this->fCount>0)
+ if (fCompareProc==nullptr) { fCompareProc=DefaultCompareProc; }
+ if (this->fArray!=nullptr && this->fCount>0)
      this->qSort(0, this->fCount-1, fCompareProc);
 }
 
@@ -371,9 +371,11 @@ template <class OBJ> GList<OBJ>::GList(const GList<OBJ>& list):GPVec<OBJ>(list)
  fCompareProc=list.fCompareProc;
 }
 
-template <class OBJ> GList<OBJ>::GList(GList<OBJ>&& list):GPVec<OBJ>(list) { //move constructor
- fUnique=list.fUnique;
- fCompareProc=list.fCompareProc;
+//move constructor
+template <class OBJ> GList<OBJ>::GList(GList<OBJ>&& other) noexcept:
+   GPVec<OBJ>(std::move(other)), fUnique(other.fUnique), fCompareProc(other.fCompareProc){ 
+ //other.fUnique = false;
+ //other.fCompareProc = nullptr;
 }
 
 
@@ -410,19 +412,19 @@ template <class OBJ> GList<OBJ>::GList(bool sorted,
         }
        else {
         fCompareProc=&DefaultCompareProc;
-        this->fFreeProc=NULL;
+        this->fFreeProc=nullptr;
         fUnique=beUnique;
         }
      }
    else {
      if (free_elements) {
-        fCompareProc=NULL;
+        fCompareProc=nullptr;
         this->fFreeProc=GPVec<OBJ>::DefaultFreeProc;
         fUnique=beUnique;
         }
       else {
-        fCompareProc=NULL;
-        this->fFreeProc=NULL;
+        fCompareProc=nullptr;
+        this->fFreeProc=nullptr;
         fUnique=beUnique;
         }
      }
@@ -436,7 +438,7 @@ template <class OBJ> GList<OBJ>::GList(int init_capacity, bool sorted,
       fUnique=beUnique;
       }
    else {
-      fCompareProc=NULL;
+      fCompareProc=nullptr;
       fUnique=beUnique;
       }
 }
@@ -453,16 +455,14 @@ template <class OBJ> GList<OBJ>& GList<OBJ>::operator=(GList& list) {
  return *this;
 }
 
+// move operator
 template <class OBJ> GList<OBJ>& GList<OBJ>::operator=(GList&& list) {
  if (&list!=this) {
-     GPVec<OBJ>::Clear();
+     GPVec<OBJ>::operator=(std::move(list));
      fCompareProc=list.fCompareProc;
-     this->fCount=list.fCount;
-     this->fFreeProc=list.fFreeProc;
-     this->fList=list.fList;
-     list.fList=NULL;
-     list.fCount=0;
-     list.fCapacity=0;
+     fUnique=list.fUnique;    
+     //list.fUnique=false;
+     //list.fCompareProc=nullptr;
    }
  return *this;
 }
@@ -470,7 +470,7 @@ template <class OBJ> GList<OBJ>& GList<OBJ>::operator=(GList&& list) {
 template <class OBJ> void GList<OBJ>::setSorted(GCompareProc* compareProc) {
  GCompareProc* old_proc=fCompareProc;
  fCompareProc=compareProc;
- if (fCompareProc!=old_proc && fCompareProc!=NULL)
+ if (fCompareProc!=old_proc && fCompareProc!=nullptr)
        Sort(); //new compare method
 }
 
@@ -494,15 +494,15 @@ template <class OBJ> bool GList<OBJ>::Exists(OBJ* item) {
 
 template <class OBJ> int GList<OBJ>::Add(OBJ* item) {
  int result;
- if (item==NULL) return -1;
+ if (item==nullptr) return -1;
  if (SORTED) {
    if (Found(item, result))
       if (fUnique) return -1; //duplicates forbidden
    //Found sets result to the position where the item should be!
    sortInsert(result, item);
    }
-  else {
-   if (fUnique && Found(item,result)) return -1; //set behaviour
+  else { //not a sorted list
+   if (fUnique && Found(item,result)) return -1; //cannot accept duplicates
    result = this->fCount;
    if (result==this->fCapacity) GPVec<OBJ>::Grow();
    this->fList[result]=item;
@@ -521,7 +521,7 @@ template <class OBJ> OBJ* GList<OBJ>::AddIfNew(OBJ* item,
     if (deleteIfFound && (pointer)item != (pointer)(this->fList[r])) {
        this->deallocate_item(item);
     }
-    if (fidx!=NULL) *fidx=r;
+    if (fidx!=nullptr) *fidx=r;
     return this->fList[r]; //found
  }
  //not found:
@@ -534,7 +534,7 @@ template <class OBJ> OBJ* GList<OBJ>::AddIfNew(OBJ* item,
    this->fList[r]=item;
    this->fCount++;
  }
- if (fidx!=NULL) *fidx=r;
+ if (fidx!=nullptr) *fidx=r;
  return item;
 }
 
@@ -567,7 +567,7 @@ template <class OBJ> bool GList<OBJ>::Found(OBJ* item, int& idx) {
  //search the list by using fCompareProc (if defined)
  //or == operator for a non-sortable list
  //for sorted lists, even when the result is false, the idx is
- //set to the closest matching object!
+ //set to the insert index!
  int i;
  idx=-1;
  if (this->fCount==0) { idx=0;return false;}
@@ -611,6 +611,7 @@ template <class OBJ> bool GList<OBJ>::Found(OBJ* item, int& idx) {
          }
       i++;
       }
+   idx=i;   
    return false;
    }
 }
@@ -648,7 +649,7 @@ template <class OBJ> void GList<OBJ>::Put(int idx, OBJ* item, bool re_sort) {
  // this may BREAK the sort order unless the "re_sort" parameter is given
  if (idx<0 || idx>this->fCount) GError(GVEC_INDEX_ERR, idx);
  this->fList[idx]=item;
- if (SORTED && item!=NULL && re_sort) Sort(); //re-sort
+ if (SORTED && item!=nullptr && re_sort) Sort(); //re-sort
 }
 
 template <class OBJ> int GList<OBJ>::Remove(OBJ* item) {
@@ -659,8 +660,8 @@ template <class OBJ> int GList<OBJ>::Remove(OBJ* item) {
 }
 
 template <class OBJ> void GList<OBJ>::Sort() {
- if (fCompareProc==NULL) fCompareProc = DefaultCompareProc;
- if (this->fList!=NULL && this->fCount>0)
+ if (fCompareProc==nullptr) fCompareProc = DefaultCompareProc;
+ if (this->fList!=nullptr && this->fCount>0)
      this->qSort(0, this->fCount-1, fCompareProc);
 }
 


=====================================
GVec.hh
=====================================
@@ -86,7 +86,8 @@ template <class OBJ> class GVec {
          return fArray[0];
          }
     void Clear();
-    void Delete(int index);
+    void Delete(int index); //delete item at index
+    void Delete(int index, int delcount); //delete delcount items starting at index
     void Replace(int idx, OBJ& item); //Put, use operator= to copy
     void Exchange(int idx1, int idx2);
     void Swap(int idx1, int idx2)  { Exchange(idx1, idx2); }
@@ -182,7 +183,7 @@ template <class OBJ> class GPVec {
     GPVec(GPVec<OBJ>&& list); //move construstor
     GPVec(GPVec<OBJ>* list); //similar to a copy constructor
     GPVec<OBJ>& operator=(const GPVec<OBJ>& list);
-    GPVec<OBJ>& operator=(GPVec<OBJ>&& list);//move assignment operator
+    GPVec<OBJ>& operator=(GPVec<OBJ>&& list);//move operator
     inline OBJ* Get(int i) {
     	TEST_INDEX(i);
         return fList[i];
@@ -355,9 +356,9 @@ template <class OBJ> void GVec<OBJ>::Grow(int idx, OBJ& item) {
  //expands and inserts item at idx at the same time
  //insertGrow(NewCapacity, idx, item);
  OBJ* newList=new OBJ[NewCapacity];
- // operator= required!
+ // move operator= required!
  std::move(& fArray[0], & fArray[idx], & newList[0]);
- newList[idx]=item;
+ newList[idx]=item; //operator=
  //copy data after idx
  std::move(& fArray[idx], & fArray[fCount], & newList[idx+1]);
  delete[] fArray;
@@ -465,19 +466,15 @@ template <class OBJ> void GVec<OBJ>::Delete(int idx) {
  TEST_INDEX(idx);
  std::move(& fArray[idx+1], & fArray[fCount], & fArray[idx]);
  fCount--;
- /*
- if (std::is_trivial<OBJ>::value) {
-   if (index<fCount)
-    //move higher elements if any (shift down)
-      memmove(&fArray[index], &fArray[index+1], (fCount-index)*sizeof(OBJ));
-   }
- else {
-   while (index<fCount) {
-      fArray[index]=fArray[index+1];
-      index++;
-      }
-  }
-  */
+}
+
+template <class OBJ> void GVec<OBJ>::Delete(int idx, int delcount) {
+ if (delcount<1 || delcount>fCount-idx)
+	 GError("GVec::Delete error: cannot delete %d items from %d-long GVec at pos %d !\n",
+			 delcount, fCount, idx);
+ TEST_INDEX(idx);
+ std::move(& fArray[idx+delcount], & fArray[fCount], & fArray[idx]);
+ fCount-=delcount;
 }
 
 template <class OBJ> void GVec<OBJ>::setCount(int NewCount) {
@@ -559,7 +556,7 @@ template <class OBJ> GPVec<OBJ>::GPVec(const GPVec& list) { //copy constructor
  if (fCapacity>0) {
     //GMALLOC(fList, fCapacity*sizeof(OBJ*));
 	fList=new OBJ*[fCapacity];
-	std::move(&list.fList[0], &list.fList[fCount], &fList[0]);
+	std::copy(&list.fList[0], &list.fList[fCount], &fList[0]);
     //memcpy(fList, list.fList, fCount*sizeof(OBJ*));
  }
 }
@@ -583,7 +580,7 @@ template <class OBJ> GPVec<OBJ>::GPVec(GPVec* plist) { //another copy constructo
   if (fCapacity>0) {
     //GMALLOC(fList, fCapacity*sizeof(OBJ*));
     fList=new OBJ*[fCapacity];
-    std::move(& (plist->fList[0]), & (plist->fList[fCount]), &fList[0]);
+    std::copy(& (plist->fList[0]), & (plist->fList[fCount]), &fList[0]);
     //memcpy(fList, plist->fList, fCount*sizeof(OBJ*));
   }
 }
@@ -600,7 +597,7 @@ template <class OBJ> GPVec<OBJ>& GPVec<OBJ>::operator=(const GPVec& list) {
         //GMALLOC(fList, fCapacity*sizeof(OBJ*));
         //memcpy(fList, list.fList, fCount*sizeof(OBJ*));
    		fList=new OBJ*[fCapacity];
-   		std::move(&list.fList[0], &list.fList[fCount], &fList[0]);
+   		std::copy(&list.fList[0], &list.fList[fCount], &fList[0]);
         }
      }
  return *this;


=====================================
gff.cpp
=====================================
@@ -38,9 +38,11 @@ const byte CLASSCODE_J_RANK = 6; // all junctional based overlaps
 
 byte classcode_rank(char c) {
 	switch (c) {
-		case '=': return 0; //intron chain match or full exon chain match if strict matching is enabled
-		case '~': return 1; //intron chain match when strict matching is enabled
-		case 'c': return 4; //containment, perfect partial match (transfrag contained in reference)
+		case '=': return 0; // intron chain match or full exon chain match if strict matching is enabled
+		case '~': return 1; // intron chain match when strict matching is enabled
+		case ':': return 2; // intron chain match with no CDS match when cds matching is enabled
+		case '_': return 3; // intron chain match with no CDS match when strict matching and cds matching is enabled
+		case 'c': return 4; // containment, perfect partial match (transfrag contained in reference)
 		case 'k': return 4; // reverse containment (reference contained in transfrag)
 		case 'm': return 6; // full span overlap with all reference introns either matching or retained
 		case 'n': return 6; // partial overlap transfrag with at least one intron retention
@@ -52,9 +54,9 @@ byte classcode_rank(char c) {
 		case 'x': return 18; // generic overlap on opposite strand (usually wrong strand mapping)
 		case 'i': return 20; // intra-intron (transfrag fully contained within a reference intron)
 		case 'y': return 30; // no exon overlap: ref exons fall within transfrag introns! (reverse of i)
-		case 'p': return 90; //polymerase run
-		case 'r': return 92; //repeats
-		case 'u': return 94; //intergenic
+		case 'p': return 90; // polymerase run
+		case 'r': return 92; // repeats
+		case 'u': return 94; // intergenic
 		case  0 : return 100;
 		 default: return 96;
 		}
@@ -101,6 +103,86 @@ int gfo_cmpRefByID(const pointer p1, const pointer p2) {
              else return (g1.gseq_id-g2.gseq_id); // sort refs by their id# order
 }
 
+//only for multi-exon transcripts: comparison/ordering function
+// based on intron chain comparison and matching only!
+int txCmpByIntrons(const pointer p1, const pointer p2) {
+  //const GffObj* a = static_cast<const GffObj*>(p1);
+  //const GffObj* b = static_cast<const GffObj*>(p2);
+  GffObj* a = (GffObj*)p1;
+  GffObj* b = (GffObj*)p2;
+  // check chromosome and strand
+  if (a->gseq_id != b->gseq_id) return a->gseq_id < b->gseq_id ? -1 : 1;
+  if (a->strand != b->strand) return a->strand < b->strand ? -1 : 1;
+  // compare intron chains by their start and end, regardless of exon count
+  int min_exon_count = GMIN(a->exons.Count(), b->exons.Count());
+  for (int i = 0; i < min_exon_count-1; ++i) {
+        int a_istart = a->exons[i]->end+1;
+        int a_iend = a->exons[i+1]->start-1;
+        int b_istart = b->exons[i]->end+1;
+        int b_iend = b->exons[i+1]->start-1;
+
+        if (a_istart != b_istart) return a_istart < b_istart ? -1 : 1;
+        if (a_iend != b_iend) return a_iend < b_iend ? -1 : 1;
+  }
+  // after comparing shared introns, tx with more exons is "greater"
+  if (a->exons.Count() != b->exons.Count()) {
+      return a->exons.Count() < b->exons.Count() ? -1 : 1;
+  }
+  // intron chains are identical, they are considered "equal"
+  return 0;
+}
+
+// compare transcripts with strict ordering by their exon positions
+// function returns 0 (equal) if both transcripts have exact same exons
+int txCmpByExons(const pointer p1, const pointer p2) {
+	GffObj* t1 = (GffObj*)p1;
+	GffObj* t2 = (GffObj*)p2;
+	// check chromosome and strand
+	if (t1->gseq_id != t2->gseq_id) return t1->gseq_id - t2->gseq_id;
+ 	if (t1->strand != t2->strand) return t1->strand < t2->strand ? -1 : 1;
+	int64_t t1_start = t1->start;
+	int64_t t1_end = t1->end;
+	int64_t t2_start = t2->start;
+	int64_t t2_end = t2->end;
+
+   if (t1_start != t2_start) return (int)(t1_start - t2_start);
+   if (t1_end != t2_end) return (int)(t1_end - t2_end);
+
+
+   int minExonCount = GMIN(t1->exons.Count(), t2->exons.Count());
+   for (int i = 0; i < minExonCount; ++i) {
+        int64_t e1_start = t1->exons[i]->start;
+		int64_t e1_end = t1->exons[i]->end;
+		int64_t e2_start = t2->exons[i]->start;
+		int64_t e2_end = t2->exons[i]->end;
+
+        if (e1_start != e2_start) return (int)(e1_start - e2_start);
+        if (e1_end != e2_end) return (int)(e1_end - e2_end);
+    }
+
+    if (t1->exons.Count() != t2->exons.Count()) return t1->exons.Count()-t2->exons.Count();
+
+    return 0; // Equal if they have the same number and position of exons
+}
+
+//single-exon transcripts basic comparison/ordering function
+int seTxCompareProc(pointer* p1, pointer* p2) {
+  // check chromosome and strand
+  GffObj* a = (GffObj*)p1;
+  GffObj* b = (GffObj*)p2;
+  if (a->gseq_id != b->gseq_id) return a->gseq_id < b->gseq_id ? -1 : 1;
+  if (a->strand != b->strand) return a->strand < b->strand ? -1 : 1;
+  //first by start positions
+  if (a->start != b->start) return a->start < b->start ? -1 : 1;
+  // then by end positions
+  if (a->end != b->end) return a->end < b->end ? -1 : 1;
+  return 0; // they are equal (both start and end match)
+}
+
+
+
+
+
 char* GffLine::extractGFFAttr(char* & infostr, const char* oline, const char* attr, bool caseStrict,
 		   bool enforce_GTF2, int* rlen, bool deleteAttr) {
  //parse a key attribute and remove it from the info string
@@ -1010,6 +1092,10 @@ int GffObj::addExon(uint segstart, uint segend, int8_t exontype, char phase, Gff
 	} //exon overlap/adjacent check
    //new exon/CDS, not merged in a previous one
    GffExon* enew=new GffExon(segstart, segend, exontype, phase, exon_score.score, exon_score.precision);
+   if (segs==&exons && exons.Count()==0) {
+	   start=segstart;
+	   end=segend;
+   }
    int eidx=segs->Add(enew);
    if (eidx<0) {
     //this would actually be possible if the object is a "Gene" and "exons" are in fact isoforms
@@ -2953,10 +3039,23 @@ bool GffObj::printAttrs(FILE* fout,  const char* sep, bool GTFstyle, bool cvtCha
 		  if (strcmp(attrname, "gene_id")==0) continue;
 		  if (cvtChars) {
 			  decodeHexChars(dbuf, attrval, DBUF_LEN-1);
-			  fprintf(fout,"%s%s \"%s\"", prsep, attrname, dbuf);
+			  int dl=0;
+			  char* av=dbuf;
+			  if ( (dl=strlen(av))>0) {
+				  if (av[dl-1]=='"') av[dl-1]=0;
+				  if (av[0]=='"') ++av;
+			  }
+			  fprintf(fout,"%s%s \"%s\"", prsep, attrname, av);
+		  }
+		  else {
+			  int dl=0;
+			  char* av=attrs->Get(i)->attr_val;
+			  if ( (dl=strlen(av))>0) {
+				  if (av[dl-1]=='"') av[dl-1]=0;
+				  if (av[0]=='"') ++av;
+			  }
+			 fprintf(fout,"%s%s \"%s\"", prsep, attrname, av);
 		  }
-		  else
-			 fprintf(fout,"%s%s \"%s\"", prsep, attrname, attrs->Get(i)->attr_val);
 		  prsep=sep;
 		  pr=true;
 		}
@@ -3156,361 +3255,578 @@ void GffObj::getCDSegs(GVec<GffExon>& cds) {
 //-- transcript match/overlap classification functions
 
 
-char transcriptMatch(GffObj& a, GffObj& b, int& ovlen, int trange) {
-	//return '=' if exact exon match or transcripts ends are within tdelta distance
-	// '~' if intron-chain match (or 80% overlap, for single-exon)
-	// or 0 otherwise
-	int imax=a.exons.Count()-1;
-	int jmax=b.exons.Count()-1;
-	ovlen=0;
-	if (imax!=jmax) return false; //different number of exons, cannot match
-	if (imax==0) //single-exon mRNAs
-	    return (singleExonTMatch(a,b,ovlen, trange));
-	if ( a.exons[imax]->start<b.exons[0]->end ||
-		b.exons[jmax]->start<a.exons[0]->end )
-		return 0; //intron chains do not overlap at all
-	//check intron overlaps
-	ovlen=a.exons[0]->end-(GMAX(a.start,b.start))+1;
-	ovlen+=(GMIN(a.end,b.end))-a.exons.Last()->start;
-	for (int i=1;i<=imax;i++) {
-		if (i<imax) ovlen+=a.exons[i]->len();
-		if ((a.exons[i-1]->end!=b.exons[i-1]->end) ||
-			(a.exons[i]->start!=b.exons[i]->start)) {
-			return 0; //intron mismatch
+char transcriptMatch(GffObj &a, GffObj &b, int &ovlen, int trange)
+{
+	// return '=' if exact exon match or transcripts ends are within tdelta distance
+	//  '~' if intron-chain match (or 80% overlap, for single-exon)
+	//  '-' if CDS chains do not match but otherwise would be '=' or '~'
+	//  or 0 otherwise
+	int imax = a.exons.Count() - 1;
+	int jmax = b.exons.Count() - 1;
+	ovlen = 0;
+	if (imax != jmax)
+		return false; // different number of exons, cannot match
+	if (imax == 0)	  // single-exon mRNAs
+		return (singleExonTMatch(a, b, ovlen, trange));
+	if (a.exons[imax]->start < b.exons[0]->end ||
+		b.exons[jmax]->start < a.exons[0]->end)
+		return 0; // intron chains do not overlap at all
+	// check intron overlaps
+	ovlen = a.exons[0]->end - (GMAX(a.start, b.start)) + 1;
+	ovlen += (GMIN(a.end, b.end)) - a.exons.Last()->start;
+	for (int i = 1; i <= imax; i++)
+	{
+		if (i < imax)
+			ovlen += a.exons[i]->len();
+		if ((a.exons[i - 1]->end != b.exons[i - 1]->end) ||
+			(a.exons[i]->start != b.exons[i]->start))
+		{
+			return 0; // intron mismatch
 		}
 	}
 	//--- full intron chain match
-	//check if it's an exact
-	if (abs((int)a.exons[0]->start-(int)b.exons[0]->start)<=trange &&
-		abs((int)a.exons.Last()->end-(int)b.exons.Last()->end)<=trange)
-		   return '=';
-	return '~';
-}
-
-char singleExonTMatch(GffObj& m, GffObj& r, int& ovlen, int trange) {
- //return '=' if boundaries match within tdelta distance,
- //    or '~' if the overlap is >=80% of the longer sequence length
- // return 0 if there is no overlap
- GSeg mseg(m.start, m.end);
- ovlen=mseg.overlapLen(r.start,r.end);
- if (ovlen<=0) return 0;
- // fuzzy matching for single-exon transcripts:
- // matching = overlap is at least 80% of the length of the longer transcript
- // *OR* in case of reverse containment (reference contained in m)
- //   it's also considered "matching" if the overlap is at least 80% of
- //   the reference len AND at least 70% of the query len
- if (abs((int)m.start-(int)r.start)<=trange
-	 && abs((int)m.end-(int)r.end)<=trange)
-	  return '=';
- if (m.covlen>r.covlen) {
-   if ( (ovlen >= m.covlen*0.8) ||
-		   (ovlen >= r.covlen*0.8 && ovlen >= m.covlen* 0.7 ) )
-		   //allow also some fuzzy reverse containment
-           return '~';
- } else {
-   if (ovlen >= r.covlen*0.8) return '~';
- }
- return 0;
+	char result = 0;
+	// check if it's an exact
+	if (abs((int)a.exons[0]->start - (int)b.exons[0]->start) <= trange &&
+		abs((int)a.exons.Last()->end - (int)b.exons.Last()->end) <= trange)
+		result = '=';
+	else
+		result = '~';
+
+	bool cdsEndsMatch = a.CDstart == b.CDstart && a.CDend == b.CDend;
+	if (!cdsEndsMatch)
+	{
+		if (result == '=')
+		{
+			result = ':';
+		}
+		else if (result == '~')
+		{
+			result = '_';
+		}
+	}
+
+	return result;
+}
+
+bool txStructureMatch(GffObj& a, GffObj& b, double SE_tolerance, int ME_range) {
+    // Check if transcripts are on the same genomic sequence and overlap
+    if (a.gseq_id != b.gseq_id || !a.overlap(b.start, b.end)) {
+        return false; // No match if they are on different sequences or do not overlap
+    }
+    int exc = a.exons.Count();
+    if (exc != b.exons.Count())
+		return false; // No match if they have different exon counts
+    // Check for multi-exon transcripts with identical intron chain structure
+    if (exc > 1) {
+        for (int i = 0; i < exc-1; ++i) { // Compare intron coordinates
+            if (a.exons[i]->end != b.exons[i]->end ||
+			    a.exons[i + 1]->start != b.exons[i + 1]->start) {
+                return false; // Intron chains do not match
+            }
+        }
+		if (abs((int)a.start - (int)b.start) > ME_range || abs((int)a.end - (int)b.end) > ME_range)
+			return false; // Transcript ends are too far apart
+        return true; // All intron chains match
+    } else {
+        // Calculate overlap for single-exon transcripts
+        int ov_start = GMAX(a.start, b.start);
+        int ov_end = GMIN(a.end, b.end);
+        int ovlen = ov_end - ov_start + 1;
+        //int shortest_span = GMIN(a.len(), b.len());
+		int longer_span = GMAX(a.len(), b.len());
+        if (static_cast<double>(ovlen) / longer_span >= SE_tolerance) {
+            return true; // Overlap meets or exceeds tolerance
+        }
+    }
+    return false; // Default case: no match
+}
+
+char singleExonTMatch(GffObj &m, GffObj &r, int &ovlen, int trange, int *ovlrefstart)
+{
+	// return '=' if boundaries match within tdelta distance,
+	//   or '~' if the overlap is >=80% of the longer sequence length
+	//  ':' and '_' respectively if CDS chains do not match but otherwise would be '=' or '~'
+	//  return 0 if there is no overlap
+	GSeg mseg(m.start, m.end);
+	ovlen = mseg.overlapLen(r.start, r.end, ovlrefstart);
+	if (ovlen <= 0)
+		return 0;
+	// fuzzy matching for single-exon transcripts:
+	// matching = overlap is at least 80% of the length of the longer transcript
+	// *OR* in case of reverse containment (reference contained in m)
+	//   it's also considered "matching" if the overlap is at least 80% of
+	//   the reference len AND at least 70% of the query len
+	char result = 0;
+	if (abs((int)m.start - (int)r.start) <= trange && abs((int)m.end - (int)r.end) <= trange)
+		result = '=';
+	if (m.covlen > r.covlen)
+	{
+		if ((ovlen >= m.covlen * 0.8) ||
+			(ovlen >= r.covlen * 0.8 && ovlen >= m.covlen * 0.7))
+			// allow also some fuzzy reverse containment
+			result = '~';
+	}
+	else
+	{
+		if (ovlen >= r.covlen * 0.8)
+			result = '~';
+	}
+
+	// check if CDS chains match and modify result accordingly
+	bool cdsEndsMatch = r.CDstart == m.CDstart && r.CDend == m.CDend;
+	if (!cdsEndsMatch)
+	{
+		if (result == '=')
+		{
+			result = ':';
+		}
+		else if (result == '~')
+		{
+			result = '_';
+		}
+	}
+
+	return 0;
 }
 
-TOvlData getOvlData(GffObj& m, GffObj& r, bool stricterMatch, int trange) {
+//NOTE: getOvlData() does not check the strands of the transcripts
+TOvlData getOvlData(GffObj &m, GffObj &r, bool stricterMatch, int trange, bool cdsMatch)
+{
 	TOvlData odta;
-	if (!m.overlap(r.start,r.end)) return odta;
-	int jmax=r.exons.Count()-1;
-	//char rcode=0;
-	if (m.exons.Count()==1) { //single-exon transfrag
+	if (!m.overlap(r.start, r.end))
+		return odta;
+	int jmax = r.exons.Count() - 1;
+	// char rcode=0;
+	if (m.exons.Count() == 1)
+	{ // single-exon transfrag
 		GSeg mseg(m.start, m.end);
-		if (jmax==0) { //also single-exon ref
-			char eqcode=0;
-			if ((eqcode=singleExonTMatch(m, r, odta.ovlen, trange))>0) {
-				odta.ovlcode=(stricterMatch) ? eqcode : '=';
+		if (jmax == 0)
+		{ // also single-exon ref
+			char eqcode = 0;
+			if ((eqcode = singleExonTMatch(m, r, odta.ovlen, trange, &odta.ovlRefstart)) > 0)
+			{
+				odta.ovlcode = (stricterMatch) ? eqcode : '=';
 				return odta;
 			}
-			if (m.covlen<r.covlen)
-			   { if (odta.ovlen >= m.covlen*0.8) { odta.ovlcode='c'; return odta; } } // fuzzy containment
-			else
-				if (odta.ovlen >= r.covlen*0.8 ) { odta.ovlcode='k'; return odta; }   // fuzzy reverse containment
-			odta.ovlcode='o';
+			// singleExonTMatch set odta.ovlen and odta.ovlRefstart anyway
+			if (m.covlen < r.covlen)
+			{
+				if (odta.ovlen >= m.covlen * 0.8)
+				{
+					odta.ovlcode = 'c';
+					return odta;
+				}
+			} // fuzzy containment
+			else if (odta.ovlen >= r.covlen * 0.8)
+			{
+				odta.ovlcode = 'k';
+				return odta;
+			} // fuzzy reverse containment
+			odta.ovlcode = 'o';
 			return odta;
-			//just plain overlapping
+			// just plain overlapping
 		}
 		//-- single-exon qry overlaping multi-exon ref
-		//check full pre-mRNA case (all introns retained): code 'm'
-		if (m.start<=r.exons[0]->end && m.end>=r.exons[jmax]->start)
-			{ odta.ovlcode='m'; return odta; }
-
-		for (int j=0;j<=jmax;j++) {
-			//check if it's ~contained by an exon
-			int exovlen=mseg.overlapLen(r.exons[j]);
-			if (exovlen>0) {
-				odta.ovlen+=exovlen;
-				if (m.start>r.exons[j]->start-4 && m.end<r.exons[j]->end+4) {
-					odta.ovlcode='c';
-					return odta; //close enough to be considered contained in this exon
+		// check full pre-mRNA case (all introns retained): code 'm'
+		if (m.start <= r.exons[0]->end && m.end >= r.exons[jmax]->start)
+		{
+			odta.ovlcode = 'm';
+			odta.ovlRefstart = ((r.exons[0]->start > m.start) ? 1 : m.start - r.exons[0]->start + 1);
+			int rovlend = (r.exons[jmax]->end < m.end) ? r.covlen : r.exons[jmax]->end - m.end + 1;
+			odta.ovlen = rovlend - odta.ovlRefstart + 1;
+			return odta;
+		}
+		int refxpos = 0;
+		for (int j = 0; j <= jmax; j++)
+		{
+			int ovlst = 0; // check if it's ~contained by an exon
+			int exovlen = mseg.overlapLen(r.exons[j]->start, r.exons[j]->end, &ovlst);
+			if (exovlen > 0)
+			{
+				if (odta.ovlen == 0)
+				{ // first exon being overlapped
+					odta.ovlRefstart = refxpos + ovlst;
+				}
+				odta.ovlen += exovlen;
+
+				if (m.start > r.exons[j]->start - 4 && m.end < r.exons[j]->end + 4)
+				{
+					odta.ovlcode = 'c';
+					return odta; // close enough to be considered contained in this exon
 				}
 			}
-			if (j==jmax) break; //last exon here, no intron to check
-			//check if it fully covers an intron (retained intron)
-			if (m.start<r.exons[j]->end && m.end>r.exons[j+1]->start)
-				{ odta.ovlcode='n'; return odta; }
-			//check if it's fully contained by an intron
-			if (m.end<r.exons[j+1]->start && m.start>r.exons[j]->end)
-			{ odta.ovlcode='i'; return odta; }
+			refxpos += r.exons[j]->len();
+			if (j == jmax)
+				break; // last exon here, no intron to check
+			// check if it fully covers an intron (retained intron)
+			if (m.start < r.exons[j]->end && m.end > r.exons[j + 1]->start)
+			{
+				odta.ovlcode = 'n';
+				return odta;
+			}
+			// check if it's fully contained by an intron
+			if (m.end < r.exons[j + 1]->start && m.start > r.exons[j]->end)
+			{
+				odta.ovlcode = 'i';
+				return odta;
+			}
 			// check if it's a potential pre-mRNA transcript
 			// (if overlaps this intron at least 10 bases)
-			uint introvl=mseg.overlapLen(r.exons[j]->end+1, r.exons[j+1]->start-1);
-			//iovlen+=introvl;
-			if (introvl>=10 && mseg.len()>introvl+10) { odta.ovlcode='e'; }
-		} //for each ref exon
-		if (odta.ovlcode>0) return odta;
-		odta.ovlcode='o'; //plain overlap, uncategorized
+			uint introvl = mseg.overlapLen(r.exons[j]->end + 1, r.exons[j + 1]->start - 1);
+			// iovlen+=introvl;
+			if (introvl >= 10 && mseg.len() > introvl + 10)
+			{
+				odta.ovlcode = 'e';
+			}
+		} // for each ref exon
+		if (odta.ovlcode > 0)
+			return odta;
+		odta.ovlcode = 'o'; // plain overlap, uncategorized
 		return odta;
-	} //single-exon transfrag
+	} // single-exon transfrag
 	//-- multi-exon transfrag --
-	int imax=m.exons.Count()-1;// imax>0 here
-	odta.jbits.resize(imax << 1); //num_junctions = 2 * num_introns
-	if (jmax==0) { //single-exon reference overlap
-		//any exon overlap?
+	int imax = m.exons.Count() - 1; // imax>0 here
+	odta.jbits.resize(imax << 1);	// num_junctions = 2 * num_introns
+	odta.inbits.resize(imax);		// num_introns
+	if (jmax == 0)
+	{ // single-exon reference overlap
+		// any exon overlap?
 		GSeg rseg(r.start, r.end);
-		for (int i=0;i<=imax;i++) {
-			//check if it's ~contained by an exon
-			int exovlen=rseg.overlapLen(m.exons[i]);
-			if (exovlen>0) {
-				odta.ovlen+=exovlen;
-				if (r.start>m.exons[i]->start-4 && r.end<m.exons[i]->end+4) {
-					odta.ovlcode='k';
-					return odta; //reference contained in this assembled exon
+		for (int i = 0; i <= imax; i++)
+		{
+			// check if it's ~contained by an exon
+			int rxpos = 0;
+			int exovlen = m.exons[i]->overlapLen(r.start, r.end, &rxpos);
+			if (exovlen > 0)
+			{
+				if (odta.ovlRefstart == 0)
+					odta.ovlRefstart = rxpos;
+				odta.ovlen += exovlen;
+				if (r.start > m.exons[i]->start - 4 && r.end < m.exons[i]->end + 4)
+				{
+					odta.ovlcode = 'k';
+					return odta; // reference contained in this assembled exon
 				}
 			}
-			if (i==imax) break;
-			if (r.end<m.exons[i+1]->start && r.start>m.exons[i]->end) {
-				 odta.ovlcode='y'; //ref contained in this transfrag intron
-				 return odta;
+			if (i == imax)
+				break;
+			if (r.end < m.exons[i + 1]->start && r.start > m.exons[i]->end)
+			{
+				odta.ovlcode = 'y'; // ref contained in this transfrag intron
+				return odta;
 			}
 		}
-		odta.ovlcode='o';
+		odta.ovlcode = 'o'; // ref is just partially overlapped by this transfrag
 		return odta;
 	} // SET reference
 	// -- MET comparison ---
+	odta.rint.resize(jmax); // initialize bitvector of reference introns (1=intron matched)
 	// check if qry contained by a ref intron
-	for (int j=0;j<jmax;j++) {
-		if (m.end<r.exons[j+1]->start && m.start>r.exons[j]->end)
-			{ odta.ovlcode='i'; return odta; }
-	}
-	if (m.exons[imax]->start<r.exons[0]->end) {
-		//qry intron chain ends before ref intron chain starts
-		//check if last qry exon plugs the 1st ref intron
-		if (m.exons[imax]->start<=r.exons[0]->end &&
-			m.exons[imax]->end>=r.exons[1]->start) {
-			odta.ovlen=m.exonOverlapLen(r);
-			odta.ovlcode='n';
+	for (int j = 0; j < jmax; j++)
+	{
+		if (m.end < r.exons[j + 1]->start && m.start > r.exons[j]->end)
+		{
+			odta.ovlcode = 'i';
 			return odta;
 		}
-		odta.ovlen=m.exons[imax]->overlapLen(r.exons[0]);
-		odta.ovlcode='o'; //only terminal exons overlap
+	}
+	if (m.exons[imax]->start < r.exons[0]->end)
+	{
+		// qry intron chain ends before ref intron chain starts
+		// check if last qry exon plugs the 1st ref intron
+		if (m.exons[imax]->start <= r.exons[0]->end &&
+			m.exons[imax]->end >= r.exons[1]->start)
+		{
+			odta.ovlen = m.exonOverlapLen(r, &odta.ovlRefstart);
+			odta.ovlcode = 'n';
+			return odta;
+		}
+		odta.ovlen = m.exons[imax]->overlapLen(r.exons[0]->start, r.exons[0]->end, &odta.ovlRefstart);
+		odta.ovlcode = 'o'; // only terminal exons overlap
 		return odta;
 	}
-	else if (r.exons[jmax]->start<m.exons[0]->end) {
-		//qry intron chain starts after ref intron chain ends
-		//check if first qry exon plugs the last ref intron
-		if (m.exons[0]->start<=r.exons[jmax-1]->end &&
-			m.exons[0]->end>=r.exons[jmax]->start) {
-			odta.ovlen=m.exonOverlapLen(r);
-			odta.ovlcode='n';
+	else if (r.exons[jmax]->start < m.exons[0]->end)
+	{
+		// qry intron chain starts after ref intron chain ends
+		// check if first qry exon plugs the last ref intron
+		if (m.exons[0]->start <= r.exons[jmax - 1]->end &&
+			m.exons[0]->end >= r.exons[jmax]->start)
+		{
+			odta.ovlen = m.exonOverlapLen(r, &odta.ovlRefstart);
+			odta.ovlcode = 'n';
 			return odta;
 		}
-		odta.ovlen=m.exons[0]->overlapLen(r.exons[jmax]);
-		odta.ovlcode='o'; //only terminal exons overlap
+		odta.ovlen = m.exons[0]->overlapLen(r.exons[jmax]->start, r.exons[jmax]->end, &odta.ovlRefstart);
+		odta.ovlRefstart += r.covlen - r.exons[jmax]->len();
+		odta.ovlcode = 'o'; // only terminal exons overlap
 		return odta;
 	}
-	//check intron chain overlap (match, containment, intron retention etc.)
-	int i=1; //index of exon to the right of current qry intron
-	int j=1; //index of exon to the right of current ref intron
-	bool intron_conflict=false; //overlapping introns have at least a mismatching splice site
-	//from here on we check all qry introns against ref introns
-	bool junct_match=false; //true if at least a junction match is found
-	bool ichain_match=false; //if there is intron (sub-)chain match, to be updated by any mismatch
-	bool intron_ovl=false; //if any intron overlap is found
-	bool intron_retention=false; //if any ref intron is covered by a qry exon
-	//intron chain (partial) match exon-index boundaries:
-	int imfirst=0; //index of exon after first intron match in query (valid>0)
-	int jmfirst=0; //index of exon after first intron match in reference (valid>0)
-	int imlast=0;  //index of exon after last intron match in query
-	int jmlast=0;  //index of  exon after last intron match in reference
+	// check intron chain overlap (match, containment, intron retention etc.)
+	int i = 1;					  // index of exon to the right of current qry intron
+	int j = 1;					  // index of exon to the right of current ref intron
+	bool intron_conflict = false; // overlapping introns have at least a mismatching splice site
+	// from here on we check all qry introns against ref introns
+	bool junct_match = false;	   // true if at least a junction match is found
+	bool ichain_match = false;	   // if there is intron (sub-)chain match, to be updated by any mismatch
+	bool intron_ovl = false;	   // if any intron overlap is found
+	bool intron_retention = false; // if any ref intron is covered by a qry exon
+	// intron chain (partial) match exon-index boundaries:
+	int imfirst = 0; // index of exon after first intron match in query (valid>0)
+	int jmfirst = 0; // index of exon after first intron match in reference (valid>0)
+	int imlast = 0;	 // index of exon after last intron match in query
+	int jmlast = 0;	 // index of  exon after last intron match in reference
 	//--keep track of the last overlapping introns in both qry and ref:
-	odta.ovlen=m.exonOverlapLen(r);
-    //int q_first_iovl=-1, r_first_iovl=-1, q_last_iovl=-1, r_last_iovl=-1;
-	//check for intron matches
-	while (i<=imax && j<=jmax) {
-		uint mstart=m.exons[i-1]->end; //qry intron start-end
-		uint mend=m.exons[i]->start;
-		uint rstart=r.exons[j-1]->end; //ref intron start-end
-		uint rend=r.exons[j]->start;
-		if (rend<mstart) { //qry intron starts after ref intron ends
-			if (!intron_conflict && r.exons[j]->overlap(mstart+1, mend-1))
-				intron_conflict=true; //next ref exon overlaps this qry intron
-			if (!intron_retention && rstart>=m.exons[i-1]->start && rend<=m.exons[i-1]->end)
-				intron_retention=true; //this ref intron is covered by previous qry exons[i-1]
-			if (intron_ovl) ichain_match=false;
+	odta.ovlen = m.exonOverlapLen(r, &odta.ovlRefstart);
+	// int q_first_iovl=-1, r_first_iovl=-1, q_last_iovl=-1, r_last_iovl=-1;
+	// check for intron matches
+	while (i <= imax && j <= jmax)
+	{
+		uint mstart = m.exons[i - 1]->end; // qry intron start-end
+		uint mend = m.exons[i]->start;
+		uint rstart = r.exons[j - 1]->end; // ref intron start-end
+		uint rend = r.exons[j]->start;
+		if (rend < mstart)
+		{ // qry intron starts after ref intron ends
+			if (!intron_conflict && r.exons[j]->overlap(mstart + 1, mend - 1))
+				intron_conflict = true; // next ref exon overlaps this qry intron
+			if (!intron_retention && rstart >= m.exons[i - 1]->start && rend <= m.exons[i - 1]->end)
+				intron_retention = true; // this ref intron is covered by previous qry exons[i-1]
+			if (intron_ovl)
+				ichain_match = false;
 			j++;
 			continue;
-		} //no overlap with this ref intron, skipping it
-		if (rstart>mend) { //qry intron ends before ref intron starts
-			//if qry intron overlaps the exon on the left, we have an intron conflict
-			if (!intron_conflict && r.exons[j-1]->overlap(mstart+1, mend-1))
-				intron_conflict=true;
-			if (!intron_retention && rstart>=m.exons[i]->start && rend<=m.exons[i]->end)
-				intron_retention=true;
-			if (intron_ovl) ichain_match=false;
+		} // no overlap with this ref intron, skipping it
+		if (rstart > mend)
+		{ // qry intron ends before ref intron starts
+			// if qry intron overlaps the exon on the left, we have an intron conflict
+			if (!intron_conflict && r.exons[j - 1]->overlap(mstart + 1, mend - 1))
+				intron_conflict = true;
+			if (!intron_retention && rstart >= m.exons[i]->start && rend <= m.exons[i]->end)
+				intron_retention = true;
+			if (intron_ovl)
+				ichain_match = false;
 			i++;
 			continue;
-		} //no intron overlap, skipping qry intron
-		intron_ovl=true;
-
-		//q_last_iovl=i; //keep track of the last overlapping introns in both qry and ref
-		//r_last_iovl=j;
-		//overlapping introns, test junction matching
-		bool smatch=false;
-		if (mstart==rstart) {
-			smatch=true;
-			odta.jbits.set( (i-1)<<1 );
+		} // no intron overlap, skipping qry intron
+		intron_ovl = true;
+
+		// q_last_iovl=i; //keep track of the last overlapping introns in both qry and ref
+		// r_last_iovl=j;
+		// overlapping introns, test junction matching
+		bool smatch = false;
+		if (mstart == rstart)
+		{
+			smatch = true;
+			odta.jbits.set((i - 1) << 1);
 			odta.numJmatch++;
-		    junct_match=true;
+			junct_match = true;
 		}
-		bool ematch=false;
-		if (mend==rend) {
-			ematch=true;
-			odta.jbits.set( ((i-1)<<1)+1 );
-		    odta.numJmatch++;
-		    junct_match=true;
+		bool ematch = false;
+		if (mend == rend)
+		{
+			ematch = true;
+			odta.jbits.set(((i - 1) << 1) + 1);
+			odta.numJmatch++;
+			junct_match = true;
 		}
-		if (smatch && ematch) {
-			//perfect match of this intron
-			if (jmfirst==0) {
-				ichain_match=true;
-				jmfirst=j;
-				imfirst=i;
+		if (smatch && ematch)
+		{
+			// full match of this reference intron
+			odta.inbits.set(i - 1);
+			odta.rint.set(j - 1);
+			if (jmfirst == 0)
+			{
+				ichain_match = true;
+				jmfirst = j;
+				imfirst = i;
 			}
-			if (ichain_match) {
-  		       imlast=i;
-			   jmlast=j;
+			if (ichain_match)
+			{
+				imlast = i;
+				jmlast = j;
 			}
-			i++; j++;
+			i++;
+			j++;
 			continue;
 		}
-		//intron overlapping but not fully matching
-		intron_conflict=true;
-		ichain_match=false;
-		if (mend>rend) j++; else i++;
-	} //while checking intron overlaps
+		// intron overlapping but not fully matching
+		intron_conflict = true;
+		ichain_match = false;
+		if (mend > rend)
+			j++;
+		else
+			i++;
+	} // while checking intron overlaps
 	// --- when qry intron chain is contained within ref intron chain
 	//     qry terminal exons may poke (overhang) into ref's other introns
-	int l_iovh=0;   // overhang of q left boundary beyond the end of ref intron on the left
-	int r_iovh=0;   // same type of overhang through the ref intron on the right
-	int qry_intron_poking=0;
+	int l_iovh = 0; // overhang of q left boundary beyond the end of ref intron on the left
+	int r_iovh = 0; // same type of overhang through the ref intron on the right
+	int qry_intron_poking = 0;
 	// --- when ref intron chain is contained within qry intron chain,
 	//     terminal exons of ref may poke (overhang) into qry other introns
-	int l_jovh=0;   // overhang of q left boundary beyond the end of ref intron to the left
-	int r_jovh=0;   // same type of overhang through the ref intron on the right
-	int ref_intron_poking=0;
-	if (ichain_match) { //intron (sub-)chain compatible so far (but there could still be conflicts)
-		if (imfirst==1 && imlast==imax) { // qry full intron chain match
-			if (jmfirst==1 && jmlast==jmax) {//identical intron chains
-				if (stricterMatch) {
-					odta.ovlcode= (abs((int)r.exons[0]->start-(int)m.exons[0]->start)<=trange &&
-							abs((int)r.exons.Last()->end-(int)m.exons.Last()->end)<=trange) ? '=' : '~';
-					return odta;
+	int l_jovh = 0; // overhang of q left boundary beyond the end of ref intron to the left
+	int r_jovh = 0; // same type of overhang through the ref intron on the right
+	int ref_intron_poking = 0;
+	if (ichain_match)
+	{ // intron (sub-)chain compatible so far (but there could still be conflicts)
+		if (imfirst == 1 && imlast == imax)
+		{ // qry full intron chain match
+			if (jmfirst == 1 && jmlast == jmax)
+			{ // identical intron chains
+				if (stricterMatch)
+				{
+					odta.ovlcode = (abs((int)r.exons[0]->start - (int)m.exons[0]->start) <= trange &&
+									abs((int)r.exons.Last()->end - (int)m.exons.Last()->end) <= trange)
+									   ? '='
+									   : '~';
+				}
+				else
+				{
+					odta.ovlcode = '=';
+				}
+
+				if (cdsMatch)
+				{
+					bool cdsEndsMatch = r.CDstart == m.CDstart && r.CDend == m.CDend;
+					if (!cdsEndsMatch)
+					{
+						if (odta.ovlcode == '=')
+						{
+							odta.ovlcode = ':';
+						}
+						else if (odta.ovlcode == '~')
+						{
+							odta.ovlcode = '_';
+						}
+					}
 				}
-				odta.ovlcode='=';
+
 				return odta;
 			}
 			// -- a partial intron chain match
-			if (jmfirst>1) {
-				//find if m.start falls within any ref intron before jmfirst
-				for (int j=jmfirst-1;j>0;--j)
-					if (m.start<r.exons[j]->start) {
-						if (m.start>r.exons[j-1]->end) { //m.start within this ref intron
+			if (jmfirst > 1)
+			{
+				// find if m.start falls within any ref intron before jmfirst
+				for (int j = jmfirst - 1; j > 0; --j)
+					if (m.start < r.exons[j]->start)
+					{
+						if (m.start > r.exons[j - 1]->end)
+						{ // m.start within this ref intron
 							l_iovh = r.exons[j]->start - m.start;
 							break;
 						}
-						else { intron_retention=true; ichain_match=false; }
+						else
+						{
+							intron_retention = true;
+							ichain_match = false;
+						}
 					}
 			}
-			if (jmlast<jmax) {
-				for (int j=jmlast;j<jmax;++j)
-					if (m.end > r.exons[j]->end) {
-						if (m.end < r.exons[j+1]->start) { //m.end within this ref intron
+			if (jmlast < jmax)
+			{
+				for (int j = jmlast; j < jmax; ++j)
+					if (m.end > r.exons[j]->end)
+					{
+						if (m.end < r.exons[j + 1]->start)
+						{ // m.end within this ref intron
 							r_iovh = m.end - r.exons[j]->end;
-						    break;
+							break;
+						}
+						else
+						{
+							intron_retention = true;
+							ichain_match = false;
 						}
-						else { intron_retention=true; ichain_match=false; }
 					}
 			}
-			if (ichain_match && l_iovh<4 && r_iovh<4) {
-				odta.ovlcode='c';
+			if (ichain_match && l_iovh < 4 && r_iovh < 4)
+			{
+				odta.ovlcode = 'c';
 				return odta;
 			}
-			qry_intron_poking=GMAX(l_iovh, r_iovh);
-		} else if ((jmfirst==1 && jmlast==jmax)) {//ref intron chain match
-			//check if the reference j-chain is contained in qry i-chain
-			//check for ref ends poking into qry introns
-			if (imfirst>1)  {
-				for (int i=imfirst-1;i>0;--i)
-					if (m.exons[i]->start>r.start) {
-						if (r.start>m.exons[i-1]->end) {
+			qry_intron_poking = GMAX(l_iovh, r_iovh);
+		}
+		else if ((jmfirst == 1 && jmlast == jmax))
+		{ // ref intron chain match
+			// check if the reference j-chain is contained in qry i-chain
+			// check for ref ends poking into qry introns
+			if (imfirst > 1)
+			{
+				for (int i = imfirst - 1; i > 0; --i)
+					if (m.exons[i]->start > r.start)
+					{
+						if (r.start > m.exons[i - 1]->end)
+						{
 							l_jovh = m.exons[i]->start - r.start;
 							break;
 						}
-						else { ichain_match = false; }
+						else
+						{
+							ichain_match = false;
+						}
 					}
 			}
-			if (imlast<imax) {
-				for (int i=imlast;i<imax;++i)
-					if (r.end > m.exons[i]->end) {
-						if (r.end < m.exons[i+1]->start)
-							 { r_jovh = r.end - m.exons[i]->end; break; }
-						else { ichain_match = false; }
+			if (imlast < imax)
+			{
+				for (int i = imlast; i < imax; ++i)
+					if (r.end > m.exons[i]->end)
+					{
+						if (r.end < m.exons[i + 1]->start)
+						{
+							r_jovh = r.end - m.exons[i]->end;
+							break;
+						}
+						else
+						{
+							ichain_match = false;
+						}
 					}
 			}
-			if (ichain_match && l_jovh<4 && r_jovh<4) {
-				odta.ovlcode='k'; //reverse containment
+			if (ichain_match && l_jovh < 4 && r_jovh < 4)
+			{
+				odta.ovlcode = 'k'; // reverse containment
 				return odta;
 			}
-			ref_intron_poking=GMAX(l_jovh, r_jovh);
+			ref_intron_poking = GMAX(l_jovh, r_jovh);
 		}
 	}
 	//'=', 'c' and 'k' were checked and assigned, check for 'm' and 'n' before falling back to 'j'
-	if (intron_retention) {
-			//ref is boundary contained with qry intron chain ? that's not required for 'm'
-		    //GMessage("r_jovh=%d, r_iovh=%d, l_jovh=%d, l_iovh=%d\n", r_jovh, r_iovh, l_jovh, l_iovh);
-		    //GMessage("m.start=%d, r.exons[0]->end=%d, m.end=%d, r.exons[jmax]->start=%d\n",
-		    //		m.start, r.exons[0]->end, m.end, r.exons[jmax]->start);
-		    //if (ref_intron_poking>0 && )
-		//we just need to have no intron poking going on
-		if (!intron_conflict && ref_intron_poking<4
-						&& qry_intron_poking<4) {
-			odta.ovlcode='m';
+	if (intron_retention)
+	{
+		// ref is boundary contained with qry intron chain ? that's not required for 'm'
+		// GMessage("r_jovh=%d, r_iovh=%d, l_jovh=%d, l_iovh=%d\n", r_jovh, r_iovh, l_jovh, l_iovh);
+		// GMessage("m.start=%d, r.exons[0]->end=%d, m.end=%d, r.exons[jmax]->start=%d\n",
+		//		m.start, r.exons[0]->end, m.end, r.exons[jmax]->start);
+		// if (ref_intron_poking>0 && )
+		// we just need to have no intron poking going on
+		if (!intron_conflict && ref_intron_poking < 4 && qry_intron_poking < 4)
+		{
+			odta.ovlcode = 'm';
 			return odta;
 		}
-		odta.ovlcode='n';
+		odta.ovlcode = 'n';
 		return odta;
 	}
-	//if (intron_ovl) { ?
-	if (junct_match) {
-		odta.ovlcode='j';
+	// if (intron_ovl) { ?
+	if (junct_match)
+	{
+		odta.ovlcode = 'j';
 		return odta;
 	}
-	//what's left could be intron overlap but with no junction match = 'o'
-	if (odta.ovlen>4) {
-		odta.ovlcode='o';
+	// what's left could be intron overlap but with no junction match: 'o'
+	if (odta.ovlen > 4)
+	{
+		odta.ovlcode = 'o';
 		return odta;
 	}
-	//but if there is no exon overlap, we have 'i' or 'y'
-	//exons are within the introns of the other!
-	if (m.start>r.start && r.end > m.end) {
-		odta.ovlcode='i';
+	// but if there is no exon overlap, we can have 'i' or 'y'
+	// exons are within the introns of the other transcript
+	if (m.start > r.start && r.end > m.end)
+	{
+		odta.ovlcode = 'i';
 		return odta;
 	}
-	odta.ovlcode='y';
-	return odta; //all reference exons are within transfrag introns!
+	odta.ovlcode = 'y';
+	return odta; // all reference exons are within transfrag introns!
 }


=====================================
gff.h
=====================================
@@ -1,6 +1,9 @@
 #ifndef GFF_H
 #define GFF_H
 
+#define GFF_VERSION 129
+//^^could be used for gffcompare/gffread builds to check for min version required
+
 //#define CUFFLINKS 1
 
 #include "GBase.h"
@@ -62,22 +65,47 @@ extern const byte CLASSCODE_OVL_RANK; //rank value just above 'o' class code
 
 byte classcode_rank(char c); //returns priority value for class codes
 
-struct TOvlData {
-	char ovlcode;
-	int ovlen;
-	int16_t numJmatch; //number of matching junctions (not introns)
-	GBitVec jbits; //bit array with 1 bit for each junctions (total = 2 * num_introns)
-	TOvlData(char oc=0, int olen=0, int16_t nmj=0, GBitVec* jb=NULL):ovlcode(oc),
-			ovlen(olen),numJmatch(nmj),jbits(jb) { }
+struct TOvlData { //describe overlap with a ref transcript
+	char ovlcode=0;
+	int ovlen=0;
+	int ovlRefstart=0; //start coordinate of the overlap on reference (1-based)
+	int numJmatch=0; //number of matching splice sites (not introns!)
+	GBitVec jbits; //bit array with 1 bit for each junction (total = 2 * num_introns)
+	              //a junction match is 1, otherwise 0
+	GBitVec inbits; // bit array with 1 bit per intron; ref-matching introns are set
+
+	GBitVec rint; //reference introns matched: bit array with one bit per reference intron
+	   // (len = ref num_introns); bit is set if the ref intron was matched
+
+	//Note: skipped exons have 2 consecutive jbits set (starting at even index)
+	//      with NO corresponding bit set in inbits
+	TOvlData(char oc=0, int olen=0, int nmj=0):ovlcode(oc),
+			ovlen(olen),ovlRefstart(0),numJmatch(nmj) { }
+	TOvlData(const TOvlData& o):ovlcode(o.ovlcode), ovlen(o.ovlen),
+			ovlRefstart(o.ovlRefstart), numJmatch(o.numJmatch),
+			jbits(o.jbits), inbits(o.inbits), rint(o.rint)  {}
+	TOvlData(TOvlData&& o):ovlcode(o.ovlcode), ovlen(o.ovlen),
+				ovlRefstart(o.ovlRefstart), numJmatch(o.numJmatch) {
+				jbits=std::move(o.jbits);
+				inbits=std::move(o.inbits);
+				rint=std::move(o.rint);
+	}
+	//TODO: need move operator?
 };
 
-TOvlData getOvlData(GffObj& m, GffObj& r, bool stricterMatch=false, int trange=0);
+TOvlData getOvlData(GffObj& m, GffObj& r, bool stricterMatch=false, int trange=0, bool cdsMatch=false);
 
 char transcriptMatch(GffObj& a, GffObj& b, int& ovlen, int trange=0); //generic transcript match test
 // -- return '=', '~'  or 0
-char singleExonTMatch(GffObj& m, GffObj& r, int& ovlen, int trange=0);
+char singleExonTMatch(GffObj& m, GffObj& r, int& ovlen, int trange=0, int* ovlrefstart=0);
 //single-exon transcript match test - returning '=', '~'  or 0
 
+
+bool txStructureMatch(GffObj& a, GffObj& b, double SE_tolerance=0.8, int ME_range=10000);
+// generic transcript match test: for MET: intron chain match only
+// for single-exon tx: overlap >=80% of the longer transcript (SE_tolerance)
+// for multi-exon tx: terminal exons can differ <=10000 bases (ME_range)
+
 //---
 // -- tracking exon/CDS segments from local mRNA to genome coordinates
 class GMapSeg:public GSeg {
@@ -724,6 +752,7 @@ class GffObj:public GSeg {
     	                                  //deallocated when GffReader is destroyed
     	  bool flag_FINALIZED         :1; //if finalize() was already called for this GffObj
     	  unsigned int gff_level      :4; //hierarchical level (0..15)
+        unsigned int flag_USER_FLAGS :8; //user flags for setUserFlag() and getUserFlag()
       };
    };
    //-- friends:
@@ -776,6 +805,10 @@ public:
   void isGeneSegment(bool v) {flag_GENE_SEGMENT=v; }
   bool promotedChildren() { return flag_CHILDREN_PROMOTED; }
   void promotedChildren(bool v) { flag_CHILDREN_PROMOTED=v; }
+  // user flags are given as 8 bits bitmasks
+  void setUserFlags(byte fmask) {flag_USER_FLAGS |= fmask; }
+  void clearUserFlags(byte fmask) {flag_USER_FLAGS &= ~fmask; }
+  byte getUserFlags(byte fmask) { return (flag_USER_FLAGS & fmask); }
   void setLevel(byte v) { gff_level=v; }
   byte getLevel() { return gff_level; }
   byte incLevel() { gff_level++; return gff_level; }
@@ -821,7 +854,7 @@ public:
       if (sharedattrs) exons[0]->attrs=NULL;
       }
     }
-  GffObj(char* anid=NULL):GSeg(0,0), exons(true,true,false), cdss(NULL), children(1,false), gscore() {
+  GffObj(const char* anid=NULL):GSeg(0,0), exons(true,true,false), cdss(NULL), children(1,false), gscore() {
                                    //exons: sorted, free, non-unique
        gffID=NULL;
        uptr=NULL;
@@ -844,6 +877,42 @@ public:
        geneID=NULL;
        gene_name=NULL;
    }
+   //constructor to use when programatically creating a new transcript
+   // (not when loading from GFF/BED/GTF input!)
+   GffObj(bool newTranscript, const char* _id=nullptr,
+          int refseq_id=-1, char _strand='+'):GSeg(0,0),
+       exons(true,true,false), cdss(NULL), children(1,false), gscore() {
+                                   //exons: sorted, free, non-unique
+       gffID=NULL;
+       uptr=NULL;
+       ulink=NULL;
+       flags=0;
+       udata=0;
+       parent=NULL;
+       if (newTranscript) {
+          flag_IS_TRANSCRIPT=true;
+          ftype_id=gff_fid_transcript;
+          subftype_id=gff_fid_exon;
+          flag_FINALIZED=true;
+
+       } else {
+          ftype_id=-1;
+          subftype_id=-1;
+       }
+       if (_id!=NULL) gffID=Gstrdup(_id);
+       gffnames_ref(names);
+       CDstart=0; // hasCDS <=> CDstart>0
+       CDend=0;
+       CDphase=0;
+       gseq_id=refseq_id;
+       track_id=-1;
+       strand=_strand;
+       attrs=NULL;
+       covlen=0;
+       geneID=NULL;
+       gene_name=NULL;
+   }
+
    ~GffObj() {
        GFREE(gffID);
        GFREE(gene_name);
@@ -951,28 +1020,38 @@ public:
 
    int exonOverlapIdx(GList<GffExon>& segs, uint s, uint e, int* ovlen=NULL, int start_idx=0);
 
-   int exonOverlapLen(GffObj& m) {
-      if (start>m.end || m.start>end) return 0;
+   int exonOverlapLen(GffObj& r, int *rovlstart=NULL) {
+	  if (rovlstart) *rovlstart=0;
+      if (start>r.end || r.start>end) return 0;
       int i=0;
       int j=0;
       int ovlen=0;
-      while (i<exons.Count() && j<m.exons.Count()) {
+      int rxpos=0;
+      while (i<exons.Count() && j<r.exons.Count()) {
         uint istart=exons[i]->start;
         uint iend=exons[i]->end;
-        uint jstart=m.exons[j]->start;
-        uint jend=m.exons[j]->end;
-        if (istart>jend) { j++; continue; }
+        uint jstart=r.exons[j]->start;
+        uint jend=r.exons[j]->end;
+        if (istart>jend) { j++; rxpos+=jend-jstart+1; continue; }
         if (jstart>iend) { i++; continue; }
         //exon overlap
-        uint ovstart=GMAX(istart,jstart);
+        uint ovstart=0;
+        if (istart>jstart) {
+        	ovstart=istart;
+        	if (rovlstart && *rovlstart==0) *rovlstart=rxpos+istart-jstart+1;
+        } else {
+        	ovstart=jstart;
+        	if (rovlstart && *rovlstart==0) *rovlstart=rxpos+1;
+        }
         if (iend<jend) {
            ovlen+=iend-ovstart+1;
            i++;
            }
         else {
            ovlen+=jend-ovstart+1;
-           j++;
+           j++; rxpos+=jend-jstart+1;
            }
+
         }//while comparing exons
       return ovlen;
       }
@@ -1015,6 +1094,12 @@ public:
         GFREE(geneID);
         if (gene_id) geneID=Gstrdup(gene_id);
    }
+   void setID(const char* tid) {
+        if (tid) {
+          GFREE(gffID);
+          gffID=Gstrdup(tid);
+        }
+   }
    int addSeg(GffLine* gfline);
    int addSeg(int fnid, GffLine* gfline);
    void getCDSegs(GVec<GffExon>& cds);
@@ -1041,7 +1126,7 @@ public:
    void printExonList(FILE* fout); //print comma delimited list of exon intervals
    void printCDSList(FILE* fout); //print comma delimited list of CDS intervals
 
-   void printBED(FILE* fout, bool cvtChars);
+   void printBED(FILE* fout, bool cvtChars=false);
        //print a BED-12 line + GFF3 attributes in 13th field
    void printSummary(FILE* fout=NULL);
 
@@ -1100,6 +1185,15 @@ class GSeqStat {
 int gfo_cmpByLoc(const pointer p1, const pointer p2);
 int gfo_cmpRefByID(const pointer p1, const pointer p2);
 
+//multi-exon transcripts basic comparison/ordering function
+// based on intron chain comparison - use only for lists with multiple multi-exon transcripts!
+int txCmpByIntrons(const pointer p1, const pointer p2);
+int txCmpByExons(const pointer p1, const pointer p2);
+
+//single-exon transcripts basic comparison/ordering function
+int seTxCompareProc(pointer* p1, pointer* p2);
+
+
 class GfList: public GList<GffObj> {
  public:
 
@@ -1224,7 +1318,9 @@ class GffReader {
   //GffObj* replaceGffRec(GffLine* gffline, bool keepAttr, bool noExonAttr, int replaceidx);
   GffObj* updateGffRec(GffObj* prevgfo, GffLine* gffline);
   GffObj* updateParent(GffObj* newgfh, GffObj* parent);
-  bool readExonFeature(GffObj* prevgfo, GffLine* gffline, GHash<CNonExon*>* pex = NULL);
+
+  bool readExonFeature(GffObj* prevgfo, GffLine* gffline, GHash<CNonExon*>* pex=NULL);
+
   GPVec<GSeqStat> gseqStats; //populated after finalize() with only the ref seqs in this file
   GffReader(FILE* f=NULL, bool t_only=false, bool sort=false):linebuf(NULL), fpos(0),
 		  buflen(0), flags(0), fh(f), fname(NULL), commentParser(NULL), gffline(NULL),



View it on GitLab: https://salsa.debian.org/med-team/libgclib/-/commit/d83ce93848b2fa6b3151c98438132d10febd00d8

-- 
View it on GitLab: https://salsa.debian.org/med-team/libgclib/-/commit/d83ce93848b2fa6b3151c98438132d10febd00d8
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20251127/6783b00a/attachment-0001.htm>