[med-svn] [libgff] 01/02: Imported Upstream version 1.0

Michael Crusoe misterc-guest at moszumanska.debian.org
Sat Sep 19 06:30:13 UTC 2015


This is an automated email from the git hooks/post-receive script.

misterc-guest pushed a commit to branch master
in repository libgff.

commit aa3684a37b5064bb9efe2ecf57e96f5db14b5b9e
Author: Michael R. Crusoe <crusoe at ucdavis.edu>
Date:   Fri Sep 18 20:29:29 2015 -0700

    Imported Upstream version 1.0
---
 BoostLicense.txt      |   23 +
 CMakeLists.txt        |   27 +
 Readme.md             |    9 +
 include/GArgs.h       |   98 +++
 include/GBase.h       |  458 +++++++++++
 include/GFaSeqGet.h   |  112 +++
 include/GFastaIndex.h |   79 ++
 include/GHash.hh      |  561 +++++++++++++
 include/GList.hh      |  638 +++++++++++++++
 include/GStr.h        |  213 +++++
 include/GVec.hh       |  907 +++++++++++++++++++++
 include/codons.h      |   54 ++
 include/gdna.h        |   15 +
 include/gff.h         | 1088 +++++++++++++++++++++++++
 include/gff_utils.h   |  610 ++++++++++++++
 src/GArgs.cpp         |  376 +++++++++
 src/GBase.cpp         |  780 ++++++++++++++++++
 src/GFaSeqGet.cpp     |  319 ++++++++
 src/GFastaIndex.cpp   |  170 ++++
 src/GStr.cpp          | 1345 +++++++++++++++++++++++++++++++
 src/TestGFFParse.cpp  |   34 +
 src/codons.cpp        |   90 +++
 src/gdna.cpp          |   90 +++
 src/gff.cpp           | 2125 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/gff_utils.cpp     |  664 +++++++++++++++
 25 files changed, 10885 insertions(+)

diff --git a/BoostLicense.txt b/BoostLicense.txt
new file mode 100644
index 0000000..3998b97
--- /dev/null
+++ b/BoostLicense.txt
@@ -0,0 +1,23 @@
+Boost Software License - Version 1.0 - August 17, 2003
+ 
+ Permission is hereby granted, free of charge, to any person or organization
+ obtaining a copy of the software and accompanying documentation covered by
+ this license (the "Software") to use, reproduce, display, distribute,
+ execute, and transmit the Software, and to prepare derivative works of the
+ Software, and to permit third-parties to whom the Software is furnished to
+ do so, all subject to the following:
+  
+  The copyright notices in the Software and this entire statement, including
+  the above license grant, this restriction and the following disclaimer,
+  must be included in all copies of the Software, in whole or in part, and
+  all derivative works of the Software, unless such copies or derivative
+  works are solely in the form of machine-executable object code generated by
+  a source language processor.
+   
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+   SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+   FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..de5d58e
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,27 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
+PROJECT(gff)
+
+SET(PROJECT_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include) # must precede the next line, which changes PROJECT_SOURCE_DIR
+SET(PROJECT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) # NOTE: shadows the value set by PROJECT(); below it means src/
+
+SET( CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS} -O3 -Wall -D_FILE_OFFSET_BITS=64" )
+
+SET(GFFLib_SRCS
+    ${PROJECT_SOURCE_DIR}/codons.cpp
+    ${PROJECT_SOURCE_DIR}/GArgs.cpp
+    ${PROJECT_SOURCE_DIR}/GBase.cpp
+    ${PROJECT_SOURCE_DIR}/gdna.cpp
+    ${PROJECT_SOURCE_DIR}/GFaSeqGet.cpp
+    ${PROJECT_SOURCE_DIR}/GFastaIndex.cpp
+    ${PROJECT_SOURCE_DIR}/gff.cpp
+    ${PROJECT_SOURCE_DIR}/gff_utils.cpp
+    ${PROJECT_SOURCE_DIR}/GStr.cpp)
+
+INCLUDE_DIRECTORIES(${PROJECT_INCLUDE_DIR})
+
+ADD_LIBRARY(${PROJECT_NAME} STATIC ${GFFLib_SRCS})
+ADD_EXECUTABLE(TestGFFParse ${PROJECT_SOURCE_DIR}/TestGFFParse.cpp)
+TARGET_LINK_LIBRARIES(TestGFFParse ${PROJECT_NAME})
+
+INSTALL(TARGETS ${PROJECT_NAME} ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) # let CMake locate/name the static library
+INSTALL(DIRECTORY ${PROJECT_INCLUDE_DIR} DESTINATION ${CMAKE_INSTALL_PREFIX})
diff --git a/Readme.md b/Readme.md
new file mode 100644
index 0000000..a177580
--- /dev/null
+++ b/Readme.md
@@ -0,0 +1,9 @@
+LibGFF
+======
+
+This is an attempt to perform a simple "libraryfication" of the GFF/GTF parsing
+code that is used in the [Cufflinks](http://cufflinks.cbcb.umd.edu/index.html)
+codebase.  There are not many (any?) relatively lightweight GTF/GFF parsers
+exposing a C++ interface, and the goal of this library is to provide this
+functionality without the necessity of drawing in a heavy-weight dependency
+like SeqAn.  
diff --git a/include/GArgs.h b/include/GArgs.h
new file mode 100644
index 0000000..92f32fb
--- /dev/null
+++ b/include/GArgs.h
@@ -0,0 +1,98 @@
+/*
+GArgs is a quick'n'dirty object oriented replacement for the standard 
+   getopts library call available on many unix platforms;
+   it accepts the regular single dash style options 
+     -<letter>[ ][<value>] 
+   but also attr=value style options:
+     <optname>=<value>
+*/
+
+#ifndef G_ARGS_DEFINED
+#define G_ARGS_DEFINED
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+
+struct GArgsDef { //one option definition record, as taken by the GArgsDef[] constructor of GArgs
+  const char* longopt; //long option string (cf. 'opt' below for the one-char form)
+  char opt; //equivalent one-char option, if any
+  bool req_value; //true if the string that follows must be a value
+  int code; //an enum code to be associated with this option
+};
+
+class GArgs { //command line parser; the accepted format syntax is described at the constructors below
+   //structure for parsing arguments format definition
+   struct fmtdef {
+     char* longopt;
+     char opt; //equivalent one-char option, if any
+     bool req_value; //true if the string that follows must be a value
+     int code; //an enum code to be associated with this option
+     };
+   int fmtcount; //number of option definitions (the value returned by getFmtCount())
+   fmtdef* fmt; //this will store format definition after parsing it
+   struct argdata {
+     char*  opt; // this is NULL for non-dashed arguments
+                 // a single character for single dash style arguments
+                 // a string for ARG=VALUE or --long_option style arguments
+     char* value; // is NULL for switches (dashed flags)
+     int fmti; //index in fmt table
+     //int code; // if GArgsDef[] constructor was used, for getOpt
+     };
+   int _argc;            
+   char* const *_argv; //the original main() values
+   argdata* args; //arguments table after parsing it
+   int count; //total count of elements in 'args' array
+   int nonOptCount; //count of non-dashed, non= arguments
+   int nonOptPos; //current position for nonOpt arguments iterator
+   int optPos; //current position for options iterator
+   int errarg; //argv error position after parsing
+   bool err_valmissing; //if the error is strictly about missing value for errarg option
+   int parseArgs(bool nodigitopts=false);
+   //parsing helper functions
+   int validOpt(int c);  
+   int validShortOpt(char o);  
+   int validLongOpt(char* o, char* to);
+ public:
+ 
+   GArgs(int argc, char* const argv[], const char* format, bool nodigitopts=false);
+   /* format can be:
+       <string>{;|=} e.g. disable-test;PID=S= for --disable-test PID=50 (or --PID 50) S=3.5 etc.
+       <letter>[:]  e.g. p:hT  for -p testing (or -ptesting) -h -T
+   This means that the long options, if present, should be given at the beginning
+   of the format string, before the single-dash, single-char options
+   */
+   GArgs(int argc, char* const argv[], const GArgsDef fmtrecs[], bool nodigitopts=false);
+   
+   ~GArgs();
+   int isError(); // returns the offending argv position or 0 if no error
+   int getCount() { return count; } //total number of arguments given
+   int getFmtCount() { return fmtcount; } //total number of option definitions
+   int getNonOptCount() { return nonOptCount; } //total number of non-option arguments
+   char* getOpt(const char* o); /* retrieve the value for option o
+                   returns 
+                       NULL    if option not given at all
+                     !=NULL    if boolean option was given
+                     opt's value if value option was given
+                     */
+   char* getOpt(const char o);
+   char* getOpt(int c); //retrieve value by enum code
+   char* getOptName(int c); //retrieve the option name by enum code
+   int startOpt(); //init iteration through option arguments
+       // returns number of option args
+       
+   char* nextOpt(); //get next option argument's string
+   int nextCode(); //get next option argument's code
+
+   int startNonOpt(void); //init iteration through non-option arguments
+             // returns the number of non-option arguments
+   void printError(FILE* fout, const char* usage=NULL,
+                      bool exitProgram=false);
+   void printError(const char* usage=NULL, bool exitProgram=false);
+   void printCmdLine(FILE* fout);
+   char* nextNonOpt(); //get the next non-option argument
+};
+
+#endif
diff --git a/include/GBase.h b/include/GBase.h
new file mode 100644
index 0000000..fc3c5ba
--- /dev/null
+++ b/include/GBase.h
@@ -0,0 +1,458 @@
+#ifndef G_BASE_DEFINED
+#define G_BASE_DEFINED
+#ifndef _POSIX_SOURCE
+//mostly for MinGW
+#define _POSIX_SOURCE
+#endif
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdint.h>
+
+#if defined __WIN32__ || defined WIN32 || defined _WIN32 || defined _WIN32_
+  #ifndef __WIN32__
+    #define __WIN32__
+  #endif
+  #include <windows.h>
+  #include <io.h>
+  #define CHPATHSEP '\\'
+  #undef off_t
+  #define off_t int64_t
+  #ifndef popen
+   #define popen _popen
+  #endif
+  #ifndef fseeko
+		#ifdef _fseeki64
+			#define fseeko(stream, offset, origin) _fseeki64(stream, offset, origin)
+		#else
+			/*
+			#define _DEFINE_WIN32_FSEEKO
+			int fseeko(FILE *stream, off_t offset, int whence);
+			*/
+			#define fseeko fseek
+		#endif
+  #endif
+ #ifndef ftello
+  #ifdef _ftelli64
+    #define ftello(stream) _ftelli64(stream)
+  #else
+    /*
+    #define _DEFINE_WIN32_FTELLO
+    off_t ftello(FILE *stream);
+    */
+    #define ftello ftell
+  #endif
+ #endif
+ #else
+  #define CHPATHSEP '/'
+  #include <unistd.h>
+#endif
+
+#ifndef fseeko
+ #define fseeko fseek
+#endif
+#ifndef ftello
+ #define ftello ftell
+#endif
+
+#ifdef DEBUG
+#undef NDEBUG
+#endif
+
+typedef int32_t int32;
+typedef uint32_t uint32;
+typedef int16_t int16;
+typedef uint16_t uint16;
+
+typedef unsigned char uchar;
+typedef unsigned char byte;
+
+#ifndef MAXUINT
+#define MAXUINT ((unsigned int)-1)
+#endif
+
+#ifndef MAXINT
+#define MAXINT INT_MAX
+#endif
+
+#ifndef MAX_UINT
+#define MAX_UINT ((unsigned int)-1)
+#endif
+
+#ifndef MAX_INT
+#define MAX_INT INT_MAX
+#endif
+
+typedef int64_t int64;
+typedef uint64_t uint64;
+
+/****************************************************************************/
+
+#ifndef EXIT_FAILURE
+#define EXIT_FAILURE 1
+#endif
+
+#ifndef EXIT_SUCCESS
+#define EXIT_SUCCESS 0
+#endif
+
+/****************************************************************************/
+#define ERR_ALLOC "Error allocating memory.\n"
+
+//-------------------
+
+// Debug helpers
+#ifndef NDEBUG
+ #define GASSERT(exp) ((exp)?((void)0):(void)GAssert(#exp,__FILE__,__LINE__))
+ #ifdef TRACE
+  #define GTRACE(exp)  (GMessage exp)
+ #else
+  #define GTRACE(exp)  ((void)0)
+ #endif
+#else
+ #define GASSERT(exp) ((void)0)
+ #define GTRACE(exp)  ((void)0)
+#endif
+
+#define GERROR(exp) (GError exp)
+/**********************************  Macros  ***********************************/
+// Absolute value
+#define GABS(val) (((val)>=0)?(val):-(val))
+
+// Min and Max
+#define GMAX(a,b) (((a)>(b))?(a):(b))
+#define GMIN(a,b) (((a)>(b))?(b):(a))
+
+// Min of three
+#define GMIN3(x,y,z) ((x)<(y)?GMIN(x,z):GMIN(y,z))
+
+// Max of three
+#define GMAX3(x,y,z) ((x)>(y)?GMAX(x,z):GMAX(y,z))
+
+// Return minimum and maximum of a, b
+#define GMINMAX(lo,hi,a,b) ((a)<(b)?((lo)=(a),(hi)=(b)):((lo)=(b),(hi)=(a)))
+
+// Clamp value x to range [lo..hi]
+#define GCLAMP(lo,x,hi) ((x)<(lo)?(lo):((x)>(hi)?(hi):(x)))
+
+typedef void* pointer;
+typedef unsigned int uint;
+
+typedef int GCompareProc(const pointer item1, const pointer item2);
+typedef long GFStoreProc(const pointer item1, FILE* fstorage); //for serialization
+typedef pointer GFLoadProc(FILE* fstorage); //for deserialization
+
+typedef void GFreeProc(pointer item); //usually just delete,
+      //but may also support structures with embedded dynamic members
+
+#define GMALLOC(ptr,size)  if (!GMalloc((pointer*)(&ptr),size)) \
+                                     GError(ERR_ALLOC)
+#define GCALLOC(ptr,size)  if (!GCalloc((pointer*)(&ptr),size)) \
+                                     GError(ERR_ALLOC)
+#define GREALLOC(ptr,size) if (!GRealloc((pointer*)(&ptr),size)) \
+                                     GError(ERR_ALLOC)
+#define GFREE(ptr)       GFree((pointer*)(&ptr))
+
+inline char* strMin(char *arg1, char *arg2) { //the string that sorts first by strcmp(); arg2 on ties
+    return (strcmp(arg1, arg2) < 0)? arg1 : arg2;
+}
+
+inline char* strMax(char *arg1, char *arg2) { //the string that sorts last by strcmp(); arg2 on ties
+    return (strcmp(arg2, arg1) < 0)? arg1 : arg2;
+}
+
+inline int iround(double x) { //round to nearest int; exact halves go up (toward +infinity)
+   return (int)floor(x + 0.5);
+}
+
+/****************************************************************************/
+
+inline int Gintcmp(int a, int b) {
+ //three-way compare: -1 if a<b, 0 if equal, 1 if a>b (a plain a-b would overflow for distant operands)
+  return (a>b)-(a<b);
+}
+
+int Gstrcmp(const char* a, const char* b, int n=-1);
+//same as strcmp but doesn't crash on NULL pointers
+
+int Gstricmp(const char* a, const char* b, int n=-1);
+
+//basic swap template function
+template<class T> void Gswap(T& lhs, T& rhs) {
+ //register T tmp=lhs;
+ T tmp=lhs; //requires T to be copy-constructible (here) and assignable (below)
+ lhs=rhs;
+ rhs=tmp;
+}
+
+
+/**************** Memory management ***************************/
+
+bool GMalloc(pointer* ptr, unsigned long size); // Allocate memory
+bool GCalloc(pointer* ptr, unsigned long size); // Allocate and initialize memory
+bool GRealloc(pointer* ptr,unsigned long size); // Resize memory
+void GFree(pointer* ptr); // Free memory, resets ptr to NULL
+
+
+//int saprintf(char **retp, const char *fmt, ...);
+
+void GError(const char* format,...); // Error routine (aborts program)
+void GMessage(const char* format,...);// Log message to stderr
+// Assert failed routine:- usually not called directly but through GASSERT
+void GAssert(const char* expression, const char* filename, unsigned int lineno);
+
+// ****************** string manipulation *************************
+char *Gstrdup(const char* str);
+//duplicate a string by allocating a copy for it and returning it
+char* Gstrdup(const char* sfrom, const char* sto);
+//same as Gstrdup, but with an early termination (e.g. on delimiter)
+
+char* Gsubstr(const char* str, char* from, char* to=NULL);
+//extracts a substring, allocating it, including boundaries (from/to)
+
+int strsplit(char* str, char** fields, int maxfields, const char* delim);
+int strsplit(char* str, char** fields, int maxfields, const char delim);
+int strsplit(char* str, char** fields, int maxfields); //splits by tab or space
+
+char* replaceStr(char* &str, char* newvalue);
+
+//conversion: to Lower/Upper case
+// creating a new string:
+char* upCase(const char* str);
+char* loCase(const char* str);
+// changing string in place:
+char* strlower(char * str);
+char* strupper(char * str);
+
+//strstr but for memory zones: scans a memory region
+//for a substring:
+void* Gmemscan(void *mem, unsigned int len,
+                  void *part, unsigned int partlen);
+
+// test if a char is in a string:
+bool chrInStr(char c, const char* str);
+
+char* rstrchr(char* str, char ch);
+/* returns a pointer to the rightmost
+  occurrence of ch in str - like rindex for platforms missing it*/
+
+char* strchrs(const char* s, const char* chrs);
+//strchr but with a set of chars instead of only one
+
+char* rstrfind(const char* str, const char *substr); 
+// like rindex() but for strings;  right side version of strstr()
+
+char* reverseChars(char* str, int slen=0); //in place reversal of string 
+
+char* rstrstr(const char* rstart, const char *lend, const char* substr);
+/*the reversed, rightside equivalent of strstr: starts searching
+ from right end (rstart), going back to left end (lend) and returns
+ a pointer to the last (right) matching character in str */
+
+char* strifind(const char* str,  const char* substr);
+// the case insensitive version of strstr -- finding a string within a string
+
+
+//Determines if a string begins with a given prefix
+//(returns false when any of the params is NULL,
+// but true when prefix is '' (empty string)!)
+bool startsWith(const char* s, const char* prefix);
+
+bool endsWith(const char* s, const char* suffix);
+//Note: returns true if suffix is empty string, but false if it's NULL
+
+
+// ELF hash function for strings
+int strhash(const char* str);
+
+
+
+//---- generic base GSeg : genomic segment (interval) --
+// coordinates are considered 1-based (so 0 is invalid)
+class GSeg {
+ public:
+  uint start; //start<=end always (the constructor swaps reversed coordinates)
+  uint end;
+  GSeg(uint s=0,uint e=0) {
+    if (s>e) { start=e;end=s; }
+        else { start=s;end=e; }
+    }
+  //segment length, both end coordinates included
+  uint len() { return end-start+1; }
+  bool overlap(GSeg* d) { //check for overlap with other segment
+     //return start<d->start ? (d->start<=end) : (start<=d->end);
+     return (start<=d->end && end>=d->start);
+     }
+
+  bool overlap(GSeg& d) {
+     //return start<d.start ? (d.start<=end) : (start<=d.end);
+     return (start<=d.end && end>=d.start);
+     }
+
+  bool overlap(GSeg& d, int fuzz) { //overlap test with both segments' ends extended by fuzz
+     //return start<d.start ? (d.start<=end+fuzz) : (start<=d.end+fuzz);
+     return (start<=d.end+fuzz && end+fuzz>=d.start); //NOTE(review): int fuzz is converted to unsigned here
+     }
+
+  bool overlap(uint s, uint e) {
+     if (s>e) { Gswap(s,e); }
+     //return start<s ? (s<=end) : (start<=e);
+     return (start<=e && end>=s);
+     }
+
+  //return the length of overlap between two segments
+  int overlapLen(GSeg* r) {
+     if (start<r->start) {
+        if (r->start>end) return 0;
+        return (r->end>end) ? end-r->start+1 : r->end-r->start+1;
+        }
+       else { //r->start<=start
+        if (start>r->end) return 0;
+        return (r->end<end)? r->end-start+1 : end-start+1;
+        }
+     }
+  int overlapLen(uint rstart, uint rend) { //same, for a raw coordinate pair (swapped first if reversed)
+     if (rstart>rend) { Gswap(rstart,rend); }
+     if (start<rstart) {
+        if (rstart>end) return 0;
+        return (rend>end) ? end-rstart+1 : rend-rstart+1;
+        }
+       else { //rstart<=start
+        if (start>rend) return 0;
+        return (rend<end)? rend-start+1 : end-start+1;
+        }
+     }
+
+  //fuzzy coordinate matching: both start and end may differ by at most fuzz
+  bool coordMatch(GSeg* s, uint fuzz=0) {
+    if (fuzz==0) return (start==s->start && end==s->end);
+    uint sd = (start>s->start) ? start-s->start : s->start-start;
+    uint ed = (end>s->end) ? end-s->end : s->end-end;
+    return (sd<=fuzz && ed<=fuzz);
+    }
+  //comparison operators required for sorting
+  bool operator==(GSeg& d){
+      return (start==d.start && end==d.end);
+      }
+  bool operator<(GSeg& d){ //orders by start, then by end
+     return (start==d.start)?(end<d.end):(start<d.start);
+     }
+};
+
+
+
+//--------------------------------------------------------
+// ************** simple line reading class for text files
+
+//GLineReader -- text line reading/buffering class
+class GLineReader { //NOTE(review): no copy ctor/assignment declared, so a copy would share buf and file
+   bool closeFile; //true only when the file was opened by the filename constructor
+   int len;
+   int allocated;
+   char* buf;
+   bool isEOF;
+   FILE* file;
+   off_t filepos; //current position
+   bool pushed; //pushed back
+   int lcount; //line counter (read lines)
+ public:
+   char* chars() { return buf; }
+   char* line() { return buf; }
+   int readcount() { return lcount; } //number of lines read
+   void setFile(FILE* stream) { file=stream; }
+   int length() { return len; }
+   int size() { return len; } //same as length()
+   bool isEof() {return isEOF; }
+   bool eof() { return isEOF; }
+   off_t getfpos() { return filepos; }
+   off_t getFpos() { return filepos; }
+   char* nextLine() { return getLine(); }
+   char* getLine() { if (pushed) { pushed=false; return buf; }
+                            else return getLine(file);  }
+   char* getLine(FILE* stream) {
+                 if (pushed) { pushed=false; return buf; }
+                          else return getLine(stream, filepos); }
+   char* getLine(FILE* stream, off_t& f_pos); //read a line from a stream and update
+                           // the given file position
+   void pushBack() { if (lcount>0) pushed=true; } // "undo" the last getLine request
+            // so the next call will in fact return the same line
+   GLineReader(const char* fname) { //opens fname in binary read mode; the destructor will close it
+      FILE* f=fopen(fname, "rb");
+      if (f==NULL) GError("Error opening file '%s'!\n",fname);
+      closeFile=true;
+      init(f);
+      }
+   GLineReader(FILE* stream=NULL, off_t fpos=0) { //reads from a caller-provided stream, which is not closed here
+     closeFile=false;
+     init(stream,fpos);
+     }
+   void init(FILE* stream, off_t fpos=0) { //resets all state and allocates a fresh 1024-byte line buffer
+     len=0;
+     isEOF=false;
+     allocated=1024;
+     GMALLOC(buf,allocated);
+     lcount=0;
+     buf[0]=0;
+     file=stream;
+     filepos=fpos;
+     pushed=false;
+     }
+   ~GLineReader() {
+     GFREE(buf);
+     if (closeFile) fclose(file);
+     }
+};
+
+
+/* extended fgets() -  to read one full line from a file and
+  update the file position correctly !
+  buf will be reallocated as necessary, to fit the whole line
+  */
+char* fgetline(char* & buf, int& buflen, FILE* stream, off_t* f_pos=NULL, int* linelen=NULL);
+
+
+//print int/values nicely formatted in 3-digit groups
+char* commaprint(uint64 n);
+
+/*********************** File management functions *********************/
+
+// removes the last part (file or directory name) of a full path
+// WARNING: this is a destructive operation for the given string!
+void delFileName(char* filepath);
+
+// returns a pointer to the last file or directory name in a full path
+const char* getFileName(const char* filepath);
+// returns a pointer to the file "extension" part in a filename
+const char* getFileExt(const char* filepath);
+
+
+int fileExists(const char* fname);
+//returns 0 if file entry doesn't exist
+//        1 if it's a directory
+//        2 if it's a regular file
+//        3 otherwise (?)
+
+int64 fileSize(const char* fpath);
+
+//write a sequence record in FASTA format
+void writeFasta(FILE *fw, const char* seqid, const char* descr,
+        const char* seq, int linelen=60, int seqlen=0);
+
+//parses the next number found in a string at the current position
+//until a non-digit (and not a '.', 'e','E','-','+') is encountered;
+//updates the char* pointer to be after the last digit parsed
+bool parseNumber(char* &p, double& v);
+bool parseDouble(char* &p, double& v); //just an alias for parseNumber
+
+bool parseInt(char* &p, int& i);
+bool parseUInt(char* &p, uint& i);
+bool parseHex(char* &p,  uint& i);
+
+#endif /* G_BASE_DEFINED */
diff --git a/include/GFaSeqGet.h b/include/GFaSeqGet.h
new file mode 100644
index 0000000..d655a86
--- /dev/null
+++ b/include/GFaSeqGet.h
@@ -0,0 +1,112 @@
+#ifndef GFASEQGET_H
+#define GFASEQGET_H
+#include "GList.hh"
+
+#define MAX_FASUBSEQ 0x20000000
+//max 512MB sequence data held in memory at a time
+
+class GSubSeq { //a loaded window of sequence data; sq is released with GFREE in the destructor
+ public:
+  uint sqstart; //1-based coord of subseq start on sequence
+  uint sqlen;   //length of subseq loaded
+  char* sq; //actual subsequence data will be stored here
+                // (with end-of-line characters removed)
+
+  /*char* xseq; //the exposed pointer to the last requested subsequence start
+  off_t xstart; //the coordinate start for the last requested subseq
+  off_t xlen; //the last requested subseq len*/
+  GSubSeq() {
+     sqstart=0;
+     sqlen=0;
+     sq=NULL;
+     /* xseq=NULL;
+     xstart=0;
+     xlen=0;*/
+     }
+  ~GSubSeq() {
+     GFREE(sq);
+     }
+  // genomic, 1-based coordinates:
+  void setup(uint sstart, int slen, int sovl=0, int qfrom=0, int qto=0, uint maxseqlen=0);
+    //check for overlap with previous window and realloc/extend appropriately
+    //NOTE(review): declared void, so the offset for sstart mentioned by older notes is not returned
+    // the window will keep extending until MAX_FASUBSEQ is reached
+};
+
+class GFaSeqGet {
+  char* fname;
+  FILE* fh;
+  //raw offset in the file where the sequence actually starts:
+  off_t fseqstart;
+  uint seq_len; //total sequence length, if known (when created from GFastaIndex)
+  int line_len; //length of each line of text
+  int line_blen; //binary length of each line
+                 // = line_len + number of EOL character(s)
+  GSubSeq* lastsub; //deleted in the destructor; its sqlen is what getsublen() reports
+  void initialParse(off_t fofs=0, bool checkall=true);
+  const char* loadsubseq(uint cstart, int& clen);
+  void finit(const char* fn, off_t fofs, bool validate);
+ public:
+  GFaSeqGet() {
+    fh=NULL;
+    fseqstart=0;
+    seq_len=0;
+    line_len=0;
+    line_blen=0;
+    fname=NULL;
+    lastsub=NULL;
+    }
+  GFaSeqGet(const char* fn, off_t fofs, bool validate=false) {
+     seq_len=0; //only seq_len is set here; presumably finit() sets the other members - TODO confirm
+     finit(fn,fofs,validate); 
+     }
+  GFaSeqGet(const char* fn, bool validate=false) {
+     seq_len=0;
+     finit(fn,0,validate);
+     }
+
+  GFaSeqGet(const char* faname, uint seqlen, off_t fseqofs, int l_len, int l_blen);
+  //constructor from GFastaIndex record
+
+  GFaSeqGet(FILE* f, off_t fofs=0, bool validate=false);
+
+  ~GFaSeqGet() {
+    if (fname!=NULL) { //fh is closed only when fname is set
+       GFREE(fname);
+       fclose(fh);
+       }
+    delete lastsub;
+    }
+  const char* subseq(uint cstart, int& clen);
+  const char* getRange(uint cstart=1, uint cend=0) {
+      if (cend==0) cend=(seq_len>0)?seq_len : MAX_FASUBSEQ; //cend 0 means "to the end" (or the max window)
+      if (cstart>cend) { Gswap(cstart, cend); }
+      int clen=cend-cstart+1;
+      //int rdlen=clen;
+      return subseq(cstart, clen);
+      }
+
+  char* copyRange(uint cstart, uint cend, bool revCmpl=false, bool upCase=false);
+  //caller is responsible for deallocating the return string
+
+  void loadall(uint32 max_len=0) {
+    //TODO: better read the whole sequence differently here - line by line
+    //so when EOF or another '>' line is found, the reading stops!
+    int clen=(seq_len>0) ? seq_len : ((max_len>0) ? max_len : MAX_FASUBSEQ);
+    subseq(1, clen);
+    }
+  void load(uint cstart, uint cend) {
+     //cache as much as possible
+      if (seq_len>0 && cend>seq_len) cend=seq_len; //correct a bad request
+      int clen=cend-cstart+1; //NOTE(review): cstart>cend is not checked here, unlike in getRange()
+      subseq(cstart, clen);
+     }
+  int getsublen() { return lastsub!=NULL ? lastsub->sqlen : 0 ; }
+  off_t getseqofs() { return fseqstart; }
+  int getLineLen() { return line_len; }
+  int getLineBLen() { return line_blen; }
+  //NOTE(review): stray note, likely meant for subseq(): reads a subsequence starting at 1-based cstart
+ };
+
+
+#endif
diff --git a/include/GFastaIndex.h b/include/GFastaIndex.h
new file mode 100644
index 0000000..1509f3a
--- /dev/null
+++ b/include/GFastaIndex.h
@@ -0,0 +1,79 @@
+/*
+ * GFastaIndex.h
+ *
+ *  Created on: Aug 25, 2010
+ *      Author: gpertea
+ */
+
+#ifndef GFAIDX_H_
+#define GFAIDX_H_
+
+#include "GHash.hh"
+#include "GList.hh"
+
+class GFastaRec { //one FASTA index entry; the comparison operators order entries by file offset
+ public:
+  char* seqname;
+  uint seqlen;
+  off_t fpos;
+  int line_len; //effective line length (without EoL)
+  int line_blen; //length of line including EoL characters
+  GFastaRec(uint slen=0, off_t fp=0, int llen=0, int llenb=0) {
+    seqname=NULL; //not owned: never freed by this class (presumably set later to a shared name - TODO confirm)
+    seqlen=slen;
+    fpos=fp;
+    line_len=llen;
+    line_blen=llenb;
+    }
+  bool operator==(GFastaRec& d){
+      return (fpos==d.fpos);
+      }
+  bool operator>(GFastaRec& d){
+     return (fpos>d.fpos);
+     }
+  bool operator<(GFastaRec& d){
+    return (fpos<d.fpos);
+    }
+
+};
+
+class GFastaIndex {
+  char* fa_name; //private copy of the fasta file name (freed in the destructor)
+  char* fai_name; //private copy of the index file name, or NULL if none was given
+  bool haveFai; //true if loading fai_name produced at least one record
+ public:
+  GHash<GFastaRec> records;
+  void addRecord(const char* seqname, uint seqlen,
+                    off_t foffs, int llen, int llen_full);
+
+  GFastaRec* getRecord(const char* seqname) {
+    return records.Find(seqname);
+    }
+  bool hasIndex() { return haveFai; }
+  int loadIndex(const char* finame);
+  int buildIndex(); //build index in memory by parsing the whole fasta file
+  int storeIndex(const char* finame);
+  int storeIndex(FILE* fai);
+  int getCount() { return records.Count(); }
+  GFastaIndex(const char* fname, const char* finame=NULL):records() {
+    if (fileExists(fname)!=2) GError("Error: fasta file %s not found!\n",fname);
+    if (fileSize(fname)<=0) GError("Error: invalid fasta file %s !\n",fname);
+    fa_name=Gstrdup(fname);
+    fai_name=finame!=NULL ? Gstrdup(finame) : NULL;
+    if (fileSize(fa_name)==0) { //NOTE(review): looks redundant - a size<=0 was already rejected above
+      GError("Error creating GFastaIndex(%s): invalid fasta file!\n",fa_name);
+      }
+    haveFai=false;
+    if (fai_name!=NULL && fileSize(fai_name)>0) {
+       //try to load the index file if it exists
+       loadIndex(fai_name);
+       haveFai=(records.Count()>0);
+       }
+    }
+  ~GFastaIndex() {
+    GFREE(fa_name);
+    GFREE(fai_name);
+    }
+};
+
+#endif /* GFAIDX_H_ */
diff --git a/include/GHash.hh b/include/GHash.hh
new file mode 100644
index 0000000..5122e1d
--- /dev/null
+++ b/include/GHash.hh
@@ -0,0 +1,561 @@
+/********************************************************************************
+*                  Hash table class template (char* based)                               *
+*********************************************************************************/
+
+#ifndef GHash_HH
+#define GHash_HH
+#include "GBase.h"
+
+/**
+* This class maintains a fast-access hash table of entities
+* indexed by a character string (essentially, maps strings to pointers)
+*/
+
+
+// Open-addressing hash table mapping C strings to OBJ pointers.
+// Slot states are encoded in GHashEntry::hash:
+//   >=0 : slot in use (value is the hash of the key)
+//   -1  : slot never used
+//   -2  : slot whose entry was removed
+template <class OBJ> class GHash {
+ protected:
+	struct GHashEntry {
+	     char*   key;              // Key string
+	     bool    keyalloc;         // true if the key string was duplicated by the hash
+	                               // (and must be freed by it); false for shared keys
+	     int     hash;             // Hash value of key (negative: slot not in use)
+	     pointer data;              // Data
+	     bool    mark;             // Entry is marked
+	     };
+  GHashEntry* hash;         // Hash
+  int         fCapacity;     // table size
+  int         fCount;        // number of valid entries
+  int  fCurrentEntry;        // iteration cursor (-1 until startIterate() is called)
+  char* lastkeyptr; //pointer to last key string added
+    //---------- Raw data retrieval (including empty entries)
+  // Return key at position pos.
+  const char* Key(uint pos) const { return hash[pos].key; }
+  // return data OBJ* at given position
+  OBJ* Data(uint pos) const { return (OBJ*) hash[pos].data; }
+  // Return mark flag of entry at position pos.
+  bool Mark(uint pos) const { return hash[pos].mark; }
+  // Return position of first filled slot, or >= fCapacity
+  int First() const;
+  // Return position of last filled slot or -1
+  int Last() const;
+  // Return position of next filled slot in hash table
+  // or a value greater than or equal to fCapacity if no filled
+  // slot was found
+  int Next(int pos) const;
+  //Return position of previous filled slot in hash table
+  //or a -1 if no filled slot was found
+  int Prev(int pos) const;
+
+private:
+  // copying is disabled: declared but intentionally not usable from outside
+  GHash(const GHash&);
+  GHash &operator=(const GHash&);
+  GFreeProc* fFreeProc; //procedure to free item data
+protected:
+public:
+  // Default item disposal: delete the stored OBJ.
+  static void DefaultFreeProc(pointer item) {
+      delete (OBJ*)item;
+      }
+public:
+  GHash(GFreeProc* freeProc); // constructs an empty hash
+  GHash(bool doFree=true); // constructs an empty hash (free the item objects)
+  void setFreeItem(GFreeProc *freeProc) { fFreeProc=freeProc; }
+  void setFreeItem(bool doFree) { fFreeProc=(doFree)? &DefaultFreeProc : NULL; }
+  int Capacity() const { return fCapacity; } // table's size, including the empty slots.
+  void Resize(int m);  // Resize the table to the given size.
+  int Count() const { return fCount; }// the total number of entries in the table.
+  // Insert a new entry into the table given key and mark.
+  // If there is already an entry with that key, its data pointer is
+  // replaced by ptr (the previous data is NOT freed, the mark is kept).
+  const OBJ* Add(const char* ky, const OBJ* ptr=NULL, bool mrk=false);
+  //same as Add, but the key pointer is stored directly, no string duplicate
+  //is made (shared-key-Add)
+  const OBJ* shkAdd(const char* ky, const OBJ* ptr, bool mrk=false);
+
+  // Replace data at key, if the entry's mark is less than
+  // or equal to the given mark.  If there was no existing entry,
+  // a new entry is inserted with the given mark.
+  OBJ* Replace(const char* ky, const OBJ* ptr, bool mrk=false);
+  // Remove a given key and its data
+  OBJ* Remove(const char* ky);
+  // Find data OBJ* given key.
+  OBJ* Find(const char* ky, char** keyptr=NULL);
+  bool hasKey(const char* ky);
+  char* getLastKey() { return lastkeyptr; }
+  OBJ* operator[](const char* ky) { return Find(ky); }
+  void startIterate(); //iterator-like initialization
+  char* NextKey(); //returns next valid key in the table (NULL if no more)
+  OBJ* NextData(); //returns next valid hash[].data
+  OBJ* NextData(char*& nextkey); //returns next valid hash[].data
+                                //or NULL if no more
+                                //nextkey is SET to the corresponding key
+  GHashEntry* NextEntry() { //returns a pointer to the next used GHashEntry
+                            //or NULL if no more
+  	 // a negative cursor means startIterate() was never called:
+  	 // scan from slot 0 instead of reading before the table
+  	 int pos=(fCurrentEntry<0) ? 0 : fCurrentEntry;
+  	 while (pos<fCapacity && hash[pos].hash<0) pos++;
+  	 // >= (not ==): the cursor may be beyond a table that was shrunk
+  	 // or cleared after the iteration started
+  	 if (pos>=fCapacity) {
+  	                 fCurrentEntry=fCapacity;
+  	                 return NULL;
+  	                 }
+  	              else {
+  	                 fCurrentEntry=pos+1;
+  	                 return &hash[pos];
+  	                 }
+  }
+  /// Clear all entries
+  void Clear();
+
+  /// Destructor
+  virtual ~GHash();
+  };
+//
+//======================== method definitions ========================
+//
+/*
+  Notes:
+  - The hash function should yield a non-negative number: the negative
+     values -1 (never used slot) and -2 (removed entry) are needed for flag purposes.
+  - Since the algorithm doubles the table size when exceeding MAX_LOAD,
+    it would be prudent to keep MIN_LOAD less than 1/2 MAX_LOAD;
+    otherwise, the algorithm might hip-hop between halving and doubling,
+    which would be quite expensive!!
+  - Not many people seem to know that hash table sizes don't have to be prime
+    numbers; in fact, a table size of 2**n and odd probe distance are very
+    easy to arrange, and this works just as well!
+  - We store the hash value of each key, so that 99.999% of the time we can
+    compare hash numbers; only when hash numbers match do we need to compare keys.
+    Thus, with a good hash function, the number of calls to strcmp() should be
+    roughly the same as the number of successful lookups.
+  - The hash table should NEVER get full, or stuff will loop forever!!
+*/
+
+// Initial table size (MUST be power of 2)
+// Resize() and Clear() also use it as the lower bound / reset size.
+#define DEF_HASH_SIZE      32
+// Maximum hash table load factor (%)
+// (the table is resized after an insertion that reaches this load)
+#define MAX_LOAD           80
+// Minimum hash table load factor (%)
+// (the table is resized after a removal that drops to this load)
+#define MIN_LOAD           10
+// Probe Position [0..n-1]
+// x is the key's hash value, n the table capacity
+#define HASH1(x,n) (((unsigned int)(x)*13)%(n))
+// Probe Distance [1..n-1]
+// the "1|" forces an odd step, as wanted for a table size that is a power of 2
+#define HASH2(x,n) (1|(((unsigned int)(x)*17)%((n)-1)))
+
+// true when the table has a procedure for disposing of the stored items
+#define FREEDATA (fFreeProc!=NULL)
+
+/*******************************************************************************/
+// Construct empty hash
+template <class OBJ> GHash<OBJ>::GHash(GFreeProc* freeProc) {
+  // bookkeeping first, then the table itself
+  fCapacity=DEF_HASH_SIZE;
+  fCount=0;
+  fCurrentEntry=-1;
+  lastkeyptr=NULL;
+  fFreeProc=freeProc; //NULL means the stored items are never freed by the hash
+  GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
+  //flag every slot as 'empty' (-1)
+  for (int slot=0; slot<DEF_HASH_SIZE; ++slot) {
+     hash[slot].hash=-1;
+     }
+  }
+
+template <class OBJ> GHash<OBJ>::GHash(bool doFree) {
+  // bookkeeping first, then the table itself
+  fCapacity=DEF_HASH_SIZE;
+  fCount=0;
+  fCurrentEntry=-1;
+  lastkeyptr=NULL;
+  //doFree selects between deleting the stored items and leaving them alone
+  if (doFree) fFreeProc=&DefaultFreeProc;
+         else fFreeProc=NULL;
+  GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
+  //flag every slot as 'empty' (-1)
+  for (int slot=0; slot<DEF_HASH_SIZE; ++slot) {
+     hash[slot].hash=-1;
+     }
+  }
+
+
+// Resize table
+template <class OBJ> void GHash<OBJ>::Resize(int m){
+  // m is the number of entries the table must accommodate; the new capacity
+  // is derived from the current one by halving/doubling.
+  GASSERT(fCount<=fCapacity);
+  if (m<DEF_HASH_SIZE) m=DEF_HASH_SIZE;
+  int newcap=fCapacity;
+  while ((newcap>>2)>m) newcap>>=1;   // Shrink until newcap/4 <= m
+  while ((newcap>>1)<m) newcap<<=1;   // Grow until m <= newcap/2
+  GASSERT(m<=(newcap>>1));
+  GASSERT(DEF_HASH_SIZE<=newcap);
+  if (newcap==fCapacity) return; //nothing to do
+  GASSERT(m<=newcap);
+  GHashEntry* newtab;
+  GMALLOC(newtab, sizeof(GHashEntry)*newcap);
+  for (int j=0; j<newcap; j++) newtab[j].hash=-1;
+  // re-insert the used slots only; the slots flagged as empty (-1) or
+  // removed (-2) are not carried over
+  for (int j=0; j<fCapacity; j++) {
+    const int hv=hash[j].hash;
+    if (hv<0) continue;
+    int slot=HASH1(hv,newcap);
+    GASSERT(0<=slot && slot<newcap);
+    const int step=HASH2(hv,newcap);
+    GASSERT(1<=step && step<newcap);
+    while (newtab[slot].hash!=-1) slot=(slot+step)%newcap;
+    GASSERT(newtab[slot].hash<0);
+    newtab[slot]=hash[j];
+    }
+  GFREE(hash);
+  hash=newtab;
+  fCapacity=newcap;
+  }
+
+// add a new entry, or update it if it already exists
+template <class OBJ> const OBJ* GHash<OBJ>::Add(const char* ky,
+                      const OBJ* pdata,bool mrk){
+  // The key string is duplicated; the table keeps (and later frees) its own copy.
+  if (ky==NULL) GError("GHash::insert: NULL key argument.\n");
+  GASSERT(fCount<fCapacity);
+  const int hv=strhash(ky);
+  GASSERT(0<=hv);
+  int slot=HASH1(hv,fCapacity);
+  GASSERT(0<=slot && slot<fCapacity);
+  const int step=HASH2(hv,fCapacity);
+  GASSERT(1<=step && step<fCapacity);
+  int target=-1; //first removed (-2) slot met along the probe sequence
+  for (int left=fCapacity; left!=0 && hash[slot].hash!=-1; --left) {
+    if (target==-1 && hash[slot].hash==-2) target=slot;
+    if (hash[slot].hash==hv && strcmp(hash[slot].key,ky)==0) {
+      //key already present: only the data pointer is replaced
+      //(the old data is not freed and the mark is left as it was)
+      lastkeyptr=hash[slot].key;
+      hash[slot].data = (void*) pdata;
+      return (OBJ*)hash[slot].data;
+      }
+    slot=(slot+step)%fCapacity;
+    }
+  if (target==-1) target=slot; //no removed slot to reuse: take the empty one
+  GTRACE(("GHash::insert: key=\"%s\"\n",ky));
+  GASSERT(0<=target && target<fCapacity);
+  GASSERT(hash[target].hash<0);
+  GHashEntry& entry=hash[target];
+  entry.hash=hv;
+  entry.mark=mrk;
+  entry.key=Gstrdup(ky);
+  entry.keyalloc=true;
+  entry.data=(void*) pdata;
+  lastkeyptr=entry.key;
+  fCount++;
+  if ((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
+  GASSERT(fCount<fCapacity);
+  return pdata;
+  }
+
+template <class OBJ> const OBJ* GHash<OBJ>::shkAdd(const char* ky,
+                      const OBJ* pdata,bool mrk){
+  // Shared-key variant of Add(): the ky pointer itself is stored, so the
+  // string must stay valid for as long as the entry is in the table.
+  if (ky==NULL) GError("GHash::insert: NULL key argument.\n");
+  GASSERT(fCount<fCapacity);
+  const int hv=strhash(ky);
+  GASSERT(0<=hv);
+  int slot=HASH1(hv,fCapacity);
+  GASSERT(0<=slot && slot<fCapacity);
+  const int step=HASH2(hv,fCapacity);
+  GASSERT(1<=step && step<fCapacity);
+  int target=-1; //first removed (-2) slot met along the probe sequence
+  for (int left=fCapacity; left!=0 && hash[slot].hash!=-1; --left) {
+    if (target==-1 && hash[slot].hash==-2) target=slot;
+    if (hash[slot].hash==hv && strcmp(hash[slot].key,ky)==0) {
+      //key already present: only the data pointer is replaced
+      lastkeyptr=hash[slot].key;
+      hash[slot].data = (void*) pdata;
+      return (OBJ*)hash[slot].data;
+      }
+    slot=(slot+step)%fCapacity;
+    }
+  if (target==-1) target=slot; //no removed slot to reuse: take the empty one
+  GTRACE(("GHash::insert: key=\"%s\"\n",ky));
+  GASSERT(0<=target && target<fCapacity);
+  GASSERT(hash[target].hash<0);
+  GHashEntry& entry=hash[target];
+  entry.hash=hv;
+  entry.mark=mrk;
+  entry.key=(char *)ky;
+  entry.keyalloc=false; //the key is not owned by the table
+  entry.data=(void*) pdata;
+  lastkeyptr=entry.key;
+  fCount++;
+  if ((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
+  GASSERT(fCount<fCapacity);
+  return pdata;
+  }
+
+
+// Add or replace entry
+// Replace the data stored for ky, or insert a new entry when ky is absent.
+//  - existing entry: the data is replaced only if the entry's mark is less
+//    than or equal to mrk; the previous data is disposed of through the free
+//    procedure (if any), unless it is the very same pointer being stored.
+//    Returns the data held by the entry after the call.
+//  - no entry: a new one is inserted with a private copy of the key.
+//    Returns pdata.
+template <class OBJ>  OBJ* GHash<OBJ>::Replace(const char* ky,const OBJ* pdata, bool mrk){
+  int p,i,x,h,n;
+  if(!ky){ GError("GHash::replace: NULL key argument.\n"); }
+  GASSERT(fCount<fCapacity);
+  h=strhash(ky);
+  GASSERT(0<=h);
+  p=HASH1(h,fCapacity);
+  GASSERT(0<=p && p<fCapacity);
+  x=HASH2(h,fCapacity);
+  GASSERT(1<=x && x<fCapacity);
+  i=-1;
+  n=fCapacity;
+  while(n && hash[p].hash!=-1){
+    if((i==-1)&&(hash[p].hash==-2)) i=p; //remember the first reusable slot
+    if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
+      if(hash[p].mark<=mrk){
+        GTRACE(("GHash::replace: %08x: replacing: \"%s\"\n",this,ky));
+        // never free the object that is about to be stored again
+        if (FREEDATA && hash[p].data!=(void*)pdata) (*fFreeProc)(hash[p].data);
+        hash[p].mark=mrk;
+        hash[p].data=(void*)pdata;
+        }
+      lastkeyptr=hash[p].key; //same bookkeeping as Add()
+      return (OBJ*)hash[p].data;
+      }
+    p=(p+x)%fCapacity;
+    n--;
+    }
+  if(i==-1) i=p;
+  GTRACE(("GHash::replace: %08x: inserting: \"%s\"\n",this,ky));
+  GASSERT(0<=i && i<fCapacity);
+  GASSERT(hash[i].hash<0);
+  hash[i].hash=h;
+  hash[i].mark=mrk;
+  hash[i].key=Gstrdup(ky);
+  // the key was duplicated above, so the table must free it later;
+  // leaving keyalloc unset made Remove()/Clear()/~GHash() read garbage
+  hash[i].keyalloc=true;
+  lastkeyptr=hash[i].key;
+  hash[i].data=(void*)pdata;
+  fCount++;
+  if((100*fCount)>=(MAX_LOAD*fCapacity)) Resize(fCount);
+  GASSERT(fCount<fCapacity);
+  return (OBJ*)pdata;
+  }
+
+
+// Remove entry
+// Remove the entry stored for ky, if any.
+// The key is freed when the table owns it, and the data is disposed of
+// through the free procedure when one is set. The slot is flagged as
+// removed (-2) so that probe sequences going through it stay intact.
+// Always returns NULL.
+template <class OBJ> OBJ* GHash<OBJ>::Remove(const char* ky){
+  if(!ky){ GError("GHash::remove: NULL key argument.\n"); }
+  if (fCount<=0) return NULL;
+  const int h=strhash(ky);
+  GASSERT(0<=h);
+  int p=HASH1(h,fCapacity);
+  GASSERT(0<=p && p<fCapacity);
+  const int x=HASH2(h,fCapacity);
+  GASSERT(1<=x && x<fCapacity);
+  GASSERT(fCount<fCapacity);
+  for (int n=fCapacity; n && hash[p].hash!=-1; n--) {
+    if(hash[p].hash==h && strcmp(hash[p].key,ky)==0){
+      GTRACE(("GHash::remove: %08x removing: \"%s\"\n",this,ky));
+      hash[p].hash=-2;
+      hash[p].mark=false;
+      if (hash[p].keyalloc) {
+        // do not let getLastKey() hand out the string freed below
+        if (lastkeyptr==hash[p].key) lastkeyptr=NULL;
+        GFREE((hash[p].key));
+        }
+      if (FREEDATA) (*fFreeProc)(hash[p].data);
+      hash[p].key=NULL;
+      hash[p].data=NULL;
+      fCount--;
+      if((100*fCount)<=(MIN_LOAD*fCapacity)) Resize(fCount);
+      GASSERT(fCount<fCapacity);
+      return NULL;
+      }
+    p=(p+x)%fCapacity;
+    }
+  return NULL;
+  }
+
+
+// Find entry
+template <class OBJ> bool GHash<OBJ>::hasKey(const char* ky) {
+  // Membership test: follows the probe sequence of ky until the key or a
+  // never-used slot is met (at most fCapacity steps).
+  if (ky==NULL) { GError("GHash::find: NULL key argument.\n"); }
+  if (fCount<=0) return false;
+  const int hv=strhash(ky);
+  GASSERT(0<=hv);
+  int slot=HASH1(hv,fCapacity);
+  GASSERT(0<=slot && slot<fCapacity);
+  const int step=HASH2(hv,fCapacity);
+  GASSERT(1<=step && step<fCapacity);
+  GASSERT(fCount<fCapacity);
+  for (int left=fCapacity; left!=0 && hash[slot].hash!=-1; --left) {
+    //compare the stored hash values first, the strings only when they match
+    if (hash[slot].hash==hv && strcmp(hash[slot].key,ky)==0) return true;
+    slot=(slot+step)%fCapacity;
+    }
+  return false;
+}
+
+template <class OBJ> OBJ* GHash<OBJ>::Find(const char* ky, char** keyptr){
+  // Returns the data stored for ky, or NULL when the key is not present.
+  // When keyptr is given and the key is found, *keyptr receives the
+  // table's own key string pointer.
+  if (ky==NULL) { GError("GHash::find: NULL key argument.\n"); }
+  if (fCount<=0) return NULL;
+  const int hv=strhash(ky);
+  GASSERT(0<=hv);
+  int slot=HASH1(hv,fCapacity);
+  GASSERT(0<=slot && slot<fCapacity);
+  const int step=HASH2(hv,fCapacity);
+  GASSERT(1<=step && step<fCapacity);
+  GASSERT(fCount<fCapacity);
+  for (int left=fCapacity; left!=0 && hash[slot].hash!=-1; --left) {
+    //compare the stored hash values first, the strings only when they match
+    if (hash[slot].hash==hv && strcmp(hash[slot].key,ky)==0) {
+      if (keyptr!=NULL) *keyptr = hash[slot].key;
+      return (OBJ*)hash[slot].data;
+      }
+    slot=(slot+step)%fCapacity;
+    }
+  return NULL;
+  }
+
+
+// Initialize the key/data iterator: the next call to NextKey(), NextData()
+// or NextEntry() starts scanning at the first slot of the table.
+template <class OBJ> void GHash<OBJ>::startIterate() {
+ this->fCurrentEntry=0;
+}
+
+// Returns the key of the next used slot and advances the iteration cursor;
+// returns NULL when there are no more entries.
+template <class OBJ> char* GHash<OBJ>::NextKey() {
+ // a negative cursor means startIterate() was never called:
+ // scan from slot 0 instead of reading before the table
+ int pos=(fCurrentEntry<0) ? 0 : fCurrentEntry;
+ while (pos<fCapacity && hash[pos].hash<0) pos++;
+ // >= (not ==): the cursor may be beyond a table that was shrunk
+ // or cleared after the iteration started
+ if (pos>=fCapacity) {
+                 fCurrentEntry=fCapacity;
+                 return NULL;
+                 }
+ fCurrentEntry=pos+1;
+ return hash[pos].key;
+}
+
+// Returns the data of the next used slot and advances the iteration cursor;
+// returns NULL when there are no more entries.
+template <class OBJ> OBJ* GHash<OBJ>::NextData() {
+ // a negative cursor means startIterate() was never called:
+ // scan from slot 0 instead of reading before the table
+ int pos=(fCurrentEntry<0) ? 0 : fCurrentEntry;
+ while (pos<fCapacity && hash[pos].hash<0) pos++;
+ // >= (not ==): the cursor may be beyond a table that was shrunk
+ // or cleared after the iteration started
+ if (pos>=fCapacity) {
+                 fCurrentEntry=fCapacity;
+                 return NULL;
+                 }
+ fCurrentEntry=pos+1;
+ return (OBJ*)hash[pos].data;
+}
+
+// Returns the data of the next used slot and advances the iteration cursor;
+// nextkey is set to the key of that slot.
+// When there are no more entries, returns NULL and sets nextkey to NULL.
+template <class OBJ> OBJ* GHash<OBJ>::NextData(char* &nextkey) {
+ // a negative cursor means startIterate() was never called:
+ // scan from slot 0 instead of reading before the table
+ int pos=(fCurrentEntry<0) ? 0 : fCurrentEntry;
+ while (pos<fCapacity && hash[pos].hash<0) pos++;
+ // >= (not ==): the cursor may be beyond a table that was shrunk
+ // or cleared after the iteration started
+ if (pos>=fCapacity) {
+                 fCurrentEntry=fCapacity;
+                 nextkey=NULL;
+                 return NULL;
+                 }
+ fCurrentEntry=pos+1;
+ nextkey=hash[pos].key;
+ return (OBJ*)hash[pos].data;
+}
+
+
+// Get first non-empty entry
+template <class OBJ> int GHash<OBJ>::First() const {
+  // walk forward over the unused slots; the result is fCapacity when
+  // the table holds no entry
+  int slot=0;
+  while (slot<fCapacity && hash[slot].hash<0) ++slot;
+  GASSERT(fCapacity<=slot || 0<=hash[slot].hash);
+  return slot;
+  }
+
+// Get last non-empty entry
+template <class OBJ> int GHash<OBJ>::Last() const {
+  // walk backward over the unused slots; the result is -1 when
+  // the table holds no entry
+  int slot=fCapacity-1;
+  while (slot>=0 && hash[slot].hash<0) --slot;
+  GASSERT(slot<0 || 0<=hash[slot].hash);
+  return slot;
+  }
+
+
+// Find next valid entry
+template <class OBJ> int GHash<OBJ>::Next(int pos) const {
+  // position of the first used slot after pos, or a value >= fCapacity
+  // when there is none
+  GASSERT(0<=pos && pos<fCapacity);
+  for (++pos; pos<fCapacity && hash[pos].hash<0; ++pos) { }
+  GASSERT(fCapacity<=pos || 0<=hash[pos].hash);
+  return pos;
+  }
+
+
+// Find previous valid entry
+template <class OBJ> int GHash<OBJ>::Prev(int pos) const {
+  // position of the last used slot before pos, or a negative value
+  // when there is none
+  GASSERT(0<=pos && pos<fCapacity);
+  for (--pos; pos>=0 && hash[pos].hash<0; --pos) { }
+  GASSERT(pos<0 || 0<=hash[pos].hash);
+  return pos;
+  }
+
+
+// Remove all
+// Remove all entries: frees the owned keys, disposes of the data through
+// the free procedure (if any) and resets the table to its initial size.
+template <class OBJ> void GHash<OBJ>::Clear(){
+  int i;
+  for(i=0; i<fCapacity; i++){
+    if(hash[i].hash>=0){
+      if (hash[i].keyalloc) GFREE((hash[i].key));
+      if (FREEDATA)
+            (*fFreeProc)(hash[i].data);
+      }
+    }
+  GFREE(hash);
+  GMALLOC(hash, sizeof(GHashEntry)*DEF_HASH_SIZE);
+  //reinitialize it
+  for (i=0; i<DEF_HASH_SIZE; i++)
+         hash[i].hash=-1; //this will be an indicator for 'empty' entries
+  fCapacity=DEF_HASH_SIZE;
+  fCount=0;
+  // the key strings are gone: do not let getLastKey() return a freed pointer
+  lastkeyptr=NULL;
+  // a cursor from a previous iteration may point beyond the new, smaller
+  // table; park it at the start of the (now empty) table
+  fCurrentEntry=0;
+  }
+
+
+// Save data
+/*
+void GHash::Save(Stream& store) const {
+  Object::save(store);
+  store << fCapacity;
+  store << fCount;
+  for(int i=0; i<fCapacity; i++){
+    store << hash[i].hash;
+    if(hash[i].hash>=0){
+      uint len=strlen(hash[i].key);
+      store << len;
+      store << hash[i].mark;
+      store.save(hash[i].key,len);
+      }
+    }
+  }
+
+
+// Load data
+void GHash::Load(Stream& store){
+  Object::load(store);
+  store >> fCapacity;
+  store >> fCount;
+  for(int i=0; i<fCapacity; i++){
+    store >> hash[i].hash;
+    if(hash[i].hash>=0){
+      uint len;
+      store >> len;
+      store >> hash[i].mark;
+      GMALLOC(hash[i].key,len+1);
+      store.load(hash[i].key,len);
+      hash[i].key[len]='\0';
+      }
+    }
+  }
+*/
+
+// Destroy table
+template <class OBJ> GHash<OBJ>::~GHash(){
+  // release what every used slot owns, then the table itself
+  for (int slot=0; slot<fCapacity; ++slot) {
+    GHashEntry& entry=hash[slot];
+    if (entry.hash<0) continue; //unused slot
+    if (entry.keyalloc) GFREE((entry.key)); //shared keys are not freed
+    if (FREEDATA) (*fFreeProc)(entry.data);
+    }
+  GFREE(hash);
+  }
+
+#endif
diff --git a/include/GList.hh b/include/GList.hh
new file mode 100644
index 0000000..13e0729
--- /dev/null
+++ b/include/GList.hh
@@ -0,0 +1,638 @@
+//---------------------------------------------------------------------------
+/*
+Sortable collections of objects and object pointers
+*/
+#ifndef _GList_HH
+#define _GList_HH
+
+#include "GVec.hh"
+
+// messages used when an operation does not apply to the current sort mode
+#define GLIST_SORTED_ERR "Operation not allowed on a sorted list!\n"
+#define GLIST_UNSORTED_ERR "Operation not allowed on an unsorted list!\n"
+
+//------ useful macros:
+// guards for void member functions: report the error and return
+// when the collection is in the wrong sort mode
+#define BE_UNSORTED if (fCompareProc!=NULL) { GError(GLIST_SORTED_ERR); return; }
+#define BE_SORTED if (fCompareProc==NULL) { GError(GLIST_UNSORTED_ERR); return; }
+
+// a collection is "sorted" exactly when it has a compare function
+#define SORTED (fCompareProc!=NULL)
+#define UNSORTED (fCompareProc==NULL)
+
+// GArray is the sortable array type, requires the comparison operator < to be defined
+// Array of OBJ values (items are copied with OBJ's operator=) that is kept
+// sorted whenever a compare function is set.
+template <class OBJ> class GArray:public GVec<OBJ> {
+  protected:
+    bool fUnique; //when true, Add(OBJ*) rejects items that are already present
+    // Three-way comparison built on OBJ's operator< (returns 1, -1 or 0)
+    static int DefaultCompareProc(const pointer item1, const pointer item2) {
+      //operator< MUST be defined for OBJ class!
+      if (*((OBJ*)item2) < *((OBJ*)item1)) return 1;
+        else if (*((OBJ*)item1) < *((OBJ*)item2)) return -1;
+                                             else return  0;
+      }
+    GCompareProc* fCompareProc; //NULL means the array is unsorted
+  public:
+    GArray(GCompareProc* cmpFunc=NULL);
+    GArray(bool sorted, bool unique=false);
+    GArray(int init_capacity, bool sorted, bool unique=false);
+    GArray(GArray<OBJ>& array); //copy constructor
+    const GArray<OBJ>& operator=(GArray<OBJ>& array);
+    //~GArray();
+    //assignment operator
+    void setSorted(GCompareProc* cmpFunc);
+    // switch to the default (operator< based) ordering, or to unsorted mode
+    void setSorted(bool sorted) {
+     if (sorted) {
+         if (fCompareProc!=&DefaultCompareProc) {
+             fCompareProc=&DefaultCompareProc;
+             Sort();
+             }
+          }
+      else fCompareProc=NULL;
+      }
+    //sort the array if cmpFunc not NULL or changes
+    int Add(OBJ* item); // specific implementation if sorted
+                        // returns the index of the added item, or -1 if not added
+    int Add(OBJ& item) { return Add(&item); } //both will CREATE a new OBJ and COPY to it
+                       // using OBJ new operator=
+    int cAdd(OBJ item) { return Add(&item); }
+    int cPush(OBJ item) { return Add(&item); }
+    int Push(OBJ& item) { return Add(&item); }
+
+    void Add(GArray<OBJ>& list); //add copies of all items from another list
+    //when set, Add(OBJ*) rejects items already present (sorted or unsorted array)
+    void setUnique(bool beUnique) { fUnique = beUnique; };
+    void Sort(); //explicit sort may be requested
+    bool Sorted() { return fCompareProc!=NULL; }
+    void Replace(int idx, OBJ& item); //Put, use operator= to copy
+    int  Unique() { return fUnique; }
+    int IndexOf(OBJ& item);
+         //this needs the == operator to have been defined for OBJ
+    bool Found(OBJ& item, int& idx); // binary search if sorted, linear search otherwise;
+         //search by content; if found, returns true and idx will be the index
+         //of the first item found matching for which fCompareProc returns 0
+    bool Exists(OBJ& item); //same as above without existing index info
+    //unsorted only, place item at position idx:
+    void Move(int curidx, int newidx);
+    void Insert(int idx, OBJ* item);
+    void Insert(int idx, OBJ item) { Insert(idx,&item); }
+};
+
+//GList is a sortable collection of pointers to objects; requires operator< to be defined, or a custom compare function
+// Collection of OBJ pointers that is kept sorted whenever a compare
+// function is set; the pointed objects are not copied.
+template <class OBJ> class GList:public GPVec<OBJ> {
+  protected:
+    bool fUnique; //when true, Add(OBJ*) rejects items that are already present
+    GCompareProc* fCompareProc; //a pointer to a Compare function
+    
+    // Three-way comparison built on OBJ's operator< (returns 1, -1 or 0)
+    static int DefaultCompareProc(const pointer item1, const pointer item2) {
+      //operator< MUST be defined for OBJ class!
+      if (*((OBJ*)item2) < *((OBJ*)item1)) return 1;
+        else if (*((OBJ*)item1) < *((OBJ*)item2)) return -1;
+                                             else return  0;
+      }
+  public:
+    void sortInsert(int idx, OBJ* item);
+    GList(GCompareProc* compareProc=NULL); //free by default
+    GList(GCompareProc* compareProc, //unsorted by default
+        GFreeProc *freeProc,
+        bool beUnique=false);
+    GList(bool sorted, bool free_elements=true, bool beUnique=false);
+    GList(int init_capacity, bool sorted, bool free_elements=true, bool beUnique=false);
+    GList(GList<OBJ>& list); //copy constructor?
+    GList(GList<OBJ>* list); //kind of a copy constructor
+    const GList<OBJ>& operator=(GList<OBJ>& list);
+    //void Clear();
+    //~GList();
+    void setSorted(GCompareProc* compareProc);
+       //sorted if compareProc not NULL; sort the list if compareProc changes !
+    bool Sorted() { return fCompareProc!=NULL; }
+    // switch to the default (operator< based) ordering, or to unsorted mode
+    void setSorted(bool sorted) {
+     if (sorted) {
+         if (fCompareProc!=&DefaultCompareProc) {
+             fCompareProc=&DefaultCompareProc;
+             Sort();
+             }
+          }
+      else fCompareProc=NULL;
+      }
+    int Add(OBJ* item); //-- specific implementation if sorted
+    void Add(GList<OBJ>& list); //add all pointers from another list
+
+    OBJ* AddIfNew(OBJ* item, bool deleteIfFound=true, int* fidx=NULL);
+    // default: delete item if Found() (and pointers are not equal)!
+    //returns the equal (==) object if it's in the list already
+    //or the item itself if it is unique and actually added
+
+    int AddedIfNew(OBJ* item);
+    // if Found(item) (and pointers are not equal) delete item and returns -1
+    // if added, returns the new item index
+
+
+    int Unique() { return fUnique; }
+    //when set, Add(OBJ*) rejects items already present (sorted or unsorted list)
+    void setUnique(bool beUnique) { fUnique = beUnique; };
+
+    GCompareProc* GetCompareProc() {return fCompareProc;}
+    int IndexOf(OBJ* item); //this has a specific implementation for sorted lists
+               //if list is sorted, item data is located by binary search
+               //based on the Compare function
+               //if not, a linear search is performed, but
+               //this needs the == operator to have been defined for OBJ
+    
+    void Put(int idx, OBJ* item, bool re_sort=false);
+    bool Found(OBJ* item, int & idx); // sorted only;
+               //search by content; if found, returns true and idx will be the index
+               //of the first item found matching for which GTCompareProc returns 0
+    bool Exists(OBJ* item); //same as above without existing index info
+    bool Exists(OBJ& item); //same as above without existing index info
+    void Sort(); //explicit sort may be requested using this function
+    int Remove(OBJ* item); //search for pointer, using binary search if sorted
+    void Insert(int idx, OBJ* item); //unsorted only, place item at position idx
+    void Move(int curidx, int newidx);
+}; //GList 
+
+
+
+//-------------------- TEMPLATE IMPLEMENTATION-------------------------------
+
+// Copy constructor: allocates the same capacity as the source array and
+// copies its items one by one with OBJ's operator=.
+template <class OBJ> GArray<OBJ>::GArray(GArray<OBJ>& array):GVec<OBJ>(0) {
+ fUnique=array.fUnique;
+ fCompareProc=array.fCompareProc;
+ this->fCapacity=array.fCapacity;
+ this->fCount=array.fCount;
+ if (this->fCapacity>0) this->fArray=new OBJ[this->fCapacity];
+                   else this->fArray=NULL;
+ for (int k=0; k<this->fCount; ++k) {
+   this->fArray[k]=array[k];
+   }
+ }
+
+// Assignment: discards the current content, then takes over the sort
+// settings and a copy (OBJ operator=) of every item of the source array.
+template <class OBJ> const GArray<OBJ>& GArray<OBJ>::operator=(GArray<OBJ>& array) {
+ if (this!=&array) {
+   GVec<OBJ>::Clear();
+   this->fUnique=array.fUnique;
+   this->fCompareProc=array.fCompareProc;
+   this->fCapacity=array.fCapacity;
+   this->fCount=array.fCount;
+   if (this->fCapacity>0) {
+      this->fArray=new OBJ[this->fCapacity];
+      }
+   for (int k=0; k<this->fCount; ++k)
+      this->fArray[k]=array[k];
+   }
+ return *this;
+}
+
+// Empty array; sorted with cmpFunc when it is not NULL, unsorted otherwise.
+template <class OBJ> GArray<OBJ>::GArray(GCompareProc* cmpFunc):GVec<OBJ>(0),
+      fUnique(false), //only affects sorted lists
+      fCompareProc(cmpFunc) {
+}
+
+// Empty array; "sorted" selects the default, operator< based ordering.
+template <class OBJ> GArray<OBJ>::GArray(bool sorted, bool unique):GVec<OBJ>(0) {
+  if (sorted) fCompareProc = DefaultCompareProc;
+         else fCompareProc = NULL;
+  fUnique=unique;
+}
+
+// Empty array with the given initial capacity; "sorted" selects the
+// default, operator< based ordering.
+template <class OBJ> GArray<OBJ>::GArray(int init_capacity,
+                        bool sorted, bool unique):GVec<OBJ>(init_capacity) {
+  if (sorted) fCompareProc = DefaultCompareProc;
+         else fCompareProc = NULL;
+  fUnique=unique;
+}
+
+// Install a new compare function (NULL: unsorted mode). The array is
+// re-sorted only when the function actually changes and is not NULL.
+template <class OBJ> void GArray<OBJ>::setSorted(GCompareProc* cmpFunc) {
+  const bool changed=(cmpFunc!=fCompareProc);
+  fCompareProc=cmpFunc;
+  if (changed && cmpFunc!=NULL) {
+       Sort(); //new compare method
+       }
+}
+
+// Index of an item equal to the given one, or -1 when there is none.
+template <class OBJ> int GArray<OBJ>::IndexOf(OBJ& item) {
+ int pos=0;
+ return Found(item, pos) ? pos : -1;
+ }
+
+// True when an item equal to the given one is in the array.
+template <class OBJ> bool GArray<OBJ>::Exists(OBJ& item) {
+ int ignored=0; //Found() insists on reporting a position
+ return Found(item, ignored);
+ }
+
+
+// Adds a copy of *item. Returns the index where the copy was stored, or -1
+// when item is NULL or when it is a duplicate and duplicates are forbidden.
+template <class OBJ> int GArray<OBJ>::Add(OBJ* item) {
+ if (item==NULL) return -1;
+ int pos;
+ if (UNSORTED) {
+   if (fUnique && Found(*item,pos)) return -1; //set behaviour
+   //plain append, growing the storage when it is full
+   pos = this->fCount;
+   if (pos==this->fCapacity) GVec<OBJ>::Grow();
+   this->fArray[pos] = *item; //operator=, copies the item
+   this->fCount++;
+   return pos;
+   }
+ //sorted: Found sets pos to the position where the item should be!
+ if (Found(*item, pos) && fUnique) return -1; //cannot add a duplicate!
+ GVec<OBJ>::Insert(pos, *item);
+ return pos;
+}
+
+
+// Adds copies of all the items of another array.
+template <class OBJ> void GArray<OBJ>::Add(GArray<OBJ>& list) {
+  if (list.Count()==0) return;
+  if (UNSORTED) { //simply copy the items at the end
+    this->setCapacity(this->fCapacity+list.fCount);
+    const int base=this->fCount;
+    for (int k=0; k<list.fCount; ++k) {
+           this->fArray[base+k]=list.fArray[k];
+           }
+    this->fCount+=list.fCount;
+    return;
+    }
+  //sorted: every item has to go through the single-item Add()
+  for (int k=0; k<list.fCount; ++k) Add(&list[k]);
+}
+
+// Searches the array for an item equal to the given one.
+//  - sorted array: binary search with fCompareProc; when the item is not
+//    found, idx is set to the position where it should be inserted
+//  - unsorted array: linear search with OBJ's operator==; idx is -1 when
+//    the item is not found
+// An empty array always gives false with idx set to 0.
+template <class OBJ> bool GArray<OBJ>::Found(OBJ& item, int& idx) {
+ idx=-1;
+ if (this->fCount==0) { idx=0; return false; }
+ if (UNSORTED) {
+   // needs == operator to compare user defined objects !
+   for (int k=0; k<this->fCount; ++k) {
+      if (this->fArray[k]==item) { //requires operator==
+         idx=k;
+         return true;
+         }
+      }
+   return false;
+   }
+ //do the simplest tests first: is the item outside the stored range?
+ if ((*fCompareProc)(&(this->fArray[0]),&item)>0) {
+   idx=0;
+   return false;
+   }
+ if ((*fCompareProc)(&item, &(this->fArray[this->fCount-1]))>0) {
+   idx=this->fCount;
+   return false;
+   }
+ //binary search based on fCompareProc
+ int lo=0;
+ int hi=this->fCount-1;
+ while (lo<=hi) {
+   const int mid=(lo+hi)>>1;
+   const int cmp=(*fCompareProc)(&(this->fArray[mid]), &item);
+   if (cmp==0) { //found!
+      idx=mid;
+      return true;
+      }
+   if (cmp<0) lo=mid+1;
+         else hi=mid-1;
+   }
+ idx=lo;
+ return false;
+}
+
+// Places item at position idx; idx can be [0..fCount] so an item can be
+// actually added. Not allowed on sorted data.
+template <class OBJ> void GArray<OBJ>::Insert(int idx, OBJ* item) {
+ if (SORTED) { //forbid this operation on sorted data
+    GError(GLIST_SORTED_ERR);
+    return;
+    }
+ GVec<OBJ>::Insert(idx, item);
+}
+
+
+// Exchanges the items at positions curidx and newidx (unsorted arrays only).
+// Both positions must be valid indexes; an invalid one is reported with
+// GError(). The former test was inverted: it reported an error for every
+// pair of different indexes and never checked curidx.
+template <class OBJ> void GArray<OBJ>::Move(int curidx, int newidx) {
+ BE_UNSORTED; //cannot do this in a sorted list!
+ if (curidx<0 || curidx>=this->fCount)
+     GError(GVEC_INDEX_ERR, curidx);
+ if (newidx<0 || newidx>=this->fCount)
+     GError(GVEC_INDEX_ERR, newidx);
+ if (curidx==newidx) return; //nothing to exchange
+
+ OBJ tmp=this->fArray[curidx]; //copy constructor here
+ this->fArray[curidx]=this->fArray[newidx];
+ this->fArray[newidx]=tmp;
+}
+
+// Overwrites the item at position idx with a copy of item (OBJ operator=).
+// An invalid idx is reported with GError(). A sorted array is sorted again
+// after the replacement.
+template <class OBJ> void GArray<OBJ>::Replace(int idx, OBJ& item) {
+ //TEST_INDEX(idx);
+ // GVEC_INDEX_ERR is given the offending index only, as in Move();
+ // NOTE(review): assumes the format has a single %d -- confirm in GVec.hh
+ if (idx<0 || idx>=this->fCount) GError(GVEC_INDEX_ERR, idx);
+ this->fArray[idx]=item;
+ if ( SORTED ) Sort(); //re-sort ! this could be very expensive, don't do it
+}
+
+// Sorts the array with the current compare function; an unsorted array
+// switches to the default, operator< based one.
+template <class OBJ> void GArray<OBJ>::Sort() {
+ if (UNSORTED) fCompareProc=DefaultCompareProc;
+ const bool hasItems=(this->fArray!=NULL && this->fCount>0);
+ if (hasItems) {
+     this->qSort(0, this->fCount-1, fCompareProc);
+     }
+}
+
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//*=> GList implementation -- sortable array of pointers to OBJ
+
+// Copy constructor: the pointer storage is copied by the GPVec base,
+// the sort settings are taken from the source list.
+template <class OBJ> GList<OBJ>::GList(GList<OBJ>& list):GPVec<OBJ>(list),
+      fUnique(list.fUnique),
+      fCompareProc(list.fCompareProc) {
+}
+
+// Copy-like constructor taking a pointer to the source list.
+// The object pointers, the sort settings and the free procedure are all
+// copied, so both lists refer to the same objects afterwards.
+template <class OBJ> GList<OBJ>::GList(GList<OBJ>* plist):GPVec<OBJ>(0) { //another copy constructor
+ this->fCapacity=plist->fCapacity;
+ this->fList=NULL;
+ if (this->fCapacity>0) {
+     GMALLOC(this->fList, this->fCapacity*sizeof(OBJ*));
+     }
+ fUnique=plist->fUnique;
+ fCompareProc=plist->fCompareProc;
+ this->fFreeProc=plist->fFreeProc;
+ this->fCount=plist->fCount;
+ // memcpy() must not be given NULL pointers, not even for a zero size:
+ // only copy when there is something to copy
+ if (this->fCount>0 && this->fList!=NULL && plist->fList!=NULL) {
+     memcpy(this->fList, plist->fList, this->fCount*sizeof(OBJ*));
+     }
+ //for (int i=0;i<list->fCount;i++) Add(plist->Get(i));
+}
+
+// Adds all the pointers held by another list (the objects are not copied).
+template <class OBJ> void GList<OBJ>::Add(GList<OBJ>& list) {
+  if (list.Count()==0) return;
+  if (UNSORTED) { //simply copy the pointers at the end
+    this->setCapacity(this->fCapacity+list.fCount);
+    OBJ** dest = & (this->fList[this->fCount]);
+    memcpy( dest, list.fList, list.fCount*sizeof(OBJ*));
+    this->fCount+=list.fCount;
+    return;
+    }
+  //sorted: every pointer has to go through the single-item Add()
+  for (int k=0; k<list.Count(); ++k) Add(list[k]);
+}
+
+
+// Empty list with explicit compare and free procedures
+// (a NULL compareProc gives an unsorted list).
+template <class OBJ> GList<OBJ>::GList(GCompareProc* compareProc,
+       GFreeProc* freeProc, bool beUnique):
+      fUnique(beUnique), //only affects sorted lists
+      fCompareProc(compareProc) {
+  this->fFreeProc = freeProc;
+}
+
+// Empty list using the default free procedure of GPVec
+// (a NULL compareProc gives an unsorted list).
+template <class OBJ> GList<OBJ>::GList(GCompareProc* compareProc):
+      fUnique(false), //only affects sorted lists
+      fCompareProc(compareProc) {
+  this->fFreeProc = GPVec<OBJ>::DefaultFreeProc;
+}
+
+// Empty list; the three flags are independent of each other:
+//   sorted        - use the default, operator< based ordering
+//   free_elements - use the default free procedure of GPVec
+//   beUnique      - stored as the uniqueness flag
+template <class OBJ> GList<OBJ>::GList(bool sorted,
+    bool free_elements, bool beUnique) {
+  fUnique=beUnique;
+  if (sorted) fCompareProc=&DefaultCompareProc;
+         else fCompareProc=NULL;
+  if (free_elements) this->fFreeProc=GPVec<OBJ>::DefaultFreeProc;
+                else this->fFreeProc=NULL;
+}
+
+
+template <class OBJ> GList<OBJ>::GList(int init_capacity, bool sorted,
+    bool free_elements, bool beUnique):GPVec<OBJ>(init_capacity, free_elements) {
+  //capacity and item ownership are handed to the GPVec<OBJ> base constructor;
+  //only the ordering and the duplicate policy are decided here:
+  //a sorted list gets DefaultCompareProc, an unsorted one no compare function
+  fUnique=beUnique;
+  fCompareProc=NULL; //unsorted unless requested otherwise
+  if (sorted) {
+      fCompareProc=&DefaultCompareProc;
+      }
+}
+
+template <class OBJ> const GList<OBJ>& GList<OBJ>::operator=(GList& list) {
+ //shallow assignment: object pointers are copied directly, the actual objects are NOT duplicated
+ if (&list!=this) {
+     GPVec<OBJ>::Clear();
+     fCompareProc=list.fCompareProc;
+     this->fFreeProc=list.fFreeProc;
+     fUnique=list.fUnique; //as the copy constructors do; a stale flag could make Add() drop duplicates
+     for (int i=0;i<list.Count();i++) Add(list[i]);
+     }
+ return *this;
+}
+
+template <class OBJ> void GList<OBJ>::setSorted(GCompareProc* compareProc) {
+ //install the compare function; re-sort only if it really changed and is not NULL
+ const bool changed=(fCompareProc!=compareProc);
+ fCompareProc=compareProc;
+ if (changed && compareProc!=NULL) Sort(); //new compare method
+}
+
+template <class OBJ> int GList<OBJ>::IndexOf(OBJ* item) {
+ //index Found() reports for item, or -1 when Found() reports no match
+ int pos=0;
+ return Found(item, pos) ? pos : -1;
+ }
+
+template <class OBJ> bool GList<OBJ>::Exists(OBJ& item) {
+ //true when Found() locates an entry matching the given object
+ int ignored=0; //Found() needs somewhere to report the position
+ return Found(&item, ignored);
+ }
+
+template <class OBJ> bool GList<OBJ>::Exists(OBJ* item) {
+ //pointer flavour of Exists(): true when Found() locates a matching entry
+ int ignored=0; //position output of Found(), not needed here
+ return Found(item, ignored);
+ }
+
+template <class OBJ> int GList<OBJ>::Add(OBJ* item) {
+ //returns the index the pointer was stored at, or -1 (nothing added) for NULL or a refused duplicate
+ if (item==NULL) return -1;
+ int pos;
+ if (SORTED) {
+   //Found() leaves pos at the slot where the item belongs, match or not
+   const bool present=Found(item, pos);
+   if (present && fUnique) return -1; //duplicates forbidden
+   sortInsert(pos, item);
+   return pos;
+   }
+ if (fUnique && Found(item,pos)) return -1; //set behaviour
+ pos = this->fCount; //unsorted: plain append at the end
+ if (pos==this->fCapacity) GPVec<OBJ>::Grow();
+ this->fList[pos]=item;
+ this->fCount++;
+ return pos;
+}
+
+//Adds item unless an equal object is already stored.
+//  found:     returns the stored equal; item itself is deallocated when
+//             deleteIfFound is set and it is not the very same pointer
+//  not found: item is added and returned
+//In both cases *fidx (when fidx is not NULL) receives the list index.
+template <class OBJ> OBJ* GList<OBJ>::AddIfNew(OBJ* item,
+                                     bool deleteIfFound, int* fidx) {
+ int pos;
+ OBJ* kept=item; //what gets returned; replaced below when an equal exists
+ if (Found(item, pos)) {
+    kept=this->fList[pos]; //the equal that is already in the list
+    if (deleteIfFound && (pointer)item != (pointer)kept)
+       this->deallocate_item(item);
+    }
+  else if (SORTED) {
+    //Found() left pos at the position where the item should be inserted
+    sortInsert(pos, item);
+    }
+  else { //unsorted: append at the end
+    pos = this->fCount;
+    if (pos==this->fCapacity) GPVec<OBJ>::Grow();
+    this->fList[pos]=item;
+    this->fCount++;
+    }
+ if (fidx!=NULL) *fidx=pos;
+ return kept;
+}
+
+//Index-returning counterpart of AddIfNew():
+//returns -1 (after deallocating item, unless it is the stored pointer itself)
+//when an equal is already present, else adds item and returns its index
+template <class OBJ> int GList<OBJ>::AddedIfNew(OBJ* item) {
+ int pos;
+ const bool present=Found(item, pos);
+ if (present) {
+    OBJ* stored=this->fList[pos];
+    if ((pointer)item != (pointer)stored) this->deallocate_item(item);
+    return -1;
+    }
+ //not found:
+ if (SORTED) {
+   sortInsert(pos, item); //Found() left pos at the insertion point
+   return pos;
+   }
+ //unsorted: append at the end, growing the array when it is full
+ pos = this->fCount;
+ if (pos==this->fCapacity) GPVec<OBJ>::Grow();
+ this->fList[pos]=item;
+ this->fCount++;
+ return pos;
+}
+
+
+template <class OBJ> bool GList<OBJ>::Found(OBJ* item, int& idx) {
+ //Searches the list with fCompareProc (sorted list, binary search) or with
+ //OBJ's == operator (unsorted list, linear search). Returns true with idx set
+ //to the matching position. On a miss a sorted list sets idx to the position
+ //where item would be inserted; an unsorted one leaves idx at -1 (0 if empty).
+ int i;
+ idx=-1;
+ if (this->fCount==0) { idx=0;return false;} //empty list: an insertion would go to slot 0
+ if (SORTED) { //binary search based on fCompareProc
+   //do the simple tests first (item sorts before the first / after the last entry):
+
+   if ((*fCompareProc)(this->fList[0],item)>0) {
+                       idx=0;
+                       return false;
+                       }
+   if ((*fCompareProc)(item, this->fList[this->fCount-1])>0) {
+                       idx=this->fCount;
+                       return false;
+                       }
+
+   int l, h, c; //low bound, high bound, last comparison result
+   l = 0;
+   h = this->fCount - 1;
+   while (l <= h) {
+       i = (l + h) >> 1; //midpoint
+       c = (*fCompareProc)(this->fList[i], item);
+       if (c < 0)  l = i + 1; //entry at i sorts before item: continue in the upper half
+         else {
+            h = i - 1;
+            if (c == 0) { //equal entry hit: report it right away
+                 idx=i;
+                 return true;
+                }
+            }
+       } //while
+   idx = l; //no equal entry: l is where item would have to be inserted
+   return false;
+   }
+ else {//not sorted: use linear search
+   // needs == operator to compare user defined objects !
+   i=0;
+   while (i<this->fCount) {
+      if (*this->fList[i]==*item) { //objects are compared by value, not by address
+         idx=i;
+         return true;
+         }
+      i++;
+      }
+   return false; //idx is still -1 here
+   }
+}
+
+template <class OBJ> void GList<OBJ>::sortInsert(int idx, OBJ* item) {
+ //Places item at position idx (allowed range [0..fCount]); the entry
+ //currently at idx and everything after it move up by one slot.
+ const int last=this->fCount;
+ if (idx<0 || idx>last) GError(GVEC_INDEX_ERR, idx);
+ if (last==this->fCapacity) {
+    //array is full: Grow() expands it and also inserts the new item
+    GPVec<OBJ>::Grow(idx, item);
+    return;
+    }
+ //room still left: open a gap (pointers only) unless appending at the end
+ const int tail=last-idx;
+ if (tail>0) memmove(this->fList+idx+1, this->fList+idx, tail*sizeof(OBJ*));
+ this->fList[idx]=item;
+ this->fCount++;
+}
+
+template <class OBJ> void GList<OBJ>::Insert(int idx, OBJ* item) {
+ //positional insert; idx may be anything in [0..fCount], so idx==fCount appends
+ BE_UNSORTED; //cannot do that with a sorted list!
+ this->GPVec<OBJ>::Insert(idx,item);
+}
+
+template <class OBJ> void GList<OBJ>::Move(int curidx, int newidx) {
+ BE_UNSORTED; //a sorted list cannot have its entries repositioned
+ this->GPVec<OBJ>::Move(curidx,newidx);
+}
+
+template <class OBJ> void GList<OBJ>::Put(int idx, OBJ* item, bool re_sort) {
+ //Overwrites the pointer at idx, which must be an existing slot [0..fCount-1].
+ //WARNING: never frees the replaced item; may BREAK the sort order unless re_sort is given
+ if (idx<0 || idx>=this->fCount) GError(GVEC_INDEX_ERR, idx);
+ this->fList[idx]=item;
+ if (SORTED && item!=NULL && re_sort) Sort(); //re-sort
+}
+
+template <class OBJ> int GList<OBJ>::Remove(OBJ* item) {
+ const int pos=IndexOf(item);
+ if (pos<0) return pos; //not in our list: nothing is removed
+ GPVec<OBJ>::Delete(pos);
+ return pos; //index the removed entry had
+}
+
+template <class OBJ> void GList<OBJ>::Sort() {
+ if (fCompareProc==NULL) fCompareProc = DefaultCompareProc; //stays installed after this call
+ const bool has_items = (this->fList!=NULL && this->fCount>0);
+ if (has_items) this->qSort(0, this->fCount-1, fCompareProc);
+}
+
+//---------------------------------------------------------------------------
+#endif
diff --git a/include/GStr.h b/include/GStr.h
new file mode 100644
index 0000000..e2a89e7
--- /dev/null
+++ b/include/GStr.h
@@ -0,0 +1,213 @@
+//---------------------------------------------------------------------------
+#ifndef GSTR_H
+#define GSTR_H
+//---------------------------------------------------------------------------
+#include "GBase.h"
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+// This class uses reference counting and copy-on-write semantics
+
+// All indexes are zero-based.  For all functions that accept an index, a
+// negative index specifies an index from the right of the string.  Also,
+// for all functions that accept a length, a length of -1 specifies the rest
+// of the string.
+enum enTokenizeMode { //tokenizer mode accepted by GStr::startTokenize()
+ tkFullString, //presumably: the delimiter is matched as one whole string -- TODO confirm in GStr.cpp
+ tkCharSet //presumably: each delimiter character separates tokens (startTokenize() default) -- TODO confirm
+ };
+
+class GStr { //string class whose characters live in a reference-counted Data record (see my_data)
+        friend GStr operator+(const char* s1, const GStr& s2);
+        friend bool operator==(const char* s1, const GStr& s2);
+        friend bool operator<(const char* s1, const GStr& s2);
+        friend bool operator<=(const char* s1, const GStr& s2);
+        friend bool operator>(const char* s1, const GStr& s2);
+        friend bool operator>=(const char* s1, const GStr& s2);
+        friend bool operator!=(const char* s1, const GStr& s2);
+        friend void Gswap(GStr& s1, GStr& s2);
+    public:
+        GStr();
+        GStr(const GStr& s);
+        GStr(const char* s);
+        GStr(const int i);
+        GStr(const double f);
+        GStr(char c, int n = 1);
+        ~GStr();
+        operator const char* () const { return my_data->chars;} //inline here
+        char& operator[](int index);
+        char operator[](int index) const;
+        GStr& operator=(const GStr& s);
+        GStr& operator=(const char* s);
+        GStr& operator=(const int i);
+        GStr& operator=(const double f);
+        GStr operator+(const GStr& s) const;
+        GStr operator+(const char* s) const;
+        GStr operator+(const char c) const;
+        GStr operator+(const int i) const;
+        GStr operator+(const double f) const;
+        bool operator==(const GStr& s) const;
+        bool operator==(const char* s) const;
+        bool operator<(const GStr& s) const;
+        bool operator<(const char* s) const;
+        bool operator<=(const GStr& s) const;
+        bool operator<=(const char* s) const;
+        bool operator>(const GStr& s) const;
+        bool operator>(const char* s) const;
+        bool operator>=(const GStr& s) const;
+        bool operator>=(const char* s) const;
+        bool operator!=(const GStr& s) const;
+        bool operator!=(const char* s) const;
+        GStr& operator+=(const GStr& s);
+        GStr& operator+=(const char* s);
+        GStr& operator+=(const char c);
+        GStr& operator+=(const int i);
+        GStr& operator+=(const double f);
+      //interface:
+      public:
+        int length() const;
+        bool is_empty() const;
+        bool is_space() const;
+        GStr substr(int index = 0, int len = -1) const;
+        GStr to(char c); //return the first part up to the first occurrence of c
+                           //or the whole string if c is not found
+        GStr from(char c); //same as to, but starting from the right side
+        GStr copy() const;
+        GStr& format(const char *fmt,...);
+        GStr& reverse();
+        GStr& appendfmt(const char *fmt,...);
+        GStr& cut(int index = 0, int len = -1); //delete a specified length
+        GStr& remove(int from, int to) { //cuts the inclusive index range [from..to]
+            return cut(from, to-from+1);
+            }
+
+        //paste a string at the specified position
+        GStr& paste(const GStr& s, int index = 0, int len=-1);
+        GStr& paste(const char* s, int index = 0, int len = -1);
+        GStr& replace(const char* from, const char* to=NULL);
+        GStr& insert(const GStr& s, int index = 0);
+        GStr& insert(const char* s, int index = 0);
+        GStr& append(const char* s);
+        GStr& append(const GStr& s);
+        GStr& upper();
+        GStr& lower();
+        GStr& clear();//make empty
+        //character translation or removal:
+        GStr& tr(const char* from, const char* to=NULL);
+        //number of occurrences of a char in the string:
+        int count(char c);
+        void startTokenize(const char* delimiter=" \t\n", enTokenizeMode tokenizemode=tkCharSet);
+        bool nextToken(GStr& token);
+        int asInt(int base=10);
+        double asReal();
+        double asDouble() { return asReal(); } //alias of asReal()
+        bool asReal(double& r);
+        bool asDouble(double& r) { return asReal(r); } //alias of asReal(r)
+        bool asInt(int& r, int base=10);
+        int index(const GStr& s, int start_index = 0) const;
+        int index(const char* s, int start_index = 0) const;
+        int index(char c, int start_index = 0) const;
+        int rindex(char c, int end_index = -1) const;
+        int rindex(const char* str, int end_index = -1) const;
+        bool contains(const GStr& s) const;
+        bool contains(const char* s) const;
+        bool contains(char c) const;
+        bool startsWith(const char* s) const;
+        bool startsWith(const GStr& s) const;
+        bool endsWith(const char* s) const;
+        bool endsWith(const GStr& s) const;
+        GStr split(const char* delim);
+        GStr split(char c);
+           /* splits "this" in two parts, at the first (leftmost)
+                 encounter of delim:
+                 1st would stay in "this"
+                 (which this way is truncated)
+                 2nd will go to the returned string
+           */
+        GStr splitr(const char* delim);
+        GStr splitr(char c);
+           /* splits "this" in two parts, at the last (rightmost)
+                 encounter of delim:
+                 1st would stay in "this"
+                 2nd will be returned
+           */
+
+        int peelInt() const; //extract an integer, (left to right), from a
+                //mixed alphanumeric string, e.g. 'T24HC1234b'=> 2
+        int peelIntR() const; //same as above, but starts from the right side
+        //e.g. 'T2HC1234b'=> 1234
+        GStr& trim(char c);
+        GStr& trim(const char* c=" \t\n\r"); //trim both ends of characters in given set
+        GStr& trimR(const char* c=" \t\n\r"); //trim only right end
+        GStr& trimR(char c=' ');
+        GStr& chomp(char c='\n') { return trimR(c); } //same as trimR(c), with a newline default
+        GStr& chomp(const char* cstr); //like trimR, but given string is taken as a whole
+        GStr& trimL(const char* c=" \t\n\r"); //trim only left end
+        GStr& trimL(char c=' ');
+        GStr& padR(int len, char c=' '); //align it in len spaces to the right
+        GStr& padL(int len, char c=' '); //align it in len spaces to the left
+        GStr& padC(int len, char c=' '); //center it
+        size_t read(FILE* stream, const char* delimiter="\n", size_t bufsize=4096);
+          //read next token from stream, using the given string as
+          //a marker where the block should stop
+        const char* chars() const;
+        const char* text() const;
+    protected:
+        char* fTokenDelimiter;
+        int fLastTokenStart;
+        enTokenizeMode fTokenizeMode;
+        void* readbuf; //file read buffer for the read() function
+        size_t readbufsize; //last setting for the readbuf
+        static void invalid_args_error(const char* fname);
+        static void invalid_index_error(const char* fname);
+        struct Data {//structure holding actual
+                     //string data and reference count information
+               Data() { ref_count=0; length=0; chars[0] = '\0'; } //starts as an unreferenced empty string
+               unsigned int ref_count;
+               int length;
+               char chars[1]; //start of the character data; presumably over-allocated by new_data() -- TODO confirm
+              };
+        static Data* new_data(int length); //alloc a specified length string's Data
+        static Data* new_data(const char* str); //alloc a copy of a specified string
+        void replace_data(int length);
+        void replace_data(Data* data);
+        void make_unique();
+        char* chrs(); // this is dangerous, length should not be affected
+        static Data null_data; //a null (empty) string Data is available here
+        Data* my_data; //pointer to a Data object holding actual string data
+};
+
+/***************************************************************************/
+
+inline int GStr::length() const { //length as recorded in this string's Data record
+ return this->my_data->length;
+ }
+
+
+inline const char *GStr::chars() const { //read-only pointer to the stored characters
+ return this->my_data->chars;
+ }
+
+inline char *GStr::chrs() { //protected counterpart of chars(): the characters may be modified
+ return this->my_data->chars;
+ }
+
+inline const char *GStr::text() const { //returns the same pointer as chars()
+ return this->my_data->chars;
+ }
+
+inline bool operator>=(const char *s1, const GStr& s2) {
+ const int order = strcmp(s1, s2.chars()); return !(order < 0); //true when s1 does not sort before s2
+ }
+
+inline bool operator!=(const char *s1, const GStr& s2) {
+ const int order = strcmp(s1, s2.chars()); return order != 0; //any difference in the characters
+ }
+
+inline void Gswap(GStr& s1, GStr& s2) {
+ GStr::Data* const second = s2.my_data; //only the Data pointers are exchanged
+ s2.my_data = s1.my_data; s1.my_data = second;
+ }
+
+#endif
diff --git a/include/GVec.hh b/include/GVec.hh
new file mode 100644
index 0000000..25b095c
--- /dev/null
+++ b/include/GVec.hh
@@ -0,0 +1,907 @@
+//---------------------------------------------------------------------------
+/*
+Sortable collection of pointers to objects
+*/
+
+#ifndef _GVec_HH
+#define _GVec_HH
+
+#include "GBase.h"
+
+#define GVEC_INDEX_ERR "GVec error: invalid index: %d\n"
+ #if defined(NDEBUG) || defined(NODEBUG) || defined(_NDEBUG) || defined(NO_DEBUG)
+ #define TEST_INDEX(x) 
+#else 
+ #define TEST_INDEX(x) \
+ if ((x)<0 || (x)>=fCount) GError(GVEC_INDEX_ERR, (x))
+#endif
+//TEST_INDEX(x): range check against fCount, compiled out in no-debug builds; x is parenthesized so any expression is safe
+#define GVEC_CAPACITY_ERR "GVec error: invalid capacity: %d\n"
+#define GVEC_COUNT_ERR "GVec error: invalid count: %d\n"
+//upper bound checked by setCapacity()/setCount(); parenthesized so it can be used inside larger expressions
+#define MAXLISTSIZE (INT_MAX-1)
+
+#define FREEDATA (fFreeProc!=NULL)
+
+//Compile-time flag: VAL is 1 for the built-in types listed below, 0 for everything else.
+template<class T> struct IsPrimitiveType { enum { VAL = 0 }; };
+
+//integral types
+template<> struct IsPrimitiveType<char>               { enum { VAL = 1 }; };
+template<> struct IsPrimitiveType<unsigned char>      { enum { VAL = 1 }; };
+template<> struct IsPrimitiveType<short>              { enum { VAL = 1 }; };
+template<> struct IsPrimitiveType<unsigned short>     { enum { VAL = 1 }; };
+template<> struct IsPrimitiveType<int>                { enum { VAL = 1 }; };
+template<> struct IsPrimitiveType<unsigned int>       { enum { VAL = 1 }; };
+template<> struct IsPrimitiveType<long>               { enum { VAL = 1 }; };
+template<> struct IsPrimitiveType<unsigned long>      { enum { VAL = 1 }; };
+template<> struct IsPrimitiveType<long long>          { enum { VAL = 1 }; };
+template<> struct IsPrimitiveType<unsigned long long> { enum { VAL = 1 }; };
+//bool, floating point and untyped pointer
+template<> struct IsPrimitiveType<bool>               { enum { VAL = 1 }; };
+template<> struct IsPrimitiveType<float>              { enum { VAL = 1 }; };
+template<> struct IsPrimitiveType<double>             { enum { VAL = 1 }; };
+template<> struct IsPrimitiveType<void*>              { enum { VAL = 1 }; };
+
+/*
+template<> struct IsPrimitiveType<int64_t> { enum { VAL = 1 }; };
+template<> struct IsPrimitiveType<uint64_t> { enum { VAL = 1 }; };
+template<> struct IsPrimitiveType<int32_t> { enum { VAL = 1 }; };
+template<> struct IsPrimitiveType<uint32_t> { enum { VAL = 1 }; };
+template<> struct IsPrimitiveType<int16_t> { enum { VAL = 1 }; };
+template<> struct IsPrimitiveType<uint16_t> { enum { VAL = 1 }; };
+template<> struct IsPrimitiveType<int8_t> { enum { VAL = 1 }; };
+template<> struct IsPrimitiveType<uint8_t> { enum { VAL = 1 }; };
+*/
+
+
+template <class OBJ> int DefLTCompareProc(const pointer p1, const pointer p2) {
+ //three-way compare built from OBJ's operator< alone: -1, 1, or 0 when neither is less
+ const OBJ* lhs = (OBJ*) p1;
+ const OBJ* rhs = (OBJ*) p2;
+ return (*lhs < *rhs) ? -1 : ((*rhs < *lhs) ? 1 : 0);
+}
+
+//basic template for a growable array holding OBJ values (copies, not pointers);
+//OBJ needs no comparison operators unless the argument-less Sort() is used
+template <class OBJ> class GVec {
+  protected:
+    OBJ* fArray; //item storage
+    int fCount; //number of items in use
+    int fCapacity; //number of items fArray has room for
+    void qSort(int L, int R, GCompareProc* cmpFunc);
+  public:
+    GVec(int init_capacity=2);
+    GVec(int init_count, const OBJ init_val);
+    GVec(GVec<OBJ>& array); //copy constructor
+    const GVec<OBJ>& operator=(GVec<OBJ>& array); //copy operator
+    virtual ~GVec();
+    void Insert(int idx, OBJ item) { Insert(idx, &item); }
+    void Insert(int idx, OBJ* item);
+    void idxInsert(int idx, OBJ& item) { Insert(idx, &item); }
+    void Grow();
+    void Grow(int idx, OBJ& item); //grow and add/insert item copy
+    void Reverse(); //WARNING: will break the sort order if SORTED!
+    int Add(OBJ* item); // simply append to the end of fArray, reallocating as needed
+    int Add(OBJ& item) { return Add(&item); } 
+    int cAdd(OBJ item) { return Add(&item); } //all these will CREATE a new OBJ and COPY to it
+    //                   // using OBJ copy operator=
+    // -- stack/queue usage:
+    //int Push(OBJ& item) { return Add(&item); }
+    int Push(OBJ& item) { return Add(&item); }
+    int cPush(OBJ item) { return Add(&item); }
+    OBJ Pop();// Stack use; removes and returns a copy of the last item
+    OBJ Shift(); //Queue use: removes and returns a copy of the first item
+
+    void Add(GVec<OBJ>& list); //append copies of all items from another list
+
+    OBJ& Get(int idx) { //checked access (see TEST_INDEX)
+          TEST_INDEX(idx);
+          return fArray[idx];
+          }
+    inline OBJ& operator[](int i) { //same as Get(i)
+          TEST_INDEX(i);
+          return fArray[i];
+          }
+    OBJ& Last() { //an empty vector fails the index check
+         TEST_INDEX(fCount-1);
+         return fArray[fCount-1];
+         }
+    OBJ& First() { //an empty vector fails the index check
+         TEST_INDEX(0);
+         return fArray[0];
+         }
+    void Clear();
+    void Delete(int index);
+    void Replace(int idx, OBJ& item); //Put, use operator= to copy
+    void Exchange(int idx1, int idx2);
+    void Swap(int idx1, int idx2)  { Exchange(idx1, idx2); } 
+    int  Capacity() { return fCapacity; }
+    //setCapacity() reports an error for a value below Count() or above MAXLISTSIZE
+    void setCapacity(int NewCapacity);
+    int  Count() { return fCount; }
+
+    void setCount(int NewCount);         // changes the item count; capacity is grown when needed
+    void setCount(int NewCount, OBJ* v); //same as setCount() but new objects are set to *v
+    void setCount(int NewCount, OBJ v);
+    void Resize(int NewCount) { setCount(NewCount); }
+    //void Resize(int NewCount, OBJ* v) { setCount(NewCount, v); }
+    void Resize(int NewCount, OBJ v) { setCount(NewCount, &v); }
+
+    //void Move(int curidx, int newidx);
+    bool isEmpty() { return fCount==0; }
+    bool notEmpty() { return fCount>0; }
+
+    void Sort(GCompareProc* cmpFunc);
+    void Sort(); //sorts with DefLTCompareProc<OBJ>, which relies on OBJ's operator<
+};
+
+//---- template for dynamic array of object pointers
+//---- it's faster than GVec<OBJ*> and has item deallocation awareness
+template <class OBJ> class GPVec {
+  protected:
+    OBJ** fList; //pointer to an array of pointers to objects
+    int fCount; //total number of entries in list
+    int fCapacity; //current allocated size
+    GFreeProc* fFreeProc; //useful for deleting objects
+    //---
+    void Expand();
+    void Grow();
+    void Grow(int idx, OBJ* newitem);
+    void qSort(int L, int R, GCompareProc* cmpFunc);
+  public:  
+    static void DefaultFreeProc(pointer item) { //plain delete of the item, cast back to OBJ*
+      delete (OBJ*)item;
+      }
+    virtual ~GPVec();
+    GPVec(int init_capacity=2, bool free_elements=true); //also the default constructor
+    GPVec(bool free_elements);
+    GPVec(GPVec<OBJ>& list); //copy constructor?
+    GPVec(GPVec<OBJ>* list); //kind of a copy constructor
+    const GPVec<OBJ>& operator=(GPVec<OBJ>& list);
+    OBJ* Get(int i);
+    OBJ* operator[](int i) { return this->Get(i); }
+    void Reverse(); //reverse pointer array; WARNING: will break the sort order if sorted!
+    void freeItem(int idx); //calls fFreeProc (or DefaultFreeProc) on fList[idx] and sets NULL there, doesn't pack!
+                      //it will free even if fFreeProc is NULL!
+    void setFreeItem(GFreeProc *freeProc) { fFreeProc=freeProc; }
+    void setFreeItem(bool doFree) { //true installs DefaultFreeProc, false removes the free procedure
+       if (doFree) fFreeProc=DefaultFreeProc;
+             else  fFreeProc=NULL;
+       }
+    // -- stack usage:
+    int Push(OBJ* item) { return Add(item); }
+    OBJ* Pop();// Stack use; removes and returns last item,but does NOT FREE it
+    OBJ* Shift(); //Queue use: removes and returns first item, but does NOT FREE it
+    void deallocate_item(OBJ*& item); //forcefully call fFreeProc or delete on item
+    void Clear();
+    void Exchange(int idx1, int idx2);
+    void Swap(int idx1, int idx2)  { Exchange(idx1, idx2); }
+    OBJ* First() { return (fCount>0)?fList[0]:NULL; } //NULL when the list is empty
+    OBJ* Last()  { return (fCount>0)?fList[fCount-1]:NULL;} //NULL when the list is empty
+    bool isEmpty() { return fCount==0; }
+    bool notEmpty() { return fCount>0; }
+    int Capacity() { return fCapacity; }
+    int Count()   { return fCount; }
+    void setCapacity(int NewCapacity);
+    void setCount(int NewCount); //the same as setCapacity() but the new item range is filled with NULLs
+    int Add(OBJ* item); //simply append the pointer copy
+    void Add(GPVec<OBJ>& list); //add all pointers from another list
+    void Insert(int idx, OBJ* item);
+    void Move(int curidx, int newidx);
+    void Put(int idx, OBJ* item);
+    void Pack();
+    void Delete(int index); //also frees the item if fFreeProc!=NULL, and shifts the successor items
+    void Forget(int idx); //simply places a NULL at fList[idx], nothing else
+    int RemovePtr(pointer item); //always use linear search to find the pointer! calls Delete() if found
+    int IndexOf(pointer item); //a linear search for pointer address!
+    void Sort(GCompareProc* cmpFunc);
+    void Sort();
+ };
+
+//-------------------- TEMPLATE IMPLEMENTATION-------------------------------
+
+template <class OBJ> GVec<OBJ>::GVec(int init_capacity) {
+  //start out empty and unallocated, then let setCapacity() do the allocation
+  fArray=NULL;
+  fCapacity=0; fCount=0;
+  setCapacity(init_capacity);
+}
+
+
+template <class OBJ> GVec<OBJ>::GVec(int init_count, const OBJ init_val) {
+  //builds a vector of init_count items, each one assigned a copy of init_val;
+  //the capacity requested from setCapacity() is exactly init_count
+  fArray=NULL;
+  fCapacity=0; fCount=0;
+  setCapacity(init_count);
+  for (int filled=0;filled<init_count;filled++) fArray[filled]=init_val;
+  fCount = init_count;
+}
+
+
+template <class OBJ> GVec<OBJ>::GVec(GVec<OBJ>& array) { //copy constructor
+ //Deep copy: same capacity and count as the source; the items are duplicated
+ //either bytewise (primitive OBJ) or one by one through OBJ's operator=
+ fCount=array.fCount;
+ fCapacity=array.fCapacity;
+ fArray=NULL;
+ if (fCapacity<=0) return; //the source has no storage, so nothing is allocated
+ if (IsPrimitiveType<OBJ>::VAL) {
+   //primitive items: raw allocation, one block copy of the used part
+   GMALLOC(fArray, fCapacity*sizeof(OBJ));
+   memcpy(fArray, array.fArray, fCount*sizeof(OBJ));
+   return;
+ }
+ //class items: default-constructed storage, then item-wise assignment
+ fArray=new OBJ[fCapacity];
+ for (int k=0;k<fCount;k++) fArray[k]=array[k];
+}
+
+template <class OBJ> const GVec<OBJ>& GVec<OBJ>::operator=(GVec<OBJ>& array) {
+ //Deep-copy assignment; assigning a vector to itself changes nothing.
+ //Capacity and count are both taken over from the source.
+ if (&array!=this) {
+   Clear(); //drop whatever this vector held before
+   fCapacity=array.fCapacity;
+   fCount=array.fCount;
+   if (fCapacity>0 && IsPrimitiveType<OBJ>::VAL) {
+     //primitive items: raw allocation and a bytewise copy
+     GMALLOC(fArray, fCapacity*sizeof(OBJ));
+     memcpy(fArray, array.fArray, fCount*sizeof(OBJ));
+     }
+   else if (fCapacity>0) {
+     //class items: default-construct the storage, then assign item by item
+     fArray=new OBJ[fCapacity];
+     for (int k=0;k<fCount;k++) fArray[k]=array.fArray[k];
+     }
+   }
+ return *this;
+}
+
+template <class OBJ> GVec<OBJ>::~GVec() {
+ Clear(); //releases the item array
+}
+
+
+template <class OBJ> void GVec<OBJ>::setCapacity(int NewCapacity) { //reallocates fArray to NewCapacity items, keeping the fCount items in use
+  if (NewCapacity < fCount || NewCapacity > MAXLISTSIZE)
+    GError(GVEC_CAPACITY_ERR, NewCapacity);
+    //error: NewCapacity MUST be >= fCount (and at most MAXLISTSIZE)
+   //to go below the current count, reduce the count first with Resize() or setCount()
+  if (NewCapacity!=fCapacity) {
+   if (NewCapacity==0) { //release everything, using the deallocator that matches the allocation
+      if (IsPrimitiveType<OBJ>::VAL) {
+       GFREE(fArray);
+      } else {
+       delete[] fArray;
+       fArray=NULL;
+      }
+   }
+   else {
+      if (IsPrimitiveType<OBJ>::VAL) {
+        GREALLOC(fArray, NewCapacity*sizeof(OBJ));
+      } else {
+        OBJ* oldArray=fArray;
+		//fArray=new OBJ[NewCapacity]();
+		fArray=new OBJ[NewCapacity];
+        for (int i=0;i<this->fCount;i++) {
+          fArray[i] = oldArray[i]; 
+          }// we need operator= here
+        //wouldn't be faster to use memcpy instead?
+        //memcpy(fArray, oldArray, fCount*sizeof(OBJ));
+        if (oldArray) delete[] oldArray;
+      }
+   }
+  fCapacity=NewCapacity;
+  }
+}
+
+template <class OBJ> void GVec<OBJ>::Clear() {
+  //release the storage the same way it was obtained, then reset to empty
+  if (!IsPrimitiveType<OBJ>::VAL) {
+    delete[] fArray;
+    fArray=NULL;
+  }
+  else {
+    GFREE(fArray);
+  }
+  fCapacity=0; fCount=0;
+}
+
+template <class OBJ> void GVec<OBJ>::Grow() {
+ //grow by a quarter of the current capacity, or by a single item while capacity<=8
+ setCapacity((fCapacity>8) ? fCapacity+(fCapacity>>2) : fCapacity+1);
+}
+
+template <class OBJ> void GVec<OBJ>::Reverse() {
+  //in-place reversal: swap mirrored items, walking inwards from both ends
+  //(a middle item, if any, stays where it is)
+  OBJ held;
+  for (int lo=0, hi=fCount-1; lo<hi; lo++, hi--) {
+     held=fArray[lo];
+     fArray[lo]=fArray[hi];
+     fArray[hi]=held;
+     }
+}
+
+template <class OBJ> void GVec<OBJ>::Grow(int idx, OBJ& item) { //enlarges the array and stores a copy of item at idx in the same pass
+ int delta = (fCapacity>8) ? (fCapacity>>2) : 1 ; //same growth step as Grow()
+ int NewCapacity=fCapacity+delta;
+ if (NewCapacity <= fCount || NewCapacity >= MAXLISTSIZE)
+    GError(GVEC_CAPACITY_ERR, NewCapacity);
+    //error: capacity not within range
+ //if (NewCapacity!=fCapacity) { 
+ if (idx==fCount) { //append item
+         //GREALLOC(fArray, NewCapacity*sizeof(OBJ));
+         setCapacity(NewCapacity);
+         fArray[idx]=item;
+ }
+ else { //insert item at idx
+   OBJ* newList; //the enlarged replacement array
+   if (IsPrimitiveType<OBJ>::VAL) {
+        GMALLOC(newList, NewCapacity*sizeof(OBJ));
+        //copy data before idx
+        memcpy(&newList[0],&fArray[0], idx*sizeof(OBJ));
+        newList[idx]=item;
+        //copy data after idx
+        memmove(&newList[idx+1],&fArray[idx], (fCount-idx)*sizeof(OBJ));
+        //zero the slots that are still unused (original note: "..shouldn't do this"):
+        memset(&newList[fCount+1], 0, (NewCapacity-fCount-1)*sizeof(OBJ));
+        //data copied, the old array can go:
+        GFREE(fArray);
+   } else {
+        newList=new OBJ[NewCapacity]; //]()
+        // operator= required!
+        for (int i=0;i<idx;i++) {
+          newList[i]=fArray[i];
+          }
+        newList[idx]=item;
+        //copy data after idx, one slot further up
+        //memmove(&newList[idx+1],&fArray[idx], (fCount-idx)*sizeof(OBJ));
+        for (int i=idx+1;i<=fCount;i++) {
+          newList[i]=fArray[i-1];
+          }
+        delete[] fArray;
+   }
+   fArray=newList;
+   fCapacity=NewCapacity;
+ }
+ fCount++; //exactly one item was added in either branch
+}
+template <class OBJ> int GVec<OBJ>::Add(OBJ* item) {
+ if (item==NULL) return -1; //nothing to copy from
+ const int slot=fCount; //index the copy will occupy
+ if (slot==fCapacity) Grow();
+ fArray[slot] = *item; //OBJ::operator= must copy OBJ properly!
+ fCount++; return slot;
+}
+
+
+template <class OBJ> void GVec<OBJ>::Add(GVec<OBJ>& list) {
+  const int extra=list.Count();
+  if (extra==0) return; //nothing to append
+  setCapacity(fCapacity+extra);
+  OBJ* dest=fArray+fCount; //first free slot
+  if (IsPrimitiveType<OBJ>::VAL) {
+    memcpy(dest, list.fArray, extra*sizeof(OBJ));
+    }
+   else { //class items are copied with OBJ's operator=
+    for (int k=0;k<extra;k++) dest[k]=list.fArray[k];
+    }
+  fCount+=extra;
+}
+
+//Stack usage: removes the last item and returns a copy of it
+template <class OBJ> OBJ GVec<OBJ>::Pop() {
+ //popping from an empty vector is reported through GError()
+ if (fCount<=0) GError("Error: invalid GVec::Pop() operation!\n");
+ //the array slot itself is left as it is; only the count shrinks
+ const int last=--fCount;
+ //returning by value copies the item (copy constructor called)
+ return fArray[last];
+}
+
+//Queue usage: removes the first item and returns a copy of it
+template <class OBJ> OBJ GVec<OBJ>::Shift() {
+ if (fCount<=0) GError("Error: invalid GVec::Shift() operation!\n");
+ fCount--;
+ OBJ o(fArray[0]); //copy constructor
+ if (IsPrimitiveType<OBJ>::VAL) { //raw byte moves are only safe for primitive items
+   if (fCount>0) memmove(&fArray[0], &fArray[1], (fCount)*sizeof(OBJ));
+ } else for (int i=0;i<fCount;i++) fArray[i]=fArray[i+1]; //class items move down via OBJ::operator=, as in Delete()
+ return o;
+}
+
+template <class OBJ> void GVec<OBJ>::Insert(int idx, OBJ* item) {
+ //Inserts a copy of *item so that it ends up at position idx.
+ //Valid idx values are [0..fCount]; the items from idx onwards
+ //move up by one position.
+ if (idx<0 || idx>fCount) GError(GVEC_INDEX_ERR, idx);
+
+ if (fCount==fCapacity) {
+    //no free slot: Grow() enlarges the array and places the item as well
+    Grow(idx, *item);
+    return;
+    }
+
+ //a free slot exists: open a gap at idx unless the item goes to the very end
+ const int moved=fCount-idx; //how many items sit at or after idx
+ if (moved>0 && IsPrimitiveType<OBJ>::VAL) {
+    memmove(&fArray[idx+1],&fArray[idx], moved*sizeof(OBJ));
+    }
+ else if (moved>0) {
+    //class items: copy upwards starting from the end so nothing is overwritten
+    for (int k=fCount; k>idx; k--) fArray[k]=fArray[k-1];
+    }
+ fArray[idx]=*item;
+ fCount++;
+}
+
+
+/*template <class OBJ> void GVec<OBJ>::Move(int curidx, int newidx) { //swap
+ if (curidx!=newidx || newidx>=fCount)
+     GError(GVEC_INDEX_ERR, newidx);
+ OBJ tmp=fArray[curidx]; //copy constructor here
+ fArray[curidx]=fArray[newidx];
+ fArray[newidx]=tmp;
+}*/
+
+
+template <class OBJ> void GVec<OBJ>::Replace(int idx, OBJ& item) {
+ TEST_INDEX(idx); //idx has to address an existing item
+ this->fArray[idx]=item; //overwritten through OBJ's operator=
+}
+
+template <class OBJ> void GVec<OBJ>::Exchange(int idx1, int idx2) {
+ //swaps the items at the two positions; both indexes are checked first
+ TEST_INDEX(idx1);
+ TEST_INDEX(idx2);
+ OBJ first=fArray[idx1];
+ fArray[idx1]=fArray[idx2]; fArray[idx2]=first;
+}
+
+
+template <class OBJ> void GVec<OBJ>::Delete(int index) {
+ //Removes the item at index; the items after it move down by one slot.
+ //Capacity is not changed.
+ TEST_INDEX(index);
+ fCount--;
+ const int tail=fCount-index; //items that follow the removed one
+ if (IsPrimitiveType<OBJ>::VAL) {
+   //primitive items can be moved as raw bytes
+   if (tail>0)
+      memmove(&fArray[index], &fArray[index+1], tail*sizeof(OBJ));
+   return;
+   }
+ //class items: shift down one at a time with OBJ's operator=
+ for (int k=index; k<fCount; k++) fArray[k]=fArray[k+1];
+}
+
+template <class OBJ> void GVec<OBJ>::setCount(int NewCount) {
+	// Sets the element count to NewCount, growing the storage first when
+	// needed. Slots gained this way are not assigned here (the original note
+	// says they hold default-constructed objects).
+	const bool badCount = (NewCount<0) || (NewCount > MAXLISTSIZE);
+	if (badCount)
+	   GError(GVEC_COUNT_ERR, NewCount);
+	for (; fCapacity < NewCount; ) Grow();
+	fCount = NewCount;
+}
+
+template <class OBJ> void GVec<OBJ>::setCount(int NewCount, OBJ* v) {
+	// Sets the element count to NewCount; when the vector gets longer every
+	// newly exposed slot is assigned a copy of *v.
+	if (NewCount<0 || NewCount > MAXLISTSIZE)
+	  GError(GVEC_COUNT_ERR, NewCount);
+	while (NewCount > fCapacity) Grow();
+	int slot=fCount;
+	while (slot<NewCount) {
+		fArray[slot]=*v;
+		slot++;
+	}
+	fCount = NewCount;
+}
+
+template <class OBJ> void GVec<OBJ>::setCount(int NewCount, OBJ v) {
+	// Sets the element count to NewCount; when the vector gets longer every
+	// newly exposed slot is assigned a copy of v.
+	if (NewCount<0 || NewCount > MAXLISTSIZE)
+	   GError(GVEC_COUNT_ERR, NewCount);
+	while (NewCount > fCapacity) Grow();
+	int slot=fCount;
+	while (slot<NewCount) {
+		fArray[slot]=v;
+		slot++;
+	}
+	fCount = NewCount;
+}
+
+
+// Quicksort over fArray[l..r] (both bounds inclusive). cmpFunc receives
+// pointers to two elements; a negative result means the first sorts before
+// the second, a positive result means it sorts after.
+template <class OBJ> void GVec<OBJ>::qSort(int l, int r, GCompareProc* cmpFunc) {
+ int i, j;
+ OBJ p,t;
+ do {
+    i = l; j = r;
+    p = this->fArray[(l + r) >> 1]; //pivot: a copy of the middle element
+    do {
+      //skip elements that are already on the correct side of the pivot
+      while (cmpFunc(&(this->fArray[i]), &p) < 0) i++;
+      while (cmpFunc(&(this->fArray[j]), &p) > 0) j--;
+      if (i <= j) {
+        //swap the out-of-place pair
+        t = this->fArray[i];
+        this->fArray[i] = this->fArray[j];
+        this->fArray[j] = t;
+        i++; j--;
+        }
+      } while (i <= j);
+    if (l < j) qSort(l, j, cmpFunc); //recurse on the left partition
+    l = i; //the right partition is handled by the outer loop
+    } while (i < r);
+}
+
+template <class OBJ> void GVec<OBJ>::Sort(GCompareProc* cmpFunc) {
+ // Sorts the whole vector with cmpFunc. A NULL comparator only produces a
+ // warning; an empty or unallocated vector is left untouched.
+ if (cmpFunc==NULL) {
+   GMessage("Warning: NULL compare function given, useless Sort() call.\n");
+   return;
+ }
+ const bool hasData = (this->fArray!=NULL) && (this->fCount>0);
+ if (!hasData) return;
+ qSort(0, this->fCount-1, cmpFunc);
+}
+
+// Sorts the vector with the default comparator DefLTCompareProc<OBJ>.
+template <class OBJ> void GVec<OBJ>::Sort() {  
+  GCompareProc* cmpFunc = DefLTCompareProc<OBJ>;
+  Sort(cmpFunc);
+}
+
+
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//*=> GPVec implementation
+
+template <class OBJ> GPVec<OBJ>::GPVec(GPVec& list) { //copy constructor
+ // Shallow copy: the new vector gets its own pointer array with the same
+ // capacity, but the pointed-to objects are shared with 'list'.
+ // NOTE(review): fFreeProc is copied too, so when it is non-NULL both
+ // vectors will try to free the same objects -- confirm callers expect this.
+ fCount=list.fCount;
+ fCapacity=list.fCapacity;
+ fFreeProc=list.fFreeProc;
+ fList=NULL;
+ if (fCapacity>0) {
+      GMALLOC(fList, fCapacity*sizeof(OBJ*));
+      //copy only when there is something to copy; with no capacity fList
+      //stays NULL and must not be handed to memcpy()
+      if (fCount>0)
+         memcpy(fList, list.fList, fCount*sizeof(OBJ*));
+      }
+}
+
+template <class OBJ> GPVec<OBJ>::GPVec(GPVec* plist) { //another copy constructor
+ // Shallow copy of *plist: the new vector gets its own pointer array with
+ // the same capacity, but the pointed-to objects are shared.
+ // NOTE(review): fFreeProc is copied too, so when it is non-NULL both
+ // vectors will try to free the same objects -- confirm callers expect this.
+ fCount=plist->fCount;
+ fCapacity=plist->fCapacity;
+ fFreeProc=plist->fFreeProc;
+ fList=NULL;
+ if (fCapacity>0) {
+     GMALLOC(fList, fCapacity*sizeof(OBJ*));
+     //copy only when there is something to copy; with no capacity fList
+     //stays NULL and must not be handed to memcpy()
+     if (fCount>0)
+        memcpy(fList, plist->fList, fCount*sizeof(OBJ*));
+     }
+}
+
+template <class OBJ> const GPVec<OBJ>& GPVec<OBJ>::operator=(GPVec& list) {
+ // Replaces the content of this vector with a shallow copy of 'list'.
+ // Attention: the object *POINTERS* are copied,
+ // but the actual object content is NOT duplicated
+ if (&list!=this) {
+     Clear(); //releases the old array and leaves fCount/fCapacity at 0
+     fFreeProc=list.fFreeProc;
+     fCount=list.fCount;
+     //the capacity has to be taken from the source as well: sizing the
+     //buffer from the just-cleared fCapacity (0) would make the memcpy
+     //below write past the end of the allocation
+     fCapacity=list.fCapacity;
+     if (fCapacity>0) {
+        GMALLOC(fList, fCapacity*sizeof(OBJ*));
+        if (fCount>0)
+           memcpy(fList, list.fList, fCount*sizeof(OBJ*));
+        }
+     }
+ return *this;
+}
+
+
+template <class OBJ> void GPVec<OBJ>::Add(GPVec<OBJ>& list) {
+  // Appends every pointer held by 'list' to this vector. Only the pointers
+  // are copied, so the objects end up shared between the two vectors.
+  if (list.Count()==0) return;
+  const int extra=list.fCount; //number of pointers to append
+  setCapacity(fCapacity+extra);
+  OBJ** dest=&(fList[fCount]);
+  memcpy(dest, list.fList, extra*sizeof(OBJ*));
+  fCount+=extra;
+}
+
+template <class OBJ> void GPVec<OBJ>::Reverse() {
+  // Reverses the order of the stored pointers in place by swapping the
+  // outermost pair and working inwards.
+  for (int lo=0, hi=fCount-1; lo<hi; lo++, hi--) {
+     OBJ* held=fList[lo];
+     fList[lo]=fList[hi];
+     fList[hi]=held;
+     }
+}
+
+template <class OBJ> GPVec<OBJ>::GPVec(int init_capacity, bool free_elements) {
+  // Creates an empty vector. With free_elements set, DefaultFreeProc is
+  // installed as the item deallocator; otherwise none is. A positive
+  // init_capacity pre-allocates that many slots.
+  fList=NULL;
+  fCapacity=0;
+  fCount=0;
+  if (free_elements) fFreeProc=DefaultFreeProc;
+     else fFreeProc=NULL;
+  if (init_capacity>0)
+    setCapacity(init_capacity);
+}
+
+template <class OBJ> GPVec<OBJ>::GPVec(bool free_elements) {
+  // Creates an empty vector with no storage allocated. With free_elements
+  // set, DefaultFreeProc is installed as the item deallocator.
+  fList=NULL;
+  fCapacity=0;
+  fCount=0;
+  if (free_elements) fFreeProc=DefaultFreeProc;
+     else fFreeProc=NULL;
+}
+
+// Destructor: releases the pointer array through Clear().
+template <class OBJ> GPVec<OBJ>::~GPVec() {
+ this->Clear();//this will free the items if fFreeProc is defined
+}
+
+template <class OBJ> void GPVec<OBJ>::setCapacity(int NewCapacity) {
+  // Resizes the pointer array to NewCapacity slots. The new capacity must
+  // be able to hold the current items and may not exceed MAXLISTSIZE;
+  // a capacity of 0 releases the array.
+  if (NewCapacity < fCount || NewCapacity > MAXLISTSIZE)
+    GError(GVEC_CAPACITY_ERR, NewCapacity);
+  if (NewCapacity==fCapacity) return; //nothing to change
+  if (NewCapacity==0) {
+      GFREE(fList);
+      }
+    else {
+      GREALLOC(fList, NewCapacity*sizeof(OBJ*));
+      }
+  fCapacity=NewCapacity;
+}
+
+template <class OBJ> void GPVec<OBJ>::deallocate_item(OBJ* &item) {
+ // Disposes of the object 'item' points to and resets the caller's pointer
+ // to NULL: through fFreeProc when FREEDATA holds, with delete otherwise.
+ // A NULL item is ignored.
+ if (item==NULL) return;
+ if (FREEDATA) {
+   (*fFreeProc)(item);
+   }
+ else {
+   delete item;
+   }
+ item=NULL;
+}
+
+template <class OBJ> void GPVec<OBJ>::Clear() {
+ // Empties the vector: when FREEDATA holds, every stored pointer is first
+ // passed to fFreeProc; then the pointer array itself is released.
+ if (FREEDATA) {
+   int i=0;
+   while (i<fCount) {
+     (*fFreeProc)(fList[i]);
+     i++;
+     }
+   }
+ GFREE(fList);
+ fCapacity=0;
+ fCount=0;
+}
+
+template <class OBJ> void GPVec<OBJ>::Exchange(int idx1, int idx2) {
+ // Swaps the pointers stored at idx1 and idx2; both indexes are checked
+ // with TEST_INDEX before anything is touched.
+ TEST_INDEX(idx1);
+ TEST_INDEX(idx2);
+ OBJ* held=fList[idx1];
+ fList[idx1]=fList[idx2];
+ fList[idx2]=held;
+}
+
+// Makes room for at least one more item: grows the storage when it is full.
+template <class OBJ> void GPVec<OBJ>::Expand() {
+ if (fCount==fCapacity) Grow();
+ //return this;
+}
+
+// Returns the pointer stored at idx; idx is checked with TEST_INDEX first.
+template <class OBJ> OBJ* GPVec<OBJ>::Get(int idx) {
+ TEST_INDEX(idx);
+ return fList[idx];
+}
+
+template <class OBJ> void GPVec<OBJ>::Grow() {
+	// Enlarges the storage: by a quarter of the current capacity once it
+	// holds more than 8 slots, by a single slot otherwise.
+	int delta = 1;
+	if (fCapacity>8) delta = (fCapacity>>2);
+	setCapacity(fCapacity + delta);
+}
+
+// Grows the storage and inserts newitem at position idx in a single pass.
+// Insert() calls this when the array is full. The capacity grows by a
+// quarter once it exceeds 8 slots, by one slot otherwise.
+template <class OBJ> void GPVec<OBJ>::Grow(int idx, OBJ* newitem) {
+ /*
+ int delta;
+ if (fCapacity > 64 ) {
+   delta = (fCapacity > 0xFFF) ? 0x100 : (fCapacity>>4);
+ }
+ else {
+   delta = (fCapacity>8) ? (fCapacity>>2) : 1 ;
+ }
+ */
+ int delta = (fCapacity>8) ? (fCapacity>>2) : 1 ;
+ int NewCapacity=fCapacity+delta;
+ if (NewCapacity <= fCount || NewCapacity > MAXLISTSIZE)
+    GError(GVEC_CAPACITY_ERR, NewCapacity);
+    //error: capacity not within range
+ //if (NewCapacity!=fCapacity) {
+ /*if (NewCapacity==0) {
+      GFREE(fList);
+ }
+ else  {//add the new item
+ */
+ if (idx==fCount) {
+    //appending: extend the existing array in place and store at the end
+    GREALLOC(fList, NewCapacity*sizeof(OBJ*));
+    fList[idx]=newitem;
+    }
+ else {
+   //inserting in the middle: build a fresh array with a gap at idx
+   OBJ** newList;
+   GMALLOC(newList, NewCapacity*sizeof(OBJ*));
+   //copy data before idx
+   memcpy(&newList[0],&fList[0], idx*sizeof(OBJ*));
+   newList[idx]=newitem;
+   //copy data after idx
+   memmove(&newList[idx+1],&fList[idx], (fCount-idx)*sizeof(OBJ*));
+   //zero the unused slots past the last item
+   memset(&newList[fCount+1], 0, (NewCapacity-fCount-1)*sizeof(OBJ*));
+   //data copied:
+   GFREE(fList);
+   fList=newList;
+   }
+ fCount++; 
+ fCapacity=NewCapacity;
+}
+
+template <class OBJ> int GPVec<OBJ>::IndexOf(pointer item) {
+ // Linear search by address: returns the index of the first slot holding
+ // 'item', or -1 when no slot holds it.
+ for (int i=0;i<fCount;i++) {
+     if (item==(pointer)fList[i]) return i;
+     }
+ return -1;
+ }
+
+template <class OBJ> int GPVec<OBJ>::Add(OBJ* item) {
+ // Appends 'item' and returns the index it was stored at. NULL pointers
+ // are refused and reported with -1.
+ if (item==NULL) return -1;
+ const int pos=fCount; //slot the new item will occupy
+ if (pos==fCapacity) this->Grow();
+ fList[pos]=item;
+ fCount++;
+ return pos;
+}
+
+template <class OBJ> void GPVec<OBJ>::Insert(int idx, OBJ* item) {
+ // Stores 'item' at position idx, moving later pointers up by one.
+ // idx may be anything in 0..fCount; idx==fCount appends.
+ if (idx<0 || idx>fCount) GError(GVEC_INDEX_ERR, idx);
+ if (fCount==fCapacity) {
+   //storage is full: Grow(idx,item) enlarges it and inserts in one pass
+   Grow(idx, item);
+   return;
+   }
+ const int tail=fCount-idx; //pointers that have to move up
+ if (tail>0)
+      memmove(&fList[idx+1], &fList[idx], tail*sizeof(OBJ*));
+ fList[idx]=item;
+ fCount++;
+}
+
+template <class OBJ> void GPVec<OBJ>::Move(int curidx, int newidx) {
+ // Relocates the pointer stored at curidx so that it ends up at newidx;
+ // the pointers in between shift by one position. No item is freed.
+ //BE_UNSORTED; //cannot do that in a sorted list!
+ // Only an out-of-range target is an error. The previous guard also
+ // rejected every curidx!=newidx, i.e. every call that moved anything.
+ if (newidx<0 || newidx>=fCount)
+     GError(GVEC_INDEX_ERR, newidx);
+ OBJ* p=Get(curidx); //Get() validates curidx
+ if (curidx==newidx) return; //already in place
+ //take the pointer out of its current slot (a delete without freeing):
+ fCount--;
+ if (curidx<fCount)
+    memmove(&fList[curidx], &fList[curidx+1], (fCount-curidx)*sizeof(OBJ*));
+ //fCount is now below fCapacity, so Insert() will not reallocate
+ Insert(newidx, p);
+}
+
+// Stores 'item' at position idx, overwriting whatever pointer was there.
+template <class OBJ> void GPVec<OBJ>::Put(int idx, OBJ* item) {
+ //WARNING: this will never free the replaced item!
+ TEST_INDEX(idx);
+ fList[idx]=item;
+}
+
+// Sets the slot at idx to NULL without freeing the object it pointed to.
+// The slot itself stays in the vector (fCount is unchanged).
+template <class OBJ> void GPVec<OBJ>::Forget(int idx) {
+ TEST_INDEX(idx);
+ fList[idx]=NULL; //user should free that somewhere else
+}
+
+template <class OBJ> void GPVec<OBJ>::freeItem(int idx) {
+  // Frees the object held at idx and leaves a NULL in its slot. The item is
+  // always freed: with fFreeProc when one is installed, with
+  // DefaultFreeProc otherwise.
+  TEST_INDEX(idx);
+  if (fFreeProc==NULL) {
+      this->DefaultFreeProc(fList[idx]);
+      }
+    else (*fFreeProc)(fList[idx]);
+  fList[idx]=NULL;
+}
+
+template <class OBJ> void GPVec<OBJ>::Delete(int index) {
+ // Removes the slot at 'index'. The object is freed only when a free
+ // procedure is installed and the slot is not NULL; later pointers then
+ // move down one position.
+ TEST_INDEX(index);
+ const bool mustFree = (fFreeProc!=NULL) && (fList[index]!=NULL);
+ if (mustFree) {
+   (*fFreeProc)(fList[index]); //freeItem
+   }
+ fList[index]=NULL;
+ fCount--;
+ const int tail=fCount-index; //pointers located above the removed slot
+ if (tail>0)
+   memmove(&fList[index], &fList[index+1], tail*sizeof(OBJ*));
+}
+
+//Stack usage:
+template <class OBJ> OBJ* GPVec<OBJ>::Pop() {
+ // Stack-style removal: takes the last pointer out of the vector and
+ // returns it (NULL when the vector is empty). The object is not freed.
+ if (fCount<=0) return NULL;
+ const int last=fCount-1;
+ OBJ* top=fList[last];
+ fList[last]=NULL;
+ fCount=last;
+ return top;
+}
+
+//Queue usage:
+template <class OBJ> OBJ* GPVec<OBJ>::Shift() {
+ // Queue-style removal: takes the first pointer out of the vector and
+ // returns it (NULL when the vector is empty). The object is not freed.
+ if (fCount<=0) return NULL;
+ OBJ* head=fList[0];
+ const int remaining=fCount-1;
+ if (remaining>0)
+   memmove(&fList[0], &fList[1], remaining*sizeof(OBJ*));
+ fList[remaining]=NULL; //clear the slot that just became unused
+ fCount=remaining;
+ return head;
+}
+
+//linear search for the pointer address
+template <class OBJ> int GPVec<OBJ>::RemovePtr(pointer item) {
+// Looks for the first slot holding the address 'item', removes it with
+// Delete() and returns its former index. Returns -1 for a NULL item or
+// when the address is not stored.
+if (item==NULL) return -1;
+int i=0;
+while (i<fCount) {
+   if ((pointer)fList[i] == item) {
+       Delete(i);
+       return i;
+       }
+   i++;
+   }
+return -1; //not found
+}
+
+// Removes every NULL slot from the vector. The scan runs from the end so
+// that each Delete() only shifts slots that were already examined.
+template <class OBJ> void GPVec<OBJ>::Pack()  {
+ for (int i=fCount-1; i>=0; i--)
+    if (fList[i]==NULL) Delete(i); //shift rest of fList content accordingly
+}
+
+template <class OBJ> void GPVec<OBJ>::setCount(int NewCount) {
+  // Sets the number of slots to NewCount, enlarging the storage when
+  // needed. Slots gained by growing are filled with NULL pointers; nothing
+  // is freed when the count shrinks.
+  if (NewCount<0 || NewCount > MAXLISTSIZE)
+     GError(GVEC_COUNT_ERR, NewCount);
+  if (NewCount > fCapacity) setCapacity(NewCount);
+  const int added=NewCount - fCount;
+  if (added>0) //pad with NULL pointers
+    memset(& fList[fCount], 0, added * sizeof(OBJ*));
+  fCount = NewCount;
+}
+
+// Quicksort over fList[L..R] (both bounds inclusive). cmpFunc receives two
+// of the stored pointers; a negative result means the first sorts before
+// the second, a positive result means it sorts after.
+template <class OBJ> void GPVec<OBJ>::qSort(int L, int R, GCompareProc* cmpFunc) {
+ int I, J;
+ OBJ* P;
+ OBJ* T;
+ do {
+    I = L;
+    J = R;
+    P = this->fList[(L + R) >> 1]; //pivot: the middle pointer
+    do {
+      //skip entries that are already on the correct side of the pivot
+      while (cmpFunc(this->fList[I], P) < 0) I++;
+      while (cmpFunc(this->fList[J], P) > 0) J--;
+      if (I <= J) {
+        //swap the out-of-place pair
+        T = this->fList[I];
+        this->fList[I] = this->fList[J];
+        this->fList[J] = T;
+        I++;
+        J--;
+        }
+      }
+    while (I <= J);
+    if (L < J) qSort(L, J, cmpFunc); //recurse on the left partition
+    L = I; //the right partition is handled by the outer loop
+    }
+ while (I < R);
+}
+
+template <class OBJ> void GPVec<OBJ>::Sort(GCompareProc* cmpFunc) {
+ // Sorts the stored pointers with cmpFunc. A NULL comparator only produces
+ // a warning; an empty or unallocated vector is left untouched.
+ if (cmpFunc==NULL) {
+    GMessage("Warning: NULL compare function given, useless Sort() call.\n");
+    return;
+    }
+ const bool hasData = (this->fList!=NULL) && (this->fCount>0);
+ if (!hasData) return;
+ qSort(0, this->fCount-1, cmpFunc);
+}
+
+
+// Sorts the stored pointers with the default comparator DefLTCompareProc<OBJ>.
+template <class OBJ> void GPVec<OBJ>::Sort() {
+  GCompareProc* cmpFunc = DefLTCompareProc<OBJ>;
+  Sort(cmpFunc);
+}
+
+
+//---------------------------------------------------------------------------
+#endif
diff --git a/include/codons.h b/include/codons.h
new file mode 100644
index 0000000..1925e9f
--- /dev/null
+++ b/include/codons.h
@@ -0,0 +1,54 @@
+#ifndef CODONS_H
+#define CODONS_H
+#include "GBase.h"
+#include <ctype.h>
+
+unsigned short packCodon(char n1, char n2, char n3);
+//assumes n1,n2,n3 are UPPERCASE!
+
+// Three nucleotide letters, always stored in upper case.
+struct Codon {
+ char nuc[3];
+ // Builds a codon from the first three characters of str, or "NNN" when
+ // str is NULL. str must provide at least three readable characters.
+ Codon(char* str=NULL) {
+  if (str==NULL) {
+   nuc[0]='N';
+   nuc[1]='N';
+   nuc[2]='N';
+   }
+  else {
+   //toupper() requires a value representable as unsigned char (or EOF);
+   //a plain char may be negative, hence the casts
+   nuc[0]=toupper((unsigned char)str[0]);
+   nuc[1]=toupper((unsigned char)str[1]);
+   nuc[2]=toupper((unsigned char)str[2]);
+   }
+  }
+
+ // Builds a codon from three separate nucleotide letters.
+ Codon(char s1, char s2, char s3) {
+   nuc[0]=toupper((unsigned char)s1);
+   nuc[1]=toupper((unsigned char)s2);
+   nuc[2]=toupper((unsigned char)s3);
+   }
+
+ // Read/write access to nucleotide idx (0..2); any other index is
+ // reported through GError().
+ char& operator[](int idx) {
+   if (idx<0 || idx>2) 
+      GError("Error: Codon index out of bounds!\n");
+   return nuc[idx];
+   }
+
+ // Read-only access to nucleotide idx (0..2); any other index is
+ // reported through GError().
+ char operator[](int idx) const {
+   if (idx<0 || idx>2) 
+      GError("Error: Codon index out of bounds!\n");
+   return nuc[idx];
+   }
+ 
+ char translate();
+ };
+
+//simple 1st frame forward translation of a given DNA string
+//will allocate memory for the translation --  the caller is
+// responsible for freeing the returned string!
+char* translateDNA(const char* dnastr, int& aalen, int dnalen=0);
+
+
+bool codonTableInit();
+ 
+#endif
diff --git a/include/gdna.h b/include/gdna.h
new file mode 100644
index 0000000..1f923ed
--- /dev/null
+++ b/include/gdna.h
@@ -0,0 +1,15 @@
+#ifndef GDNA_H
+#define GDNA_H
+#include "GBase.h"
+
+char ntComplement(char c);
+
+//in-place reverse complement of a nucleotide (sub)sequence
+char* reverseComplement(char* seq, int slen=0);
+
+bool gDnaInit();
+
+byte gdna2bit(char* &nt, int n=4); //pack n bases into a byte (n can be 1..4)
+char g2bit2base(byte v2bit); //convert the 2-bit value into 'A', 'C', 'G' or 'T'
+
+#endif
diff --git a/include/gff.h b/include/gff.h
new file mode 100644
index 0000000..d29da03
--- /dev/null
+++ b/include/gff.h
@@ -0,0 +1,1088 @@
+#ifndef GFF_H
+#define GFF_H
+
+#include "GBase.h"
+#include "gdna.h"
+#include "codons.h"
+#include "GFaSeqGet.h"
+#include "GList.hh"
+#include "GHash.hh"
+
+//#include <boost/crc.hpp>  // for boost::crc_32_type
+
+/*
+const byte exMskMajSpliceL = 0x01;
+const byte exMskMajSpliceR = 0x02;
+const byte exMskMinSpliceL = 0x04;
+const byte exMskMinSpliceR = 0x08;
+const byte exMskTag = 0x80;
+*/
+
+//reserved Gffnames::feats entries -- basic feature types
+extern const int gff_fid_mRNA; // "mRNA" feature name
+extern const int gff_fid_transcript; // *RNA, *transcript feature name
+extern const int gff_fid_exon;
+
+extern const uint GFF_MAX_LOCUS;
+extern const uint GFF_MAX_EXON;
+extern const uint GFF_MAX_INTRON;
+
+extern const uint gfo_flag_CHILDREN_PROMOTED;
+extern const uint gfo_flag_HAS_ERRORS;
+extern const uint gfo_flag_IS_GENE;
+extern const uint gfo_flag_HAS_GFF_ID; //found a GFF3 formatted main feature with its own ID
+extern const uint gfo_flag_BY_EXON;  //created by subfeature (exon) directly
+                      //(GTF2 and some chado gff3 dumps with exons given before their mRNA)
+extern const uint gfo_flag_IS_TRANSCRIPT; //recognized as '*RNA' or '*transcript'
+extern const uint gfo_flag_DISCARDED; //should not be printed under the "transcriptsOnly" directive
+extern const uint gfo_flag_LST_KEEP; //GffObj from GffReader::gflst is to be kept (not deallocated)
+                                     //when GffReader is destroyed
+extern const uint gfo_flag_LEVEL_MSK; //hierarchical level: 0 = no parent
+extern const byte gfo_flagShift_LEVEL;
+
+extern bool gff_show_warnings;
+
+#define GFF_LINELEN 2048
+#define ERR_NULL_GFNAMES "Error: GffObj::%s requires a non-null GffNames* names!\n"
+
+
+// Kind of GFF feature a segment was built from. GffLine::exontype is
+// documented as holding one of these values (stored in a char).
+enum GffExonType {
+  exgffIntron=-1, // useless "intron" feature
+	exgffNone=0,  //not a recognizable exon or CDS segment
+  exgffStart, //from "start_codon" feature (within CDS)
+  exgffStop, //from "stop_codon" feature (may be outside CDS)
+  exgffCDS,  //from "CDS" feature
+  exgffUTR,  //from "UTR" feature
+  exgffCDSUTR, //from a merge of UTR and CDS feature
+  exgffExon, //from "exon" feature
+};
+
+const char* strExonType(char xtype);
+
+class GffReader;
+
+// One parsed GFF/GTF line. The object owns line, dupline, _parents, parents,
+// ID, gene_name and gene_id (all released in the destructor); gseqname,
+// track, ftype and info point into line[].
+class GffLine {
+    char* _parents; //stores a copy of the Parent attribute value,
+       //with commas replaced by \0
+    int _parents_len;
+ public:
+    char* dupline; //duplicate of original line
+    char* line; //this will have tabs replaced by \0
+    int llen;
+    char* gseqname;
+    char* track;
+    char* ftype; //feature name: mRNA/gene/exon/CDS
+    char* info; //the last, attributes' field, unparsed
+    uint fstart;
+    uint fend;
+    uint qstart; //overlap coords on query, if available
+    uint qend;
+    uint qlen; //query len, if given
+    double score;
+    char strand;
+    bool skip;
+    bool is_gff3; //if the line appears to be in GFF3 format
+    bool is_cds; //"cds" and "stop_codon" features
+    bool is_exon; //"exon" and "utr" features
+    char exontype; // gffExonType
+    bool is_transcript; //if current feature is *RNA or *transcript
+    bool is_gene; //if current feature is *gene
+    char phase;  // '.' , '0', '1' or '2'
+    // -- allocated strings:
+    char* gene_name; //value of gene_name attribute (GTF) if present or Name attribute of a gene feature (GFF3)
+    char* gene_id; //value of gene_id attribute (GTF) if present or ID attribute of a gene feature (GFF3)
+    //
+    char** parents; //for GTF only parents[0] is used
+    int num_parents;
+    char* ID;     // if a ID=.. attribute was parsed, or a GTF with 'transcript' line (transcript_id)
+    GffLine(GffReader* reader, const char* l); //parse the line accordingly
+    // Drops all parent information held by this line.
+    void discardParent() {
+       GFREE(_parents);
+       _parents_len=0;
+       num_parents=0;
+       //the pointer array is a separate allocation (the destructor frees it
+       //too): release it here instead of just forgetting the address
+       GFREE(parents);
+       parents=NULL;
+       }
+    char* extractAttr(const char* pre, bool caseStrict=false, bool enforce_GTF2=false);
+    // Deep copy of *l: scalar fields are copied bitwise, then every owned
+    // buffer is duplicated and the pointers into line[] are re-based.
+    GffLine(GffLine* l):_parents(NULL), _parents_len(0),
+        dupline(NULL), line(NULL), llen(0), gseqname(NULL), track(NULL),
+        ftype(NULL), info(NULL), fstart(0), fend(0), qstart(0), qend(0), qlen(0),
+        score(0), strand(0), skip(true), is_gff3(false), is_cds(false), is_exon(false),
+        exontype(0), is_transcript(false), is_gene(false), phase(0),
+        gene_name(NULL), gene_id(NULL),
+        parents(NULL), num_parents(0), ID(NULL) { //a copy constructor
+    	if (l==NULL || l->line==NULL)
+    		GError("Error: invalid GffLine(l)\n");
+      //bitwise copy first; every owning pointer is replaced below
+      memcpy((void*)this, (void*)l, sizeof(GffLine));
+      GMALLOC(line, llen+1);
+      memcpy(line, l->line, llen+1);
+      if (l->dupline!=NULL) {
+         GMALLOC(dupline, llen+1);
+         memcpy(dupline, l->dupline, llen+1);
+         }
+        else dupline=NULL;
+      //--offsets within line[]
+      gseqname=line+(l->gseqname-l->line);
+      track=line+(l->track-l->line);
+      ftype=line+(l->ftype-l->line);
+      info=line+(l->info-l->line);
+      if (num_parents>0 && parents) {
+         parents=NULL; //re-init, just copied earlier
+         GMALLOC(parents, num_parents*sizeof(char*));
+         //_parents_len=l->_parents_len; copied above
+         _parents=NULL; //re-init, forget pointer copy
+         GMALLOC(_parents, _parents_len);
+         memcpy(_parents, l->_parents, _parents_len);
+         for (int i=0;i<num_parents;i++) {
+            parents[i]=_parents+(l->parents[i] - l->_parents);
+            }
+         }
+        else {
+         //nothing to duplicate: forget the addresses copied from l, or both
+         //destructors would free the same buffers
+         parents=NULL;
+         _parents=NULL;
+         _parents_len=0;
+         num_parents=0;
+         }
+      //-- allocated string copies:
+      ID=Gstrdup(l->ID);
+      if (l->gene_name!=NULL)
+          gene_name=Gstrdup(l->gene_name);
+      if (l->gene_id!=NULL)
+          gene_id=Gstrdup(l->gene_id);
+      }
+    // Creates an empty line; skip is set, so the line is marked to be ignored.
+    GffLine():_parents(NULL), _parents_len(0),
+      dupline(NULL), line(NULL), llen(0), gseqname(NULL), track(NULL),
+      ftype(NULL), info(NULL), fstart(0), fend(0), qstart(0), qend(0), qlen(0),
+      score(0), strand(0), skip(true), is_gff3(false), is_cds(false), is_exon(false),
+      exontype(0), is_transcript(false), is_gene(false), phase(0),
+      gene_name(NULL), gene_id(NULL),
+      parents(NULL), num_parents(0), ID(NULL) {
+      }
+    ~GffLine() {
+      GFREE(dupline);
+      GFREE(line);
+      GFREE(_parents);
+      GFREE(parents);
+      GFREE(ID);
+      GFREE(gene_name);
+      GFREE(gene_id);
+     }
+};
+
+// One attribute of a GFF record: the id of the attribute name plus an owned
+// copy of its (cleaned-up) value.
+class GffAttr {
+ public:
+   int attr_id;
+   char* attr_val;
+   GffAttr(int an_id, const char* av=NULL) {
+     attr_id=an_id;
+     attr_val=NULL;
+     setValue(av);
+     }
+  ~GffAttr() {
+     GFREE(attr_val);
+     }
+  // Replaces the stored value with a trimmed copy of av. Surrounding double
+  // quotes are dropped unless the value contains an internal space or a ';'.
+  // A NULL, empty or all-space av leaves the value unset.
+  void setValue(const char* av) {
+     if (attr_val!=NULL) {
+        GFREE(attr_val);
+        }
+     if (av==NULL || av[0]==0) return;
+     //trim leading spaces (the cursor to advance is vstart, not av)
+     const char* vstart=av;
+     while (*vstart==' ') vstart++;
+     if (*vstart==0) return; //nothing but spaces: no value to keep
+     const char* vend=vstart;
+     bool keep_dq=false;
+     while (vend[1]!=0) {
+        if (*vend==' ' && vend[1]!=' ') keep_dq=true;
+          else if (*vend==';') keep_dq=true;
+        vend++;
+        }
+     //remove spaces at the end:
+     while (*vend==' ' && vend!=vstart) vend--;
+     //practical clean-up: if it doesn't have any internal spaces just strip those useless double quotes
+     //(vend>vstart: a lone '"' is both first and last character and is kept)
+     if (!keep_dq && vend>vstart && *vstart=='"' && *vend=='"') {
+               vend--;
+               vstart++;
+               }
+     attr_val=Gstrdup(vstart, vend);
+     }
+  //the comparison operators work on object identity (addresses), not on
+  //attribute content
+  bool operator==(GffAttr& d){
+      return (this==&d);
+      }
+  bool operator>(GffAttr& d){
+     return (this>&d);
+     }
+  bool operator<(GffAttr& d){
+    return (this<&d);
+    }
+
+ };
+
+class GffNameList;
+class GffNames;
+
+// A name together with the index assigned to it by the owning GffNameList.
+class GffNameInfo {
+  friend class GffNameList;
+ public:
+   int idx;    //-1 on construction; GffNameList stores the list index here
+   char* name; //own copy of the name, NULL when none was given
+   GffNameInfo(const char* n=NULL):idx(-1),name(NULL) {
+     if (n!=NULL) name=Gstrdup(n);
+     }
+
+   ~GffNameInfo() {
+      GFREE(name);
+     }
+
+   //both comparisons are plain strcmp() on the names
+   bool operator==(GffNameInfo& d){
+       const int cmp=strcmp(this->name, d.name);
+       return (cmp==0);
+       }
+   bool operator<(GffNameInfo& d){
+     const int cmp=strcmp(this->name, d.name);
+     return (cmp<0);
+     }
+};
+
+// Dictionary of names: a list of GffNameInfo entries plus a hash (byName)
+// for lookup by name. The id of a name is the index Add() returned for it.
+class GffNameList:public GList<GffNameInfo> {
+  friend class GffNameInfo;
+  friend class GffNames;
+protected:
+  GHash<GffNameInfo> byName;//hash with shared keys
+  int idlast; //fList index of last added/reused name
+  //adds tname without checking whether it is already present
+  void addStatic(const char* tname) {// fast add
+     GffNameInfo* entry=new GffNameInfo(tname);
+     const int pos=this->Add(entry);
+     idlast=pos;
+     entry->idx=pos;
+     byName.shkAdd(entry->name,entry);
+     }
+public:
+ GffNameList(int init_capacity=6):GList<GffNameInfo>(init_capacity, false,true,true), byName(false) {
+    idlast=-1;
+    setCapacity(init_capacity);
+    }
+ //name that was added or looked up last, NULL when there is none yet
+ char* lastNameUsed() {
+   if (idlast<0) return NULL;
+   return Get(idlast)->name;
+   }
+ int lastNameId() { return idlast; }
+ char* getName(int nid) { //retrieve name by its ID
+   const bool valid=(nid>=0 && nid<fCount);
+   if (!valid)
+         GError("GffNameList Error: invalid index (%d)\n",nid);
+   return fList[nid]->name;
+   }
+
+ int addName(const char* tname) {//returns or create an id for the given name
+   GffNameInfo* entry=byName.Find(tname);
+   int id=-1;
+   if (entry==NULL) {//not known yet: add new entry
+      entry=new GffNameInfo(tname);
+      id=this->Add(entry);
+      entry->idx=id;
+      byName.shkAdd(entry->name,entry);
+      }
+    else id=entry->idx;
+   idlast=id;
+   return id;
+   }
+
+ //always creates a new entry (no lookup) and does not update idlast
+ int addNewName(const char* tname) {
+    GffNameInfo* entry=new GffNameInfo(tname);
+    const int id=this->Add(entry);
+    entry->idx=id;
+    byName.shkAdd(entry->name,entry);
+    return id;
+    }
+
+ int getId(const char* tname) { //only returns a name id# if found
+    GffNameInfo* entry=byName.Find(tname);
+    return (entry==NULL) ? -1 : entry->idx;
+    }
+ //removal is not supported: always reports an error
+ int removeName() {
+   GError("Error: removing names from GffNameList not allowed!\n");
+   return -1;
+   }
+};
+
+// The name dictionaries used by the GFF objects, with a reference count
+// (numrefs starts at 0).
+class GffNames {
+ public:
+   int numrefs;
+   GffNameList tracks;
+   GffNameList gseqs;
+   GffNameList attrs;
+   GffNameList feats; //feature names: 'mRNA', 'exon', 'CDS' etc.
+   GffNames():numrefs(0), tracks(), gseqs(), attrs(), feats() {
+    //the order below is critical: each reserved feature name must land on
+    //the index its gff_fid_* constant expects
+    feats.addStatic("mRNA");//first entry, index 0 -> gff_fid_mRNA
+    feats.addStatic("transcript");//second entry, index 1 -> gff_fid_transcript
+    feats.addStatic("exon");//third entry, index 2 -> gff_fid_exon
+    //feats.addStatic("CDS"); //would be index 3 (gff_fid_CDS)
+    }
+};
+
+void gffnames_ref(GffNames* &n);
+void gffnames_unref(GffNames* &n);
+
+// Output mode selector for printing records.
+// NOTE(review): the pgtf*/pgff* prefixes presumably select GTF vs. GFF
+// output and the suffix the sub-features to print -- confirm against the
+// printing code, which is not part of this header section.
+enum GffPrintMode {
+  pgtfAny, //print record as read
+  pgtfExon,
+  pgtfCDS,
+  pgffAny, //print record as read
+  pgffExon,
+  pgffCDS,
+  pgffBoth,
+};
+
+
+// List of the attributes attached to one record; entries are identified by
+// the id their name has in a GffNames::attrs dictionary.
+class GffAttrs:public GList<GffAttr> {
+  public:
+    GffAttrs():GList<GffAttr>(false,true,false) { }
+    // Sets attribute attrname to val: an existing entry with that name is
+    // updated in place, otherwise a new entry is appended. A name missing
+    // from the dictionary is registered there first.
+    void add_or_update(GffNames* names, const char* attrname, const char* val) {
+      int aid=names->attrs.getId(attrname);
+      if (aid<0) {
+         //unknown name, so no entry can be using it yet
+         aid=names->attrs.addNewName(attrname);
+         }
+        else {
+         //attribute found in the dictionary: do we have it?
+         for (int i=0;i<Count();i++) {
+            GffAttr* cur=Get(i);
+            if (cur->attr_id==aid) {
+                cur->setValue(val);
+                return;
+                }
+            }
+         }
+      this->Add(new GffAttr(aid, val));
+      }
+
+    // Value of the attribute called attrname, or NULL when the name is not
+    // in the dictionary or no entry of this list uses it.
+    char* getAttr(GffNames* names, const char* attrname) {
+      const int aid=names->attrs.getId(attrname);
+      if (aid<0) return NULL;
+      for (int i=0;i<Count();i++) {
+          GffAttr* cur=Get(i);
+          if (cur->attr_id==aid) return cur->attr_val;
+          }
+      return NULL;
+      }
+    // Value of the attribute with id aid, or NULL when it is not present.
+    char* getAttr(int aid) {
+      if (aid<0) return NULL;
+      for (int i=0;i<Count();i++) {
+          GffAttr* cur=Get(i);
+          if (cur->attr_id==aid) return cur->attr_val;
+          }
+      return NULL;
+      }
+};
+
+
+// One exon-like segment of a GffObj: genomic interval (inherited from
+// GSeg), score, phase, type and optional query coordinates and attributes.
+class GffExon : public GSeg {
+ public:
+  void* uptr; //for later extensions
+  GffAttrs* attrs; //other attributes kept for this exon (deleted by the destructor)
+  double score; // gff score column
+  char phase; //GFF phase column - for CDS segments only
+             // '.' = undefined (UTR), '0','1','2' for CDS exons
+  char exontype; //segment type code
+             //NOTE(review): the previous legend here (1="exon" 2="cds" 3="utr"
+             //4="stop_codon") does not match the GffExonType values that
+             //GffLine::exontype is documented to carry -- confirm which applies
+  int qstart; // for mRNA/protein exon mappings: coordinates on query
+  int qend;
+  // Either coordinate pair may be given in any order; each is stored with
+  // the smaller value first.
+  GffExon(int s=0, int e=0, double sc=0, char fr=0, int qs=0, int qe=0, char et=0) {
+    uptr=NULL;
+    attrs=NULL;
+    score=sc;
+    phase=fr;
+    exontype=et;
+    const bool ordered=(s<e);
+    start= ordered ? s : e;
+    end= ordered ? e : s;
+    const bool qordered=(qs<qe);
+    qstart= qordered ? qs : qe;
+    qend= qordered ? qe : qs;
+    } //constructor
+
+ // Attribute value by name; NULL when there are no attributes, when an
+ // argument is NULL or when the attribute is not found.
+ char* getAttr(GffNames* names, const char* atrname) {
+   const bool usable=(attrs!=NULL && names!=NULL && atrname!=NULL);
+   if (!usable) return NULL;
+   return attrs->getAttr(names, atrname);
+   }
+
+ // Attribute value by attribute id; NULL when absent.
+ char* getAttr(int aid) {
+   return (attrs==NULL) ? NULL : attrs->getAttr(aid);
+   }
+
+ ~GffExon() { //destructor
+   delete attrs; //deleting a NULL pointer is a no-op
+   }
+};
+
+
+// A GSeg interval extended with a phase and an exon index.
+// No constructor is declared here, so phase and exonidx are not
+// initialized by this class.
+class GffCDSeg:public GSeg {
+ public:
+  char phase;
+  int exonidx; // NOTE(review): presumably an index into GffObj::exons -- confirm
+};
+//one GFF mRNA object -- e.g. a mRNA with its exons and/or CDS segments
+class GffObj:public GSeg {
+  //utility segment-merging function for addExon()
+  void expandExon(int xovl, uint segstart, uint segend,
+       char exontype, double sc, char fr, int qs, int qe);
+ protected:
+   //coordinate transformation data:
+   uint xstart; //absolute genomic coordinates of reference region
+   uint xend;
+   char xstatus; //coordinate transform status:
+            //0 : (start,end) coordinates are absolute
+            //'+' : (start,end) coords are relative to xstart..xend region
+            //'-' : (start,end) are relative to the reverse complement of xstart..xend region
+   //--
+   char* gffID; // ID name for mRNA (parent) feature
+   char* gene_name; //value of gene_name attribute (GTF) if present or Name attribute of the parent gene feature (GFF3)
+   char* geneID; //value of gene_id attribute (GTF) if present or ID attribute of a parent gene feature (GFF3)
+   unsigned int flags;
+   //-- friends:
+   friend class GffReader;
+   friend class GffExon;
+public:
+  static GffNames* names; // dictionary storage that holds the various attribute names etc.
+  int track_id; // index of track name in names->tracks
+  int gseq_id; // index of genomic sequence name in names->gseqs
+  int ftype_id; // index of this record's feature name in names->feats, or the special gff_fid_mRNA value
+  int exon_ftype_id; //index of child subfeature name in names->feats (that subfeature stored in "exons")
+                   //if ftype_id==gff_fid_mRNA then this value is ignored
+  GList<GffExon> exons; //for non-mRNA entries, these can be any subfeature of type subftype_id
+  GPVec<GffObj> children;
+  GffObj* parent;
+  int udata; //user data, flags etc.
+  void* uptr; //user pointer (to a parent object, cluster, locus etc.)
+  GffObj* ulink; //link to another GffObj (user controlled field)
+  // mRNA specific fields:
+  bool isCDS; //just a CDS, no UTRs
+  bool partial; //partial CDS
+  uint CDstart; //CDS start coord
+  uint CDend;   //CDS end coord
+  char CDphase; //initial phase for CDS start
+  bool hasErrors() { return ((flags & gfo_flag_HAS_ERRORS)!=0); }
+  void hasErrors(bool v) {
+      if (v) flags |= gfo_flag_HAS_ERRORS;
+        else flags &= ~gfo_flag_HAS_ERRORS;
+      }
+  bool hasGffID() { return ((flags & gfo_flag_HAS_GFF_ID)!=0); }
+  void hasGffID(bool v) {
+      if (v) flags |= gfo_flag_HAS_GFF_ID;
+        else flags &= ~gfo_flag_HAS_GFF_ID;
+      }
+  bool createdByExon() { return ((flags & gfo_flag_BY_EXON)!=0); }
+  void createdByExon(bool v) {
+      if (v) flags |= gfo_flag_BY_EXON;
+        else flags &= ~gfo_flag_BY_EXON;
+      }
+  bool isGene() { return ((flags & gfo_flag_IS_GENE)!=0); }
+  void isGene(bool v) {
+      if (v) flags |= gfo_flag_IS_GENE;
+        else flags &= ~gfo_flag_IS_GENE;
+      }
+  bool isDiscarded() { return ((flags & gfo_flag_DISCARDED)!=0); }
+  void isDiscarded(bool v) {
+      if (v) flags |= gfo_flag_DISCARDED;
+        else flags &= ~gfo_flag_DISCARDED;
+      }
+
+  bool isUsed() { return ((flags & gfo_flag_LST_KEEP)!=0); }
+  void isUsed(bool v) {
+      if (v) flags |= gfo_flag_LST_KEEP;
+        else flags &= ~gfo_flag_LST_KEEP;
+      }
+  bool isTranscript() { return ((flags & gfo_flag_IS_TRANSCRIPT)!=0); }
+  void isTranscript(bool v) {
+      if (v) flags |= gfo_flag_IS_TRANSCRIPT;
+        else flags &= ~gfo_flag_IS_TRANSCRIPT;
+      }
+  bool promotedChildren() { return ((flags & gfo_flag_CHILDREN_PROMOTED)!=0); }
+  void promotedChildren(bool v) {
+    if (v) flags |= gfo_flag_CHILDREN_PROMOTED;
+      else flags &= ~gfo_flag_CHILDREN_PROMOTED;
+     }
+  void setLevel(byte v) {
+    //Stores v in the level bit-field of flags (the field getLevel() reads):
+    //the old value is cleared first, then the new one is OR-ed in. The
+    //previous code only cleared bits, so a non-zero level was never stored.
+    flags = (flags & ~gfo_flag_LEVEL_MSK) |
+            ((((uint)v) << gfo_flagShift_LEVEL) & gfo_flag_LEVEL_MSK);
+    }
+  byte incLevel() {
+    //Increments the level kept in the level bit-field of flags and returns
+    //the incremented value. The field is cleared and then rewritten; the
+    //previous code only cleared bits, so the new level was never stored.
+    uint v=((flags & gfo_flag_LEVEL_MSK) >> gfo_flagShift_LEVEL);
+    v++;
+    flags = (flags & ~gfo_flag_LEVEL_MSK) |
+            ((v << gfo_flagShift_LEVEL) & gfo_flag_LEVEL_MSK);
+    return v;
+    }
+  byte getLevel() {
+    return ((byte)((flags & gfo_flag_LEVEL_MSK) >> gfo_flagShift_LEVEL));
+    }
+
+  bool isValidTranscript() {
+    //return (ftype_id==gff_fid_mRNA && exons.Count()>0);
+    return (isTranscript() && exons.Count()>0);
+    }
+
+
+  int addExon(uint segstart, uint segend, double sc=0, char fr='.',
+             int qs=0, int qe=0, bool iscds=false, char exontype=0);
+
+  int addExon(GffReader* reader, GffLine* gl, bool keepAttr=false, bool noExonAttr=true);
+
+  void removeExon(int idx);
+  void removeExon(GffExon* p);
+  char  strand; //true if features are on the reverse complement strand
+  double gscore;
+  double uscore; //custom, user-computed score, if needed
+  int covlen; //total coverage of  reference genomic sequence (sum of maxcf segment lengths)
+
+   //--------- optional data:
+  int qlen; //query length, start, end - if available
+  int qstart;
+  int qend;
+  int qcov; //query coverage - percent
+  GffAttrs* attrs; //other gff3 attributes found for the main mRNA feature
+   //constructor by gff line parsing:
+  GffObj(GffReader* gfrd, GffLine* gffline, bool keepAttrs=false, bool noExonAttr=true);
+   //if gfline->Parent!=NULL then this will also add the first sub-feature
+   // otherwise, only the main feature is created
+  void copyAttrs(GffObj* from);
+  void clearAttrs() {
+    if (attrs!=NULL) {
+      bool sharedattrs=(exons.Count()>0 && exons[0]->attrs==attrs);
+      delete attrs; attrs=NULL;
+      if (sharedattrs) exons[0]->attrs=NULL;
+      }
+    }
+  GffObj(char* anid=NULL):GSeg(0,0), exons(true,true,false), children(1,false) {
+       //exons list flags: sorted, free, non-unique
+       //-- identity: gffID is a private copy of anid, when one is given
+       gffID=(anid!=NULL) ? Gstrdup(anid) : NULL;
+       gene_name=NULL;
+       geneID=NULL;
+       parent=NULL;
+       //-- user data slots and status flags
+       uptr=NULL;
+       ulink=NULL;
+       udata=0;
+       flags=0;
+       //-- feature types, reference sequence and track ids
+       ftype_id=-1;
+       exon_ftype_id=-1;
+       gseq_id=-1;
+       track_id=-1;
+       gffnames_ref(names);
+       //-- query alignment data
+       qlen=qstart=qend=qcov=0;
+       //-- CDS info: hasCDS <=> CDstart>0
+       partial=true;
+       isCDS=false;
+       CDstart=0;
+       CDend=0;
+       CDphase=0;
+       //-- coordinate transform state (see xcoord/unxcoord)
+       xstart=0;
+       xend=0;
+       xstatus=0;
+       //-- strand, scores, coverage and attributes
+       strand='.'; gscore=0; uscore=0;
+       covlen=0; attrs=NULL;
+       }
+   ~GffObj() {
+       clearAttrs(); //also resets exons[0]->attrs when it shared the list
+       GFREE(geneID);
+       GFREE(gene_name);
+       GFREE(gffID);
+       gffnames_unref(names);
+       }
+   //--------------
+   GffObj* finalize(GffReader* gfr, bool mergeCloseExons=false,
+               bool keepAttrs=false, bool noExonAttr=true);
+               //complete parsing: must be called in order to merge adjacent/close proximity subfeatures
+   void parseAttrs(GffAttrs*& atrlist, char* info, bool isExon=false);
+   const char* getSubfName() {
+     //returns the generic feature type of the entries in exons array:
+     //the name registered for exon_ftype_id in names->feats
+     return names->feats.getName(exon_ftype_id);
+     }
+   void addCDS(uint cd_start, uint cd_end, char phase=0);
+
+   bool monoFeature() { //true if no subfeatures, or a single one spanning the whole feature
+     if (exons.Count()==0) return true;
+     if (exons.Count()!=1) return false;
+     return (exons[0]->end==this->end && exons[0]->start==this->start);
+     }
+
+   bool hasCDS() { return (CDstart>0); } //a CDS is present iff CDstart is non-zero
+
+   const char* getFeatureName() { //name registered for ftype_id in names->feats
+     return names->feats.getName(ftype_id);
+     }
+   void setFeatureName(const char* feature);
+
+   void addAttr(const char* attrname, const char* attrvalue);
+   int removeAttr(const char* attrname, const char* attrval=NULL);
+   int removeAttr(int aid, const char* attrval=NULL);
+   int removeExonAttr(GffExon& exon, const char* attrname, const char* attrval=NULL);
+   int removeExonAttr(GffExon& exon, int aid, const char* attrval=NULL);
+   const char* getAttrName(int i) { //name of the i-th attribute; NULL if there are no attributes
+     return (attrs==NULL) ? NULL :
+                names->attrs.getName(attrs->Get(i)->attr_id);
+     }
+   char* getAttr(const char* attrname, bool checkFirstExon=false) {
+     //Looks attrname up among this feature's own attributes; when that fails
+     //and checkFirstExon is set, falls back to the attributes of exons[0].
+     //Returns NULL if names or attrname is NULL, or if nothing was found.
+     if (names==NULL || attrname==NULL) return NULL;
+     if (attrs!=NULL) {
+        char* own=attrs->getAttr(names, attrname);
+        if (own!=NULL) return own;
+        }
+     if (!checkFirstExon || exons.Count()==0) return NULL;
+     //not found on the feature itself: whatever the first exon has
+     return exons[0]->getAttr(names, attrname);
+     }
+
+   char* getExonAttr(GffExon* exon, const char* attrname) {
+      //attribute value stored on the given exon; NULL if either argument is NULL
+      return (exon!=NULL && attrname!=NULL) ? exon->getAttr(names, attrname) : NULL;
+      }
+
+   char* getExonAttr(int exonidx, const char* attrname) {
+      //attribute of the exon at exonidx; NULL for an out of range index or a NULL name
+      bool usable=!(exonidx<0 || exonidx>=exons.Count() || attrname==NULL);
+      return usable ? exons[exonidx]->getAttr(names, attrname) : NULL; }
+
+   char* getAttrValue(int i) { //value of the i-th attribute; NULL if there are no attributes
+     return (attrs==NULL) ? NULL :
+                attrs->Get(i)->attr_val;
+     }
+   const char* getGSeqName() { //name registered for gseq_id in names->gseqs
+     return names->gseqs.getName(gseq_id);
+     }
+
+   const char* getRefName() { //synonym of getGSeqName()
+     return getGSeqName();
+     }
+   void setRefName(const char* newname);
+
+   const char* getTrackName() { //name registered for track_id in names->tracks
+     return names->tracks.getName(track_id);
+     }
+   bool exonOverlap(uint s, uint e) {
+      //true if at least one exon overlaps segment s..e (strand is ignored!);
+      //the segment ends are put in ascending order first
+      if (e<s) Gswap(s,e);
+      int xi=0;
+      while (xi<exons.Count() && !exons[xi]->overlap(s,e)) xi++;
+      return (xi<exons.Count());
+      }
+    bool exonOverlap(GffObj& m) {
+      //true if ANY exon here overlaps ANY exon of m; ignores strand and gseq_id, must check in advance
+      for (int a=0;a<exons.Count();a++) {
+         GffExon* mine=exons[a];
+         for (int b=0;b<m.exons.Count();b++) {
+            GffExon* other=m.exons[b];
+            if (mine->start>other->end) continue; //other ends before mine begins
+            if (other->start>mine->end) break; //other begins after mine ends
+            return true; //neither one lies entirely before the other
+            }
+         }
+      return false;
+      }
+
+    int exonOverlapIdx(uint s, uint e, int* ovlen=NULL) {
+      //returns the index of the first exon overlapping OR ADJACENT to s..e, or -1;
+      //ovlen, if given (it may be NULL), receives the overlap length, 0 if none found
+      if (s>e) Gswap(s,e);
+      uint xs=(s>0) ? s-1 : 0;     //widen by 1 on each side to also catch adjacent exons,
+      uint xe=(e+1>e) ? e+1 : e;   //without wrapping around at either end of the uint range
+      for (int i=0;i<exons.Count();i++) {
+            if (exons[i]->start>xe) break;
+            if (xs>exons[i]->end) continue;
+            //-- overlap (or adjacency) if we are here:
+            if (ovlen!=NULL) { //the length is measured on the original s..e
+              int ovlend= (exons[i]->end>e) ? e : exons[i]->end;
+              *ovlen= ovlend - ((s>exons[i]->start)? s : exons[i]->start)+1;
+              }
+            return i;
+            } //for each exon
+      if (ovlen!=NULL) *ovlen=0; //ovlen defaults to NULL: never write through it blindly
+      return -1;
+      }
+
+    int exonOverlapLen(GffObj& m) {
+      //total length of the overlaps between the exons of this object and the
+      //exons of m; both exon lists are walked in parallel with two cursors
+      //-- nothing to add up when the two features do not even touch:
+      if (m.start>end || start>m.end) return 0;
+      int total=0;
+      int a=0; //cursor in exons
+      int b=0; //cursor in m.exons
+      while (a<exons.Count() && b<m.exons.Count()) {
+        uint lo1=exons[a]->start;
+        uint hi1=exons[a]->end;
+        uint lo2=m.exons[b]->start;
+        uint hi2=m.exons[b]->end;
+        if (lo1>hi2) b++;      //m's exon lies entirely before ours
+        else if (lo2>hi1) a++; //our exon lies entirely before m's
+        else {
+          //shared span: from the larger start to the smaller end
+          uint from=GMAX(lo1,lo2);
+          //then leave behind whichever exon ends first
+          if (hi1<hi2) { total+=hi1-from+1; a++; }
+                  else { total+=hi2-from+1; b++; }
+          }
+        }//while comparing exons
+      return total;
+      }
+
+    bool exonOverlap(GffObj* m) { //pointer flavor of exonOverlap(GffObj&); m is dereferenced unchecked
+      return exonOverlap(*m);
+      }
+   //---------- coordinate transformation
+   void xcoord(uint grstart, uint grend, char xstrand='+') {
+     //relative coordinate transform, and reverse-complement transform if xstrand is '-'
+     //does nothing if the same transform (same strand and same range) is already in effect
+     if (xstatus) { //some transform is already active
+          if (xstatus==xstrand && grstart==xstart && grend==xend) return;
+          unxcoord();//restore original coordinates
+          }
+     xstatus=xstrand; //must be set before the xcoordseg() calls below, which read it
+     xstart=grstart;
+     xend=grend;
+     if (CDstart>0) xcoordseg(CDstart, CDend); //CDS ends, only when a CDS is defined
+     for (int i=0;i<exons.Count();i++) {
+         xcoordseg(exons[i]->start, exons[i]->end);
+         }
+     if (xstatus=='-') {
+       exons.Reverse(); //the exon order is reversed along with the coordinates
+       int flen=end-start;
+       start=xend-end+1; //same formula as the '-' branch of xcoordseg(), applied to start/end
+       end=start+flen;
+       }
+      else {
+       start=start-xstart+1;
+       end=end-xstart+1;
+       }
+     }
+
+   //transform an arbitrary segment based on current xstatus/xstart-xend
+   void xcoordseg(uint& segstart, uint &segend) {
+     //maps segstart..segend in place, using the current xstatus/xstart/xend;
+     //a no-op while no transform is active
+     if (xstatus==0) return;
+     if (xstatus!='-') { //forward: shift so that xstart becomes 1
+       segstart=segstart-xstart+1;
+       segend=segend-xstart+1;
+       return;
+       }
+     int seglen=segend-segstart; //reverse: mirror around xend, keeping the length
+     segstart=xend-segend+1;
+     segend=segstart+seglen;
+     }
+
+   void unxcoord() { //revert back to absolute genomic/gff coordinates if a transform is active
+     if (xstatus==0) return; //nothing to do, no transformation applied
+     if (CDstart>0) unxcoordseg(CDstart, CDend);
+     //restore all GffExon intervals too
+     for (int i=0;i<exons.Count();i++) {
+         unxcoordseg(exons[i]->start, exons[i]->end);
+         }
+     if (xstatus=='-') {
+        exons.Reverse(); //undo the reversal done by xcoord()
+        int flen=end-start;
+        start=xend-end+1;
+        end=start+flen;
+        }
+      else {
+        start=start+xstart-1;
+        end=end+xstart-1;
+        }
+     xstatus=0; //cleared last: the unxcoordseg() calls above depend on it
+     }
+   void unxcoordseg(uint& astart, uint &aend) {
+     //restores one arbitrary interval in place, undoing what xcoordseg() does;
+     //xstatus, xstart and xend are only read -- the transform state does NOT change!
+     if (xstatus==0) return;
+     if (xstatus!='-') { //forward transform: shift back by xstart-1
+        astart=astart+xstart-1;
+        aend=aend+xstart-1;
+        return;
+        }
+     int alen=aend-astart; //reverse transform: mirror back around xend
+     astart=xend-aend+1;
+     aend=astart+alen;
+     }
+   //---------------------
+   bool operator==(GffObj& d){ //same reference sequence, same span and same ID string
+       return !(gseq_id!=d.gseq_id || start!=d.start || end!=d.end || strcmp(gffID, d.gffID)!=0);
+       }
+   bool operator>(GffObj& d){
+      //the first key that differs decides, in this order:
+      //gseq_id, then start, then level, then end, then gffID (by strcmp)
+      if (gseq_id!=d.gseq_id) return (gseq_id>d.gseq_id);
+      if (start!=d.start) return (start>d.start);
+      if (getLevel()!=d.getLevel()) return (getLevel()>d.getLevel());
+      if (end!=d.end) return (end>d.end);
+      return (strcmp(gffID, d.gffID)>0);
+      }
+   bool operator<(GffObj& d){
+     //the first key that differs decides, in this order:
+     //gseq_id, then start, then level, then end, then gffID (by strcmp)
+     if (gseq_id!=d.gseq_id) return (gseq_id<d.gseq_id);
+     if (start!=d.start) return (start<d.start);
+     if (getLevel()!=d.getLevel()) return (getLevel()<d.getLevel());
+     if (end!=d.end) return end<d.end;
+     return strcmp(gffID, d.gffID)<0;
+     }
+   char* getID() { return gffID; } //the stored ID string itself, not a copy; may be NULL
+   char* getGeneID() { return geneID; } //the stored string itself, not a copy; may be NULL
+   char* getGeneName() { return gene_name; } //the stored string itself, not a copy; may be NULL
+   void setGeneName(const char* gname) { //stores a private copy of gname
+        GFREE(gene_name); //drop the previously stored name first
+        if (gname!=NULL) gene_name=Gstrdup(gname);
+        }
+   void setGeneID(const char* gene_id) { //stores a private copy of gene_id
+        GFREE(geneID); //drop the previously stored ID first
+        if (gene_id!=NULL) geneID=Gstrdup(gene_id);
+        }
+   int addSeg(GffLine* gfline);
+   int addSeg(int fnid, GffLine* gfline);
+   void getCDSegs(GArray<GffCDSeg>& cds);
+
+   void updateExonPhase(); //for CDS-only features, updates GExon::phase
+
+   void printGxfLine(FILE* fout, const char* tlabel, const char* gseqname,
+          bool iscds, uint segstart, uint segend, int exidx, char phase, bool gff3, bool cvtChars=false);
+   void printGxf(FILE* fout, GffPrintMode gffp=pgffExon,
+             const char* tlabel=NULL, const char* gfparent=NULL, bool cvtChars=false);
+   void printGtf(FILE* fout, const char* tlabel=NULL, bool cvtChars=false) {
+      printGxf(fout, pgtfAny, tlabel, NULL, cvtChars); //printGxf() in pgtfAny mode, without a gfparent
+      }
+   void printGff(FILE* fout, const char* tlabel=NULL,
+                                const char* gfparent=NULL, bool cvtChars=false) {
+      printGxf(fout, pgffAny, tlabel, gfparent, cvtChars); //printGxf() in pgffAny mode
+      }
+   void printTranscriptGff(FILE* fout, char* tlabel=NULL,
+                            bool showCDS=false, const char* gfparent=NULL, bool cvtChars=false) {
+      if (!isValidTranscript()) return; //prints nothing unless this is a valid transcript
+      printGxf(fout, showCDS ? pgffBoth : pgffExon, tlabel, gfparent, cvtChars);
+      }
+   void printSummary(FILE* fout=NULL);
+   void getCDS_ends(uint& cds_start, uint& cds_end);
+   void mRNA_CDS_coords(uint& cds_start, uint& cds_end);
+   char* getSpliced(GFaSeqGet* faseq, bool CDSonly=false, int* rlen=NULL,
+           uint* cds_start=NULL, uint* cds_end=NULL, GList<GSeg>* seglst=NULL);
+    char* getUnspliced(GFaSeqGet* faseq, int* rlen, GList<GSeg>* seglst);
+   char* getSplicedTr(GFaSeqGet* faseq, bool CDSonly=true, int* rlen=NULL);
+   //bool validCDS(GFaSeqGet* faseq); //has In-Frame Stop Codon ?
+   bool empty() { return (start==0); } //a zero start coordinate is what marks an empty object
+};
+
+typedef bool GffRecFunc(GffObj* gobj, void* usrptr1, void* usrptr2);
+//user callback after parsing a mapping object:
+// Returns: "done with it" status:
+//   TRUE if gobj is no longer needed so it's FREEd upon return
+//   FALSE if the user needs the gobj pointer and is responsible for
+//             collecting and freeing all GffObj objects
+
+
+//GSeqStat: collect basic stats about a common underlying genomic sequence
+//          for multiple GffObj
+class GSeqStat {
+ public:
+   int gseqid;       //gseq id in the global static pool of gseqs
+   char* gseqname;   //just a pointer to the name of gseq (never freed by this class)
+   int fcount;       //number of features on this gseq
+   uint mincoord;    //starts at MAXUINT
+   uint maxcoord;    //starts at 0
+   uint maxfeat_len; //maximum feature length on this genomic sequence
+   GffObj* maxfeat;
+   GSeqStat(int id=-1, char* name=NULL):
+       gseqid(id),
+       gseqname(name),
+       fcount(0),
+       mincoord(MAXUINT),
+       maxcoord(0),
+       maxfeat_len(0),
+       maxfeat(NULL) {
+     }
+   bool operator>(GSeqStat& g) { //ordering is by gseqid alone
+    return (g.gseqid<gseqid);
+    }
+   bool operator<(GSeqStat& g) {
+    return (g.gseqid>gseqid);
+    }
+   bool operator==(GSeqStat& g) { //equality is by gseqid alone as well
+    return !(gseqid!=g.gseqid);
+    }
+};
+
+
+int gfo_cmpByLoc(const pointer p1, const pointer p2);
+
+class GfList: public GList<GffObj> {
+  //a GList of GffObj with the added option to sort by genomic sequence and coordinate
+   bool mustSort;
+ public:
+   //GffObjs in this list are NOT deleted when the list is cleared
+   //-- for deallocation of these objects, call freeAll() or freeUnused() as needed
+   GfList(bool sortbyloc=false):GList<GffObj>(false,false,false),
+                                mustSort(sortbyloc) {
+     }
+   void sortedByLoc(bool v=true) {
+     //the comparison function is installed only when the flag goes from
+     //off to on, and only if the list already holds items
+     const bool turnedOn=(v && !mustSort);
+     mustSort=v;
+     if (turnedOn && fCount>0) this->setSorted((GCompareProc*)gfo_cmpByLoc);
+     }
+   void finalize(GffReader* gfr, bool mergeCloseExons,
+                bool keepAttrs=false, bool noExonAttr=true);
+
+   void freeAll() {
+     //deletes every stored GffObj, then empties the list
+     for (int i=0;i<fCount;i++) {
+       delete fList[i]; fList[i]=NULL;
+       }
+     Clear();
+     }
+   void freeUnused() {
+     //deletes only the GffObjs not flagged as used, then empties the list
+     for (int i=0;i<fCount;i++) {
+       GffObj* gfo=fList[i];
+       if (gfo->isUsed()) continue;
+       //inform the children, so none keeps pointing at a deleted parent
+       for (int c=0;c<gfo->children.Count();c++) gfo->children[c]->parent=NULL;
+       delete gfo;
+       fList[i]=NULL;
+       }
+     Clear();
+     }
+
+};
+/*
+struct GfoHolder {
+   //int idx; //position in GffReader::gflst array
+   GffObj* gffobj;
+   GfoHolder(GffObj* gfo=NULL) { //, int i=0) {
+     //idx=i;
+     gffobj=gfo;
+     }
+};
+*/
+class CNonExon { //utility class used in subfeature promotion
+ public:
+   GffObj* parent;   //kept as given; not deleted by this class
+   GffExon* exon;    //kept as given; not deleted by this class
+   GffLine* gffline; //GffLine built from the one given at construction; deleted by the destructor
+   //stores p and e as they are and creates a new GffLine from gl
+   //(there is no idx member any longer)
+   CNonExon(GffObj* p, GffExon* e, GffLine* gl):
+       parent(p),
+       exon(e),
+       gffline(new GffLine(gl)) {
+     }
+   //releases only the GffLine created by the constructor
+  ~CNonExon() {
+     delete gffline;
+     }
+ };
+
+
+class GffReader {
+  friend class GffObj;
+  friend class GffLine;
+  char* linebuf; //line buffer of GFF_LINELEN bytes, allocated by the constructors
+  off_t fpos;
+  int buflen; //set to GFF_LINELEN-1 by the constructors
+ protected:
+  bool gff_warns; //warn about duplicate IDs, etc. even when they are on different chromosomes
+  FILE* fh; //input stream: given by the caller, or opened from fname
+  char* fname;  //name of the file opened by the GffReader(char*) constructor; NULL otherwise
+  GffLine* gffline;
+  bool transcriptsOnly; //keep only transcripts w/ their exon/CDS features
+  GHash<int> discarded_ids; //for transcriptsOnly mode, keep track
+                            // of discarded parent IDs
+  GHash< GPVec<GffObj> > phash; //transcript_id+contig (Parent~Contig) => [gflst index, GffObj]
+  //GHash<int> tids; //just for transcript_id uniqueness
+  char* gfoBuildId(const char* id, const char* ctg);
+  //void gfoRemove(const char* id, const char* ctg);
+  GffObj* gfoAdd(GffObj* gfo);
+  GffObj* gfoAdd(GPVec<GffObj>& glst, GffObj* gfo);
+  // const char* id, const char* ctg, char strand, GVec<GfoHolder>** glst, uint start, uint end
+  GffObj* gfoFind(const char* id, const char* ctg=NULL, GPVec<GffObj>** glst=NULL,
+	                                         char strand=0, uint start=0, uint end=0);
+  CNonExon* subfPoolCheck(GffLine* gffline, GHash<CNonExon>& pex, char*& subp_name);
+  void subfPoolAdd(GHash<CNonExon>& pex, GffObj* newgfo);
+  GffObj* promoteFeature(CNonExon* subp, char*& subp_name, GHash<CNonExon>& pex,
+                                  bool keepAttr, bool noExonAttr);
+  GList<GSeqStat> gseqstats; //list of all genomic sequences seen by this reader, accumulates stats
+
+     //boost::crc_32_type  _crc_result;
+
+ public:
+  GffNames* names; //just a pointer to the global static Gff names repository in GffObj
+  GfList gflst; //accumulate GffObjs being read
+  GffObj* newGffRec(GffLine* gffline, bool keepAttr, bool noExonAttr,
+                               GffObj* parent=NULL, GffExon* pexon=NULL, GPVec<GffObj>* glst=NULL);
+  //GffObj* replaceGffRec(GffLine* gffline, bool keepAttr, bool noExonAttr, int replaceidx);
+  GffObj* updateGffRec(GffObj* prevgfo, GffLine* gffline,
+                                         bool keepAttr);
+  GffObj* updateParent(GffObj* newgfh, GffObj* parent);
+  bool addExonFeature(GffObj* prevgfo, GffLine* gffline, GHash<CNonExon>& pex, bool noExonAttr);
+  GPVec<GSeqStat> gseqStats; //only populated after finalize()
+  GffReader(FILE* f=NULL, bool t_only=false, bool sortbyloc=false):discarded_ids(true),
+                       phash(true), gseqstats(true,true,true), gflst(sortbyloc), gseqStats(1, false) {
+      gff_warns=gff_show_warnings;
+      names=NULL;
+      gffline=NULL;
+      transcriptsOnly=t_only;
+      fpos=0;
+      fname=NULL;
+      fh=f; //the stream is used as given, it is not rewound here (init() does rewind)
+      GMALLOC(linebuf, GFF_LINELEN);
+      buflen=GFF_LINELEN-1;
+      }
+  void init(FILE *f, bool t_only=false, bool sortbyloc=false) {
+      fname=NULL; //NOTE(review): a name stored by the GffReader(char*) constructor is not freed here
+      fh=f; //NOTE(review): a stream previously opened by this reader is not closed here
+      if (fh!=NULL) rewind(fh);
+      fpos=0;
+      transcriptsOnly=t_only;
+      gflst.sortedByLoc(sortbyloc);
+      }
+  GffReader(char* fn, bool t_only=false, bool sort=false):discarded_ids(true), phash(true),
+            gseqstats(true,true,true), gflst(sort), gseqStats(1,false) {
+      gff_warns=gff_show_warnings;
+      names=NULL;
+      fname=Gstrdup(fn);
+      transcriptsOnly=t_only;
+      fh=fopen(fname, "rb"); //NOTE(review): the result is not checked here, fh may be NULL
+      fpos=0;
+      gffline=NULL;
+      GMALLOC(linebuf, GFF_LINELEN);
+      buflen=GFF_LINELEN-1;
+      }
+
+ ~GffReader() {
+      delete gffline;
+      gffline=NULL;
+      fpos=0;
+      gflst.freeUnused(); //only GffObjs not flagged as used are deleted
+      gflst.Clear();
+      discarded_ids.Clear();
+      phash.Clear();
+      gseqstats.Clear();
+      GFREE(fname); //NOTE(review): fh is never closed here, not even when this reader opened it
+      GFREE(linebuf);
+      }
+
+  void showWarnings(bool v=true) {
+      gff_warns=v;
+      gff_show_warnings=v; //also changes the global flag that the constructors read
+      }
+
+  GffLine* nextGffLine();
+
+  // load all subfeatures, re-group them:
+  void readAll(bool keepAttr=false, bool mergeCloseExons=false, bool noExonAttr=true);
+
+    //boost::crc_32_type current_crc_result() const { return _crc_result; }
+}; // end of GffReader
+
+#endif
diff --git a/include/gff_utils.h b/include/gff_utils.h
new file mode 100644
index 0000000..b15b677
--- /dev/null
+++ b/include/gff_utils.h
@@ -0,0 +1,610 @@
+#ifndef GFF_UTILS_H
+#define GFF_UTILS_H
+#include "gff.h"
+#include "GStr.h"
+#include "GFastaIndex.h"
+#include "GFaSeqGet.h"
+
+typedef bool GFValidateFunc(GffObj* gf, GList<GffObj>* gfadd);
+
+class GeneInfo { //for Ensembl GTF conversion
+ public:
+   int flag;
+   GffObj* gf; //the "gene" GffObj built by create_gf(); NULL until then
+   GList<GStr> gene_names;
+   GList<GStr> transcripts; //list of transcript IDs
+   GeneInfo():gene_names(true, true, true), transcripts(true,true,true) {
+     gf=NULL;
+     flag=0;
+     }
+   GeneInfo(GffObj* gfrec, bool ensembl_convert=false):gene_names(true, true, true),
+                    transcripts(true,true,true) {
+     flag=0;
+     if (gfrec->getGeneName())
+        gene_names.Add(new GStr(gfrec->getGeneName()));
+     transcripts.Add(new GStr(gfrec->getID()));
+     create_gf(gfrec, ensembl_convert);
+     }
+
+   void create_gf(GffObj* gfrec, bool ensembl_convert) {
+     //builds the "gene" GffObj from the first transcript gfrec: same gseq, track,
+     //span and strand, with gfrec attached to it as its first child
+     gf=new GffObj(gfrec->getGeneID());
+     gf->gseq_id=gfrec->gseq_id;
+     gf->track_id=gfrec->track_id;
+     gf->start=gfrec->start;
+     gf->end=gfrec->end;
+     gf->strand=gfrec->strand;
+     gf->setFeatureName("gene");
+     gf->isGene(true);
+     gf->isUsed(true);
+     gf->uptr=this;
+     gfrec->incLevel();
+     gfrec->parent=gf;
+     gf->children.Add(gfrec);
+     if (ensembl_convert) { //carry the transcript's "type" attribute over to the gene
+       const char* biotype=gfrec->getAttr("type");
+       if (biotype) gf->addAttr("type", biotype);
+       }
+     }
+   //update(): registers one more transcript of this gene and stretches the gene
+   //span over it; a transcript ID that was already registered is ignored
+   void update(GffObj* gfrec) {
+     if (transcripts.AddedIfNew(new GStr(gfrec->getID()))<0)
+       return;
+     if (gfrec->getGeneName()) //as in the constructor: no entry for a missing gene name
+        gene_names.AddedIfNew(new GStr(gfrec->getGeneName()));
+     if (gf==NULL) {
+        GError("GeneInfo::update() called on uninitialized gf!\n");
+        //create_gf(gfrec);
+        }
+     gfrec->parent=gf;
+     gf->children.Add(gfrec);
+     gfrec->incLevel();
+     if (gf->start>gfrec->start)
+           gf->start=gfrec->start;
+     if (gf->end<gfrec->end)
+           gf->end=gfrec->end;
+     }
+    void finalize() {
+     //prepare attributes for printing
+     //must be called right before printing
+     if (gf==NULL || transcripts.Count()==0) return;
+     if (gene_names.Count()>0) {
+       gf->addAttr("Name", gene_names[0]->chars());
+       /*
+       GStr s(gene_names[0]->chars());
+       for (int i=1;i<gene_names.Count();i++) {
+          s.append(",");
+          s.append(gene_names[i]->chars());
+          }
+       gf->addAttr("genes", s.chars());
+       */
+       } //has gene names
+       GStr t(transcripts[0]->chars()); //comma separated list of all the transcript IDs
+       for (int i=1;i<transcripts.Count();i++) {
+          t.append(",");
+          t.append(transcripts[i]->chars());
+          }
+       gf->addAttr("transcripts", t.chars());
+     }
+};
+
+//genomic fasta sequence handling
+class GFastaDb {
+ public:
+  char* fastaPath; //a multi-fasta file, or a directory holding one fasta file per sequence
+  GFastaIndex* faIdx; //could be a cdb .cidx file
+  int last_fetchid; //gseq_id of the sequence currently held in faseq, -1 if none
+  GFaSeqGet* faseq;
+  //returns a newly allocated path (to be freed by the caller) or NULL if no file was found
+  char* getFastaFile(int gseq_id) {
+     if (fastaPath==NULL) return NULL;
+     GStr s(fastaPath);
+     s.trimR('/');
+     s.appendfmt("/%s",GffObj::names->gseqs.getName(gseq_id));
+     GStr sbase(s);
+     if (!fileExists(s.chars())) s.append(".fa");
+     if (!fileExists(s.chars())) s.append("sta");
+     if (fileExists(s.chars())) return Gstrdup(s.chars());
+         else {
+             GMessage("Warning: cannot find genomic sequence file %s{.fa,.fasta}\n",sbase.chars());
+             return NULL;
+             }
+     }
+
+   GFastaDb(const char* fpath=NULL) {
+     last_fetchid=-1; //init() returns early for an empty path and would leave this unset
+     fastaPath=NULL;
+     faseq=NULL;
+     faIdx=NULL;
+     init(fpath);
+     }
+
+   void init(const char* fpath) {
+     if (fpath==NULL || fpath[0]==0) return;
+     last_fetchid=-1;
+     if (!fileExists(fpath))
+       GError("Error: file/directory %s does not exist!\n",fpath);
+     fastaPath=Gstrdup(fpath);
+     GStr gseqpath(fpath);
+     if (fileExists(fastaPath)>1) { //exists and it's not a directory
+            GStr fainame(fastaPath);
+            if (fainame.length()>=4 && fainame.rindex(".fai")==fainame.length()-4) {
+               //.fai index file given directly (a name shorter than the suffix never matches)
+               fastaPath[fainame.length()-4]=0;
+               if (!fileExists(fastaPath))
+                  GError("Error: cannot find fasta file for index %s !\n", fastaPath);
+               }
+              else fainame.append(".fai");
+            //GMessage("creating GFastaIndex with fastaPath=%s, fainame=%s\n", fastaPath, fainame.chars());
+            faIdx=new GFastaIndex(fastaPath,fainame.chars());
+            GStr fainamecwd(fainame); //the index file name without its directory part
+            int ip=-1;
+            if ((ip=fainamecwd.rindex(CHPATHSEP))>=0)
+               fainamecwd.cut(0,ip+1);
+            if (!faIdx->hasIndex()) { //could not load index
+               //try current directory
+                  if (fainame!=fainamecwd) {
+                    if (fileExists(fainamecwd.chars())>1) {
+                       faIdx->loadIndex(fainamecwd.chars());
+                       }
+                    }
+                  } //tried to load index
+            if (!faIdx->hasIndex()) {
+                 GMessage("No fasta index found for %s. Rebuilding, please wait..\n",fastaPath);
+                 faIdx->buildIndex();
+                 if (faIdx->getCount()==0) GError("Error: no fasta records found!\n");
+                 GMessage("Fasta index rebuilt.\n");
+                 FILE* fcreate=fopen(fainame.chars(), "w");
+                 if (fcreate==NULL) {
+                   GMessage("Warning: cannot create fasta index %s! (permissions?)\n", fainame.chars());
+                   if (fainame!=fainamecwd) fcreate=fopen(fainamecwd.chars(), "w");
+                   if (fcreate==NULL)
+                      GError("Error: cannot create fasta index %s!\n", fainamecwd.chars());
+                   }
+                 if (faIdx->storeIndex(fcreate)<faIdx->getCount())
+                     GMessage("Warning: error writing the index file!\n");
+                 } //index created and attempted to store it
+            } //multi-fasta
+     }
+   GFaSeqGet* fetch(int gseq_id, bool checkFasta=false) {
+     if (fastaPath==NULL) return NULL;
+     if (gseq_id==last_fetchid && faseq!=NULL) return faseq; //still cached
+     delete faseq;
+     faseq=NULL;
+     last_fetchid=-1;
+     char* gseqname=GffObj::names->gseqs.getName(gseq_id);
+     if (faIdx!=NULL) { //fastaPath was the multi-fasta file name
+        GFastaRec* farec=faIdx->getRecord(gseqname);
+        if (farec!=NULL) {
+             faseq=new GFaSeqGet(fastaPath,farec->seqlen, farec->fpos,
+                               farec->line_len, farec->line_blen);
+             faseq->loadall(); //just cache the whole sequence, it's faster
+             last_fetchid=gseq_id;
+             }
+        else {
+          GMessage("Warning: couldn't find fasta record for '%s'!\n",gseqname);
+          return NULL;
+          }
+        }
+     else {
+         char* sfile=getFastaFile(gseq_id);
+         if (sfile!=NULL) {
+            faseq=new GFaSeqGet(sfile,checkFasta);
+            faseq->loadall();
+            last_fetchid=gseq_id;
+            GFREE(sfile);
+            }
+         } //one fasta file per contig
+       return faseq;
+     }
+
+   ~GFastaDb() {
+     GFREE(fastaPath);
+     //the index and the cached sequence are both owned by this object
+     delete faIdx;
+     delete faseq;
+     }
+};
+
+class GffLocus;
+
+class GTData { //transcript associated data
+ public:
+    GffObj* rna;         //the transcript given at construction (may be NULL)
+    GffLocus* locus;
+    GffObj* replaced_by;
+    GeneInfo* geneinfo;  //whatever t->uptr held when this record was created
+    int flag;
+    //When a transcript t is given, this record installs itself in t->uptr,
+    //after saving the previous content of that slot as the geneinfo pointer;
+    //everything else starts out as NULL/0.
+    GTData(GffObj* t=NULL):rna(t), locus(NULL), replaced_by(NULL),
+                           geneinfo(NULL), flag(0) {
+        if (t==NULL) return;
+        //take over geneinfo, if there
+        geneinfo=(GeneInfo*)t->uptr;
+        t->uptr=this;
+        }
+   //records are ordered and matched by the address of their transcript
+   bool operator<(GTData& b) { return (rna < b.rna); }
+   bool operator==(GTData& b) { return !(rna!=b.rna); }
+};
+
+class CGeneSym { //a gene symbol together with its freq counter
+ public:
+  GStr name;
+  int freq;
+  CGeneSym(const char* n=NULL, int f=0):name(n), freq(f) {
+    }
+  bool operator<(CGeneSym& b) { //higher freq first, then shorter name, then name order
+     if (freq!=b.freq) return (freq>b.freq);
+     if (name.length()!=b.name.length()) return (name.length()<b.name.length());
+     return (name<b.name);
+     }
+  bool operator==(CGeneSym& b) { return name==b.name; } //freq is not compared
+};
+
+const char* getGeneDescr(const char* gsym);
+
+void printLocus(GffLocus* loc, const char* pre=NULL);
+
+class GffLocus:public GSeg {
+public:
+    int gseq_id; //id of underlying genomic sequence
+    int locus_num;
+    bool is_mrna;
+    char strand;
+    GffObj* t_maxcov;  //transcript with maximum coverage (for main "ref" transcript)
+    GList<GffObj> rnas; //list of transcripts (isoforms) for this locus
+    GArray<GSeg> mexons; //list of merged exons in this region
+    GList<CGeneSym> gene_names;
+    GList<CGeneSym> gene_ids;
+    int v; //user flag/data
+   /*
+   bool operator==(GffLocus& d){
+       return (gseq_id==d.gseq_id && strand==d.strand && start==d.start && end==d.end);
+       }
+   bool operator<(GffLocus& d){
+     if (gseq_id!=d.gseq_id) return (gseq_id<d.gseq_id);
+     if (start==d.start) {
+        if (end==d.end) return strand<d.strand;
+                     else return end<d.end;
+        } else return (start<d.start);
+     }
+    */
+    const char* getGeneName() { //name of the first entry in gene_names; NULL if the list is empty
+         return (gene_names.Count()>0) ? gene_names.First()->name.chars() :
+                                         NULL;
+         }
+    const char* get_tmax_id() { //ID of t_maxcov, which is dereferenced unchecked
+         return t_maxcov->getID();
+         }
+    const char* get_descr() {
+       //Description of this locus: the first one getGeneDescr() has for any of the
+       //gene names, otherwise the first of a few descriptive attributes found on
+       //t_maxcov. Returns NULL if there is none.
+       for (int i=0;i<gene_names.Count();i++) {
+            //look up EACH gene name in turn, not the first one over and over
+            const char* gn=getGeneDescr(gene_names[i]->name.chars());
+            if (gn!=NULL) return gn;
+            }
+       if (t_maxcov==NULL) return NULL; //no transcript to take attributes from
+       const char* const descr_attrs[4]={"product", "descr", "description", "info"};
+       for (int a=0;a<4;a++) {
+            char* s=t_maxcov->getAttr(descr_attrs[a]);
+            if (s!=NULL) return s;
+            }
+       return NULL;
+       }
+
+    GffLocus(GffObj* t=NULL):rnas(true,false,false),mexons(true,true),
+           gene_names(true,true,false), gene_ids(true,true,false) {
+        //this will NOT free rnas!
+        //-- start out as an empty locus
+        start=0;
+        end=0;
+        strand=0;
+        gseq_id=-1;
+        locus_num=0;
+        v=0;
+        is_mrna=false;
+        t_maxcov=NULL;
+        if (t==NULL) return;
+        //-- seed the locus with t: its exons and its uptr are used without checks
+        gseq_id=t->gseq_id;
+        strand=t->strand;
+        is_mrna=(t->ftype_id==gff_fid_mRNA);
+        start=t->exons.First()->start;
+        end=t->exons.Last()->end;
+        GSeg seg;
+        for (int i=0;i<t->exons.Count();i++) { //each exon of t becomes one entry in mexons
+             seg.start=t->exons[i]->start;
+             seg.end=t->exons[i]->end;
+             mexons.Add(seg);
+             }
+        rnas.Add(t);
+        t_maxcov=t;
+        //t->uptr is taken to be a GTData*: link it back to this locus
+        ((GTData*)(t->uptr))->locus=this;
+    }
+
+   void addMerge(GffLocus& locus, GffObj* lnkrna) {
+     //add all the elements of the other locus (merging); lnkrna is the one transcript NOT added to rnas
+     //-- merge mexons
+     GArray<int> ovlexons(true,true); //list of locus.mexons indexes overlapping existing mexons
+     int i=0; //index in this locus' mexons
+     int j=0; //index in locus.mexons
+     while (i<mexons.Count() && j<locus.mexons.Count()) {
+            uint istart=mexons[i].start;
+            uint iend=mexons[i].end;
+            uint jstart=locus.mexons[j].start;
+            uint jend=locus.mexons[j].end;
+            if (iend<jstart) { i++; continue; } //mexons[i] lies entirely before locus.mexons[j]
+            if (jend<istart) { j++; continue; } //locus.mexons[j] lies entirely before mexons[i]
+            ovlexons.Add(j); //overlap: remember j so it is not added again below
+            //extend mexons[i] as needed
+            if (jstart<istart) mexons[i].start=jstart;
+            if (jend>iend) { //mexons[i] end extend
+                mexons[i].end=jend;
+                //now this could overlap the next mexon(s), so we have to merge them all
+                while (i<mexons.Count()-1 && mexons[i].end>mexons[i+1].start) {
+                    uint nextend=mexons[i+1].end;
+                    mexons.Delete(i+1); //absorbed by mexons[i]
+                    if (nextend>mexons[i].end) {
+                        mexons[i].end=nextend;
+                        break; //no need to check next mexons
+                    }
+                } //while next mexons merge
+            } // mexons[i] end extend
+            j++; //check the next locus.mexon
+        }
+        //-- add the rest of the non-overlapping mexons:
+        GSeg seg;
+        for (int i=0;i<locus.mexons.Count();i++) { //this i shadows the merge index above
+            seg.start=locus.mexons[i].start;
+            seg.end=locus.mexons[i].end;
+            if (!ovlexons.Exists(i)) mexons.Add(seg);
+        }
+     // -- add locus.rnas
+     for (int i=0;i<locus.rnas.Count();i++) {
+          ((GTData*)(locus.rnas[i]->uptr))->locus=this; //every transcript, lnkrna included, now points here
+          if (locus.rnas[i]!=lnkrna) rnas.Add(locus.rnas[i]);
+          }
+        // -- adjust start/end as needed
+     if (start>locus.start) start=locus.start;
+     if (end<locus.end) end=locus.end;
+     if (locus.is_mrna) is_mrna=true;
+     if (t_maxcov->covlen<locus.t_maxcov->covlen) //both t_maxcov pointers are dereferenced unchecked
+            t_maxcov=locus.t_maxcov;
+     }
+
+    bool exonOverlap(GffLocus& loc) {
+        //true if the two loci are on the same strand and at least one merged
+        //exon (mexon) of this locus overlaps a mexon of loc
+        //-- different strands, or spans that do not touch, cannot overlap:
+        if (strand!=loc.strand || loc.start>end || start>loc.end) return false;
+        int a=0; //cursor in mexons
+        int b=0; //cursor in loc.mexons
+        while (a<mexons.Count() && b<loc.mexons.Count()) {
+            GSeg& mine=mexons[a];
+            GSeg& other=loc.mexons[b];
+            //advance past whichever mexon lies entirely before the other one
+            if (mine.end<other.start) a++;
+            else if (other.end<mine.start) b++;
+            else return true; //exon overlap found
+        }
+        return false;
+    }
+
+    // Tries to add transcript t to this locus.
+    // t is accepted only if it is on the same genomic sequence and strand, its
+    // span intersects this locus' start..end, and at least one of its exons
+    // overlaps one of the locus' merged exons (mexons).
+    // On success mexons are extended/merged to cover t's exons, t is appended
+    // through rnas_add() and t's GTData (uptr) is pointed back to this locus.
+    // Returns true if t was added, false otherwise (the locus is left unchanged
+    // in that case, as mexons are only modified once an overlap is found).
+    bool add_RNA(GffObj* t) {
+        //if (rnas.Count()==0) return true; //? should never be called on an empty locus
+        if (t->gseq_id!=gseq_id || t->strand!=strand || t->start>end || start>t->end)
+              return false; //rna must be on the same genomic seq
+        //check for exon overlap with existing mexons
+        //also update mexons accordingly if t is to be added
+        bool hasovl=false;
+        int i=0; //index of first mexons with a merge
+        int j=0; //index current t exon
+        GArray<int> ovlexons(true,true); //list of mrna exon indexes overlapping mexons
+        while (i<mexons.Count() && j<t->exons.Count()) {
+            uint istart=mexons[i].start;
+            uint iend=mexons[i].end;
+            uint jstart=t->exons[j]->start;
+            uint jend=t->exons[j]->end;
+            if (iend<jstart) { i++; continue; } //mexon i lies entirely before exon j
+            if (jend<istart) { j++; continue; } //exon j lies entirely before mexon i
+            //exon overlap found if we're here:
+            ovlexons.Add(j);
+            hasovl=true;
+            //extend mexons[i] as needed
+            if (jstart<istart) mexons[i].start=jstart;
+            if (jend>iend) { //mexon stretch up
+                mexons[i].end=jend;
+                //now this could overlap the next mexon(s), so we have to merge them all
+                while (i<mexons.Count()-1 && mexons[i].end>mexons[i+1].start) {
+                    uint nextend=mexons[i+1].end;
+                    mexons.Delete(i+1); //absorbed into mexons[i]
+                    if (nextend>mexons[i].end) {
+                        mexons[i].end=nextend;
+                        break; //no need to check next mexons
+                    }
+                } //while next mexons merge
+            } //possible mexons merge
+
+            j++; //check the next t exon
+        }//all vs all exon check loop
+        if (hasovl) {
+            GSeg seg;
+             //add the rest of the non-overlapping exons
+            //(note: this loop's i shadows the mexon index declared above)
+            for (int i=0;i<t->exons.Count();i++) {
+                seg.start=t->exons[i]->start;
+                seg.end=t->exons[i]->end;
+                if (!ovlexons.Exists(i)) mexons.Add(seg);
+                }
+            rnas_add(t); //also updates start/end, t_maxcov, strand and is_mrna
+            // add to rnas
+            ((GTData*)t->uptr)->locus=this;
+            gseq_id=t->gseq_id; //no-op here: equality was already required above
+            }
+        return hasovl;
+    }
+
+    //simpler,basic adding of a mrna
+    void rnas_add(GffObj* t) {
+      // Appends t to rnas without any overlap check and refreshes the locus
+      // summary fields (span, best-coverage transcript, strand, mRNA flag).
+      rnas.Add(t);
+      // start==0 is treated as "not set yet"
+      if (start==0 || start>t->start) start=t->start;
+      if (t->end>end) end=t->end;
+      // remember the transcript with the largest covlen
+      if (t->covlen>t_maxcov->covlen) t_maxcov=t;
+      if (strand==0) strand=t->strand;
+      if (t->ftype_id==gff_fid_mRNA) is_mrna=true;
+      }
+};
+
+// Per-genomic-sequence container: the features, transcripts, loci and
+// transcript data loaded for one genomic sequence id.
+// Instances compare (==, <) by their genomic sequence id only.
+class GenomicSeqData {
+  int gseq_id;
+ public:
+  const char* gseq_name; //name looked up from gseq_id; NULL when gseq_id<0
+  GList<GffObj> gfs; //all non-transcript features -> usually gene features
+  GList<GffObj> rnas; //all transcripts on this genomic sequence
+  GList<GffLocus> loci; //all loci clusters
+  GList<GTData> tdata; //transcript data (uptr holder for all rnas loaded here)
+  //GenomicSeqData(int gid=-1):rnas(true,true,false),loci(true,true,true),
+  // gid: genomic sequence id; a negative id (the default) means "no sequence",
+  // in which case gseq_name is set to NULL instead of being left uninitialized.
+  GenomicSeqData(int gid=-1):gseq_id(gid),gseq_name(NULL),
+       gfs(true, true, false),rnas((GCompareProc*)gfo_cmpByLoc),loci(true,true,false),
+       tdata(false,true,false) {
+  if (gseq_id>=0)
+    gseq_name=GffObj::names->gseqs.getName(gseq_id);
+  }
+  bool operator==(GenomicSeqData& d){
+    return gseq_id==d.gseq_id;
+  }
+  bool operator<(GenomicSeqData& d){
+    return (gseq_id<d.gseq_id);
+  }
+};
+
+int gseqCmpName(const pointer p1, const pointer p2);
+
+// Holds the two nucleotides of a splice site as an uppercase,
+// 0-terminated string in nt[].
+class GSpliceSite {
+ public:
+  char nt[3];
+  // Builds the site from the first two characters of c; with revc the two
+  // characters are complemented and swapped (reverse complement).
+  // A NULL c yields an empty site.
+  GSpliceSite(const char* c, bool revc=false) {
+    nt[2]=0;
+    if (c==NULL) {
+      nt[0]=0;
+      nt[1]=0;
+      return;
+      }
+    if (revc) {
+      nt[0]=toupper(ntComplement(c[1]));
+      nt[1]=toupper(ntComplement(c[0]));
+      }
+    else {
+      nt[0]=toupper(c[0]);
+      nt[1]=toupper(c[1]);
+      }
+    }
+
+  // Extracts a splice site from an intron sequence of intronlen characters.
+  // Forward strand: the acceptor is the last two characters, the donor the
+  // first two. With revc the roles of the two ends are swapped and the pair is
+  // reverse complemented.
+  // Calls GError() for a NULL intron or an intron shorter than 2 characters
+  // (a shorter intron would make the intronlen-2 offset point outside the
+  // sequence).
+  GSpliceSite(const char* intron, int intronlen, bool getAcceptor, bool revc=false) {
+    nt[2]=0;
+    if (intron==NULL || intronlen<2)
+       GError("Error: invalid intron or intron len for GSpliceSite()!\n");
+    const char* c=intron;
+    if (revc) {
+      if (!getAcceptor) c+=intronlen-2;
+      nt[0]=toupper(ntComplement(c[1]));
+      nt[1]=toupper(ntComplement(c[0]));
+      }
+    else { //on forward strand
+      if (getAcceptor) c+=intronlen-2;
+      nt[0]=toupper(c[0]);
+      nt[1]=toupper(c[1]);
+      }//forward strand
+    }
+
+  // Builds the site from two explicit nucleotides.
+  GSpliceSite(const char n1, const char n2) {
+    nt[2]=0;
+    nt[0]=toupper(n1);
+    nt[1]=toupper(n2);
+    }
+  // true for the donor dinucleotides GC and GT
+  bool canonicalDonor() {
+    return (nt[0]=='G' && (nt[1]=='C' || nt[1]=='T'));
+    }
+  bool operator==(GSpliceSite& c) {
+    return (c.nt[0]==nt[0] && c.nt[1]==nt[1]);
+    }
+  bool operator==(GSpliceSite* c) {
+    return (c->nt[0]==nt[0] && c->nt[1]==nt[1]);
+    }
+  bool operator==(const char* c) {
+    //return (nt[0]==toupper(c[0]) && nt[1]==toupper(c[1]));
+    //assumes given const nucleotides are uppercase already!
+    return (nt[0]==c[0] && nt[1]==c[1]);
+    }
+  bool operator!=(const char* c) {
+    //assumes given const nucleotides are uppercase already!
+    return (nt[0]!=c[0] || nt[1]!=c[1]);
+    }
+};
+
+// Options and input stream used for loading a GFF file.
+struct GffLoader {
+  GStr fname; //input file name ("stdin" when reading standard input)
+  FILE* f;    //input stream; closed by the destructor unless it is stdin
+  bool transcriptsOnly;
+  bool fullAttributes;
+  bool noExonAttrs;
+  bool mergeCloseExons;
+  bool showWarnings;
+  bool noPseudo;
+  void placeGf(GffObj* t, GenomicSeqData* gdata, bool doCluster=true, bool collapseRedundant=true,
+                                    bool matchAllIntrons=true, bool fuzzSpan=false);
+  void load(GList<GenomicSeqData>&seqdata, GFValidateFunc* gf_validate=NULL, 
+                      bool doCluster=true, bool doCollapseRedundant=true, 
+                      bool matchAllIntrons=true, bool fuzzSpan=false, bool forceExons=false);
+  // Opens filename for reading; "-" or "stdin" selects standard input.
+  // Calls GError() if the file cannot be opened.
+  GffLoader(const char* filename):fname(filename), f(NULL), transcriptsOnly(true),
+      fullAttributes(false), noExonAttrs(false), mergeCloseExons(false),
+      showWarnings(false), noPseudo(false) {
+      if (fname=="-" || fname=="stdin") {
+         fname="stdin";
+         f=stdin;
+         return;
+         }
+      f=fopen(fname.chars(), "r");
+      if (f==NULL) {
+         GError("Error: cannot open gff file %s!\n",fname.chars());
+         }
+      }
+  ~GffLoader() {
+      if (f==NULL || f==stdin) return; //nothing of ours to close
+      fclose(f);
+      }
+};
+
+void printFasta(FILE* f, GStr& defline, char* seq, int seqlen=-1);
+
+//"position" a given coordinate x within a list of transcripts sorted by their start (lowest)
+//coordinate, using quick-search; the returned int is the list index of the closest *higher*
+//GffObj - i.e. starting right *ABOVE* the given coordinate
+//Convention: returns -1 if there is no such GffObj (i.e. last GffObj starts below x)
+int qsearch_rnas(uint x, GList<GffObj>& rnas);
+int qsearch_gloci(uint x, GList<GffLocus>& loci);
+
+GffObj* redundantTranscripts(GffObj& ti, GffObj&  tj, bool matchAllIntrons=true, bool fuzzSpan=false);
+
+//void loadGFF(FILE* f, GList<GenomicSeqData>& seqdata, const char* fname);
+
+void collectLocusData(GList<GenomicSeqData>& ref_data);
+
+#endif
diff --git a/src/GArgs.cpp b/src/GArgs.cpp
new file mode 100644
index 0000000..f3b72b9
--- /dev/null
+++ b/src/GArgs.cpp
@@ -0,0 +1,376 @@
+#include "GBase.h"
+#include "GArgs.h"
+#include <ctype.h>
+
+// Builds the option table (fmt[]) from a compact format string, then parses
+// argv against it through parseArgs(). Parse errors are recorded in errarg /
+// err_valmissing rather than reported here.
+// Every table entry gets code = its 1-based position in the table.
+GArgs::GArgs(int argc, char* const argv[], const char* format, bool nodigitopts) {
+   /* format can be:
+      <string>{;|=} e.g. disable-test;PID=S= for --disable-test PID=50 (or --PID 50) S=3.5 etc.
+      <letter>[:]  e.g. p:hT  for -p testing (or -ptesting) -h -T
+   */
+const char* fstr=format;
+fmtcount=0;
+count=0;
+nonOptCount=0;
+nonOptPos=0;
+optPos=0;
+errarg=0;
+err_valmissing=false;
+args=NULL;
+fmt=NULL;
+_argc=argc;
+_argv=argv;
+int fmtlen=strlen(format);
+//---- first parse the format string
+while (fstr-format < fmtlen ) {
+  int l=strcspn(fstr, ";=:"); //length of the run before the next delimiter
+  if (fstr[l]==0) { //end of string reached
+      //all previous chars are just switches:
+       GREALLOC(fmt, (fmtcount+l)*sizeof(fmtdef));
+       //store each switch
+       for (int i=0; i<l;i++) { 
+         fmt[fmtcount+i].longopt=NULL;
+         fmt[fmtcount+i].opt=fstr[i];
+         fmt[fmtcount+i].req_value = false;
+         fmt[fmtcount+i].code=fmtcount+i+1;
+         }
+       fmtcount+=l;
+       break;
+     }
+   else {
+     if (fstr[l]==':') {
+         //fstr[l-1] is an argument, but all the previous are just switches
+         GREALLOC(fmt, (fmtcount+l)*sizeof(fmtdef));
+         //store each switch AND the option
+         for (int i=0; i<l;i++) { 
+           fmt[fmtcount+i].longopt=NULL; //one char length
+           fmt[fmtcount+i].opt=fstr[i];
+           fmt[fmtcount+i].req_value = (i==l-1); //only the letter right before ':' takes a value
+           fmt[fmtcount+i].code=fmtcount+i+1;
+           }
+         fmtcount+=l;
+         }
+      else { // fstr[l]=='=' or ';' 
+         //the whole run is one long option name; '=' means it requires a value
+         GREALLOC(fmt, (fmtcount+1)*sizeof(fmtdef));
+         fmt[fmtcount].longopt=Gstrdup(fstr, fstr+l-1);
+         fmt[fmtcount].opt=0;
+         fmt[fmtcount].req_value=(fstr[l]=='=');
+         fmt[fmtcount].code=fmtcount+1;
+         fmtcount++;
+         }
+     fstr+=l+1; //continue after the delimiter
+     }
+  }
+ //-- now parse the arguments based on the given format specification
+ parseArgs(nodigitopts);
+ }
+ 
+// Parses _argv[1.._argc-1] against the option table fmt[] and fills args[].
+//  - "-x", "-xVALUE", "-x VALUE" and grouped switches ("-abc") are short options
+//  - "--name", "--name=VALUE", "--name VALUE" and "name=VALUE" are long options
+//  - a lone "-", and (when nodigitopts is set) "-<digit>..." or "-.<digit>...",
+//    are stored as plain (non-option) arguments
+//  - everything else is stored as a plain argument
+// Returns 0 on success, otherwise the argv index of the offending argument
+// (also kept in errarg; err_valmissing tells a missing value apart from an
+// unrecognized option). Parsing stops at the first error.
+int GArgs::parseArgs(bool nodigitopts) {
+  int p=1; //skip program name
+  int f=0;
+  while (p<_argc) {
+   if (_argv[p][0]=='-' && (_argv[p][1]==0 || _argv[p][1]!='-')) { 
+     //single-dash argument
+     int cpos=1;
+     char c=_argv[p][cpos];
+     if (c==0 || (nodigitopts && isdigit(c)) || 
+            (c=='.' && isdigit(_argv[p][cpos+1]))) { 
+        //special case: plain argument '-' or just a negative number
+        GREALLOC(args, (count+1)*sizeof(argdata));
+        args[count].opt=NULL;
+        args[count].fmti=-1;
+        if (c==0) {
+          GCALLOC(args[count].value, 2);
+          args[count].value[0]='-';
+          }
+         else { //negative number given
+          args[count].value=Gstrdup(_argv[p]);
+          }
+        count++;
+        nonOptCount++;
+        }
+      else { //single-dash argument or switch
+       COLLAPSED:
+        if ((f=validShortOpt(c))>=0) {
+          GREALLOC(args, (count+1)*sizeof(argdata));
+          GCALLOC(args[count].opt, 2);
+          args[count].opt[0]=c;
+          args[count].fmti=f;
+          if (!fmt[f].req_value) {//switch type
+            GCALLOC(args[count].value,1);//so getOpt() functions would not return NULL
+            count++;
+            // only switches can be grouped with some other switches or options
+            if (_argv[p][cpos+1]!='\0') {
+               cpos++;
+               c=_argv[p][cpos];
+               goto COLLAPSED;
+               }
+            }
+           else {
+              //single-dash argument followed by a value
+            if (_argv[p][cpos+1]=='\0') {
+                if (p+1<_argc && _argv[p+1][0]!=0) { //value is the whole next argument
+                   p++;
+                   args[count].value=Gstrdup(_argv[p]);
+                   }
+                  else {
+                   errarg=p;
+                   err_valmissing=true;
+                   return errarg;
+                   }
+                }
+               else { //value immediately follows the dash-option
+                args[count].value=Gstrdup(_argv[p]+cpos+1);
+                }
+            count++;
+            }
+          } //was validShortOpt
+         else { //option not found in format definition!
+           errarg=p;
+           return errarg;
+           }
+        }
+     } //-single-dash
+   else {//not a single-dash argument
+     char* ap=_argv[p];
+     bool is_longopt=false;
+     if (*ap=='-' && ap[1]=='-') {
+        is_longopt=true;
+        ap+=2;
+        }
+     //look for the '=' separating name and value (never at the first char);
+     //an empty remainder ("" or a bare "--") has nothing to search, and
+     //ap+1 would point past its terminator
+     char* e=(*ap!=0) ? strchr(ap+1,'=') : NULL;
+     //skip every '=' preceded by a backslash; the search must resume AFTER
+     //the current '=' or it would find the same character forever
+     while (e!=NULL && *(e-1)=='\\') e=strchr(e+1,'=');
+     if (e==NULL && is_longopt) {
+        e=ap;
+        while (*e!=0 && *e!=' ') e++;
+        //e will be on eos or next space
+        }
+     if (e!=NULL && e>ap) {
+       //this must be a long option
+       //e is on eos, space or '='
+       if ((f=validLongOpt(ap,e-1))>=0) {
+            GREALLOC(args, (count+1)*sizeof(argdata));
+            args[count].opt=Gstrdup(ap,e-1);
+            args[count].fmti=f;
+            if (fmt[f].req_value) {
+               if (*e==0) {
+                   //value is the next argument
+                   if (p+1<_argc && _argv[p+1][0]!=0) {
+                      p++;
+                      args[count].value=Gstrdup(_argv[p]);
+                      }
+                    else {
+                      errarg=p;
+                      err_valmissing=true;
+                      return errarg;
+                      }
+                   }
+                else { //value is in the same argument
+                   //while (*e!=0 && (*e==' ' || *e=='=')) e++;
+                   if (*e=='=') e++;
+                   if (*e==0) {
+                      errarg=p;
+                      err_valmissing=true;
+                      return errarg;
+                      }
+                   args[count].value=Gstrdup(e);
+                   }
+               } //value required
+              else { //no value expected
+               GCALLOC(args[count].value,1); //do not return NULL
+               }
+            count++;
+            }
+          else { //error - this long argument not recognized
+           errarg=p;
+           return errarg;
+           }
+        }
+      else { //just a plain non-option argument
+       if (e==ap) { //i.e. just "--"
+          errarg=p;
+          return errarg;
+          }
+       GREALLOC(args, (count+1)*sizeof(argdata));
+       args[count].opt=NULL; //it's not an option
+       args[count].value=Gstrdup(_argv[p]);
+       args[count].fmti=-1;
+       count++;
+       nonOptCount++;
+       }
+     }
+   p++;//check next arg string
+   } //while arguments
+ return errarg;
+}
+
+void GArgs::printError(FILE* fout, const char* usage, bool exitProgram) {
+ if (errarg==0) return;
+ if (usage) fprintf(fout, "%s\n", usage);
+ if (err_valmissing) 
+     fprintf(fout, "Error: value required for option '%s'\n", _argv[errarg]);
+    else 
+     fprintf(fout, "Error: invalid argument '%s'\n", _argv[errarg]);
+ if (exitProgram)
+     exit(1);
+}
+
+void GArgs::printError(const char* usage, bool exitProgram) {
+ printError(stderr, usage, exitProgram);
+}
+
+// Prints the original command line (all argv entries, program name included)
+// on one line, separated by single spaces and ended by a newline.
+void GArgs::printCmdLine(FILE* fout) {
+ if (_argv!=NULL) {
+   const int last=_argc-1;
+   for (int k=0;k<=last;k++) {
+     const char sep=(k<last) ? ' ' : '\n';
+     fprintf(fout, "%s%c", _argv[k], sep);
+     }
+   }
+}
+
+// Builds the option table from an array of GArgsDef records and parses argv.
+// The array ends at the first record having neither a long nor a short option
+// name; at most 255 records are used. With a NULL table nothing is parsed.
+GArgs::GArgs(int argc, char* const argv[], const GArgsDef fmtrecs[], bool nodigitopts) {
+ //start from an empty parser state
+ _argc=argc;
+ _argv=argv;
+ args=NULL;
+ fmt=NULL;
+ fmtcount=0;
+ count=0;
+ nonOptCount=0;
+ nonOptPos=0;
+ optPos=0;
+ errarg=0;
+ err_valmissing=false;
+ if (fmtrecs==NULL) return;
+
+ //count the records up to the terminating one
+ while (fmtcount<255 && (fmtrecs[fmtcount].longopt || fmtrecs[fmtcount].opt))
+     fmtcount++;
+ GCALLOC(fmt, fmtcount*sizeof(fmtdef));
+ for (int k=0;k<fmtcount;k++) {
+   const GArgsDef& rec=fmtrecs[k];
+   fmt[k].longopt=Gstrdup(rec.longopt); //own copy, released in the destructor
+   fmt[k].opt=rec.opt;
+   fmt[k].req_value=rec.req_value;
+   fmt[k].code=rec.code;
+   }
+ parseArgs(nodigitopts);
+}
+
+
+// Releases the parsed arguments and the option table, including the
+// strings they own.
+GArgs::~GArgs() {
+ for (int k=0; k<count; k++) {
+   GFREE(args[k].opt);
+   GFREE(args[k].value);
+   }
+ GFREE(args);
+ for (int k=0; k<fmtcount; k++) {
+   GFREE(fmt[k].longopt);
+   }
+ GFREE(fmt);
+}
+
+// Returns the fmt[] index of the short option letter o, or -1 if unknown.
+int GArgs::validShortOpt(char o) {
+ int k=0;
+ while (k<fmtcount) {
+   if (fmt[k].opt==o) return k;
+   k++;
+   }
+ return -1;
+}
+
+// Returns the fmt[] index of the long option whose name is the character
+// range o..to (both inclusive), or -1 if there is no such option.
+int GArgs::validLongOpt(char* o, char* to) {
+ char* optname=Gstrdup(o,to); //0-terminated copy of the range
+ int found=-1;
+ for (int k=0; k<fmtcount && found<0; k++) {
+   if (fmt[k].longopt!=NULL && strcmp(fmt[k].longopt, optname)==0)
+       found=k;
+   }
+ GFREE(optname);
+ return found;
+}
+
+// Returns the fmt[] index of the option defined with the given code, or -1.
+int GArgs::validOpt(int code) {
+ int k=0;
+ while (k<fmtcount) {
+   if (fmt[k].code==code) return k;
+   k++;
+   }
+ return -1;
+}
+
+
+// Returns the argv position of the argument that failed to parse,
+// or 0 if there was no error.
+int GArgs::isError() {
+ const int badpos=errarg;
+ return badpos;
+ }
+
+/* Retrieves the value of the option named o (first occurrence).
+   Returns
+       NULL              if the option was not given at all
+       an empty string   if a switch (no value) option was given
+       the value string  if a value option was given
+   The returned pointer is owned by this GArgs object.
+ */
+char* GArgs::getOpt(const char* o) {
+ for (int k=0; k<count; k++) {
+   const char* name=args[k].opt;
+   if (name==NULL) continue; //plain (non-option) argument
+   if (strcmp(name, o)==0) return args[k].value;
+   }
+ return NULL;
+}
+
+// Same as getOpt(const char*) for a single-letter option: only stored
+// option names that are exactly the one character o match.
+char* GArgs::getOpt(const char o) {
+ for (int k=0; k<count; k++) {
+   const char* name=args[k].opt;
+   if (name==NULL) continue;
+   if (name[0]==o && name[1]=='\0') return args[k].value;
+   }
+ return NULL;
+}
+
+// Returns the value of the first given option whose definition has code c,
+// or NULL if no such option was given.
+char* GArgs::getOpt(int c) {
+ for (int k=0; k<count; k++) {
+   const int defidx=args[k].fmti;
+   if (defidx<0) continue; //not an option
+   if (fmt[defidx].code==c) return args[k].value;
+   }
+ return NULL;
+}
+
+// Returns the name, as given on the command line, of the first option whose
+// definition has code c, or NULL if no such option was given.
+char* GArgs::getOptName(int c) {
+ for (int k=0; k<count; k++) {
+   const int defidx=args[k].fmti;
+   if (defidx<0) continue; //not an option
+   if (fmt[defidx].code==c) return args[k].opt;
+   }
+ return NULL;
+}
+
+
+// Restarts the iteration through the non-option arguments;
+// returns how many non-option arguments there are.
+int GArgs::startNonOpt(){
+ nonOptPos=0;
+ return nonOptCount;
+}
+   
+   
+// Returns the next non-option (non-dashed) argument, or NULL when there are
+// no more; the iteration position is kept in nonOptPos.
+char* GArgs::nextNonOpt() {
+ int k=nonOptPos;
+ while (k<count && args[k].opt!=NULL) k++; //skip over options
+ if (k>=count) return NULL;
+ nonOptPos=k+1;
+ return args[k].value;
+}
+
+// Restarts the iteration through the option arguments;
+// returns how many option arguments there are.
+int GArgs::startOpt(){
+ optPos=0;
+ const int numopts=count-nonOptCount;
+ return numopts;
+}
+   
+   
+// Returns the name of the next option argument, or NULL when there are no
+// more; the iteration position is kept in optPos.
+char* GArgs::nextOpt() {
+ int k=optPos;
+ while (k<count && args[k].opt==NULL) k++; //skip over plain arguments
+ if (k>=count) return NULL;
+ optPos=k+1;
+ return args[k].opt;
+}
+
+// Returns the code of the next option argument, or 0 when there are no more.
+// Shares the iteration position (optPos) with nextOpt().
+int GArgs::nextCode() {
+ for (int k=optPos; k<count; k++) {
+   if (args[k].opt==NULL || args[k].fmti<0) continue;
+   optPos=k+1;
+   return fmt[args[k].fmti].code;
+   }
+ return 0; //must make sure that codes are > 0 for this to work properly
+}
+
diff --git a/src/GBase.cpp b/src/GBase.cpp
new file mode 100644
index 0000000..ed117f5
--- /dev/null
+++ b/src/GBase.cpp
@@ -0,0 +1,780 @@
+#include "GBase.h"
+#include <stdarg.h>
+#include <ctype.h>
+#include <sys/stat.h>
+
+#ifndef S_ISDIR
+#define S_ISDIR(mode)  (((mode) & S_IFMT) == S_IFDIR)
+#endif
+
+#ifndef S_ISREG
+#define S_ISREG(mode)  (((mode) & S_IFMT) == S_IFREG)
+#endif
+
+/*
+#ifdef _DEFINE_WIN32_FSEEKO
+ int fseeko(FILE *stream, off_t offset, int whence) {
+   
+   }
+#endif
+
+#ifdef _DEFINE_WIN32_FTELLO
+ off_t ftello(FILE *stream) {
+  
+  }
+#endif
+*/
+
+/*
+int saprintf(char **retp, const char *fmt, ...) {
+  va_list argp;
+  int len;
+  char *buf;
+
+  va_start(argp, fmt);
+  len = vsnprintf(NULL, 0, fmt, argp);
+  va_end(argp);
+  GMALLOC(buf, (len + 1));
+  if(buf == NULL)
+    {
+    *retp = NULL;
+    return -1;
+    }
+
+  va_start(argp, fmt);
+  vsnprintf(buf, len+1, fmt, argp);
+  va_end(argp);
+
+  *retp = buf;
+  return len;
+}
+*/
+
+//************************* Debug helpers **************************
+// Assert failed routine: reports the failed expression with its source
+// location on stderr. It does not abort the program (abort() is disabled).
+// The message is written directly to the stream, so expressions or file names
+// of any length are safe (no fixed-size intermediate buffer).
+void GAssert(const char* expression, const char* filename, unsigned int lineno){
+  fprintf(stderr,"%s(%u): ASSERT(%s) failed.\n",filename,lineno,expression);
+  //abort();
+  }
+// Error routine (prints error message and exits!)
+// format/... follow the printf conventions. The message goes to stderr (and,
+// on Windows, also to the debugger output and a message box, truncated to the
+// size of the local buffer); the program then exits with status 1.
+// In non-Windows DEBUG builds abort() is called instead, to allow a core dump.
+void GError(const char* format,...){
+  #ifdef __WIN32__
+    char msg[4096];
+    va_list arguments;
+    va_start(arguments,format);
+    vsnprintf(msg,sizeof(msg),format,arguments); //bounded: never overruns msg
+    va_end(arguments);
+    OutputDebugString(msg);
+    fprintf(stderr,"%s",msg); // if a console is available
+    MessageBox(NULL,msg,NULL,MB_OK|MB_ICONEXCLAMATION|MB_APPLMODAL);
+  #else
+    va_list arguments;
+    va_start(arguments,format);
+    vfprintf(stderr,format,arguments);
+    va_end(arguments);
+    #ifdef DEBUG
+     // modify here if you want a core dump
+     abort();
+    #endif
+  #endif
+    exit(1);
+  }
+  
+// Warning routine (just print message without exiting)
+// format/... follow the printf conventions. The formatted message is written
+// to stderr, which is flushed; messages longer than the local buffer are
+// truncated instead of overrunning it.
+void GMessage(const char* format,...){
+  char msg[4096];
+  va_list arguments;
+  va_start(arguments,format);
+  vsnprintf(msg,sizeof(msg),format,arguments);
+  va_end(arguments);
+  #ifdef __WIN32__
+    OutputDebugString(msg);
+  #endif
+  fprintf(stderr,"%s",msg);fflush(stderr);
+  }
+
+/*************** Memory management routines *****************/
+// Allocates size bytes and stores the address in *ptr.
+// Returns true if *ptr is non-NULL afterwards. For size==0 nothing is
+// allocated and *ptr is left untouched, so the result reflects its prior value.
+bool GMalloc(pointer* ptr,unsigned long size){
+  //GASSERT(ptr);
+  if (size>0) {
+    *ptr=malloc(size);
+    }
+  if (*ptr==NULL) return false;
+  return true;
+  }
+
+// Allocates size zero-filled bytes and stores the address in *ptr.
+// Returns true if the stored address is non-NULL.
+bool GCalloc(pointer* ptr,unsigned long size){
+  GASSERT(ptr);
+  void* zeroed=calloc(size,1);
+  *ptr=zeroed;
+  return (zeroed!=NULL);
+  }
+
+// Resizes the memory block at *ptr to size bytes.
+//  - size==0 frees the block (setting *ptr to NULL) and reports success
+//  - a NULL *ptr is treated as a plain allocation
+// Returns false when the allocation fails; *ptr is left unchanged then.
+bool GRealloc(pointer* ptr,unsigned long size){
+  //GASSERT(ptr);
+  if (size==0) {
+    GFree(ptr);
+    return true;
+    }
+  void* resized=(*ptr==NULL) ? malloc(size) : realloc(*ptr,size);
+  if (resized==NULL) return false;
+  *ptr=resized;
+  return true;
+ }
+// Frees the memory block at *ptr (if any) and leaves *ptr set to NULL.
+void GFree(pointer* ptr){
+  GASSERT(ptr);
+  if (*ptr!=NULL) {
+    free(*ptr);
+    *ptr=NULL;
+    }
+  }
+
+// Returns a newly allocated copy of str (allocated through GMALLOC),
+// or NULL when str is NULL.
+char* Gstrdup(const char* str) {
+  char* dup=NULL;
+  if (str!=NULL) {
+    const size_t nbytes=strlen(str)+1; //including the terminator
+    GMALLOC(dup, nbytes);
+    memcpy(dup, str, nbytes);
+    }
+  return dup;
+  }
+
+// Returns a newly allocated empty (just the terminator) string.
+char* newEmptyStr() {
+  char* empty=NULL;
+  GMALLOC(empty,1);
+  *empty='\0';
+  return empty;
+}
+
+// Returns a newly allocated, 0-terminated copy of the character range
+// sfrom..sto (both ends included). NULL if either pointer is NULL; a new
+// empty string if sfrom points at a terminator.
+char* Gstrdup(const char* sfrom, const char* sto) {
+  if (sfrom==NULL || sto==NULL) return NULL;
+  if (*sfrom=='\0') return newEmptyStr();
+  char* dup=NULL;
+  GMALLOC(dup, sto-sfrom+2); //range length plus the terminator
+  strncpy(dup, sfrom, sto-sfrom+1);
+  dup[sto-sfrom+1]='\0';
+  return dup;
+  }
+
+// NULL-tolerant string comparison: a NULL a sorts first (-1, also when both
+// are NULL), a NULL b gives 1. Otherwise compares the whole strings when n<0,
+// or at most n characters when n>=0.
+int Gstrcmp(const char* a, const char* b, int n) {
+ if (a==NULL) return -1;
+ if (b==NULL) return 1;
+ return (n<0) ? strcmp(a,b) : strncmp(a,b,n);
+}
+
+// Case-insensitive, NULL-tolerant string comparison.
+// A NULL a gives -1 (also when both are NULL), a NULL b gives 1.
+// With n<0 the whole strings are compared; otherwise at most n characters.
+// Returns -1, 0 or 1.
+// (The 'register' storage class was dropped: it is not valid C++17.)
+int Gstricmp(const char* a, const char* b, int n) {
+ if (a==NULL || b==NULL) return a==NULL ? -1 : 1;
+ int ua, ub;
+ if (n<0) {
+   while ((*a!=0) && (*b!=0)) {
+    ua=tolower((unsigned char)*a);
+    ub=tolower((unsigned char)*b);
+    a++;b++;
+    if (ua!=ub) return ua < ub ? -1 : 1;
+    }
+    //one (or both) ended: the shorter string sorts first
+    return (*a == 0) ? ( (*b == 0) ? 0 : -1 ) : 1 ;
+  }
+ else {
+   while (n && (*a!=0) && (*b!=0)) {
+    ua=tolower((unsigned char)*a);
+    ub=tolower((unsigned char)*b);
+    a++;b++;n--;
+    if (ua!=ub) return ua < ub ? -1 : 1;
+    }
+    //return (*a == 0) ? ( (*b == 0) ? 0 : -1 ) : 1 ;
+   if (n==0) return 0; //all n characters matched
+   else { return (*a == 0) ? ( (*b == 0) ? 0 : -1 ) : 1 ; }
+  }
+}
+
+// Splits str in place on any character found in delim: the first delimiter
+// after each field is overwritten with 0, runs of delimiters count as one, and
+// fields[] receives the start of each field. Stops once maxfields fields were
+// started (the last one then keeps the unsplit rest of the string).
+// Returns the number of fields stored.
+int strsplit(char* str, char** fields, int maxfields, const char* delim) {
+ int nfields=0;
+ bool newfield=true; //the next character examined starts a field
+ int pos=0;
+ while (str[pos]!=0 && nfields<maxfields) {
+    if (newfield) fields[nfields++]=str+pos;
+    newfield=chrInStr(str[pos],(char*)delim);
+    if (newfield) {
+        str[pos]=0; //terminate the current field
+        pos++;
+        //swallow the rest of the delimiter run
+        while (str[pos]!=0 && chrInStr(str[pos], (char*)delim)) pos++;
+        }
+      else pos++;
+    }
+ return nfields;
+}
+
+// Splits str in place on the character delim: the first delimiter after each
+// field is overwritten with 0, runs of delimiters count as one, and fields[]
+// receives the start of each field. Stops once maxfields fields were started
+// (the last one then keeps the unsplit rest of the string).
+// Returns the number of fields stored.
+int strsplit(char* str, char** fields, int maxfields, const char delim) {
+  int nfields=0;
+  bool newfield=true; //the next character examined starts a field
+  int pos=0;
+  while (str[pos]!=0 && nfields<maxfields) {
+     if (newfield) fields[nfields++]=str+pos;
+     newfield=(str[pos]==delim);
+     if (newfield) {
+         str[pos]=0; //terminate the current field
+         pos++;
+         //swallow the rest of the delimiter run
+         while (str[pos]!=0 && str[pos]==delim) pos++;
+         }
+       else pos++;
+     }
+  return nfields;
+}
+
+// Splits str in place on spaces and tabs: the first blank after each field is
+// overwritten with 0, runs of blanks count as one, and fields[] receives the
+// start of each field. Stops once maxfields fields were started (the last one
+// then keeps the unsplit rest of the string).
+// Returns the number of fields stored.
+int strsplit(char* str, char** fields, int maxfields) {
+  int nfields=0;
+  bool newfield=true; //the next character examined starts a field
+  int pos=0;
+  while (str[pos]!=0 && nfields<maxfields) {
+     if (newfield) fields[nfields++]=str+pos;
+     newfield=(str[pos]==' ' || str[pos]=='\t');
+     if (newfield) {
+         str[pos]=0; //terminate the current field
+         pos++;
+         //swallow the rest of the blank run
+         while (str[pos]==' ' || str[pos]=='\t') pos++;
+         }
+       else pos++;
+     }
+  return nfields;
+}
+
+
+// Extracts (and allocates) the substring of str delimited by the pointers
+// from and to, both boundaries included. A NULL to means "up to the end of
+// the string".
+// Returns NULL if str or from is NULL or if from points before str; a new
+// empty string if str or the range is empty. The caller owns the result.
+char* Gsubstr(const char* str, char* from, char* to) {
+ //extract (and allocate) a substring, including boundaries (from/to)
+ if (str==NULL || from==NULL) return NULL;
+ if (from[0]==0 || str[0]==0) return newEmptyStr();
+ if (from<str) return NULL;
+ if (to==NULL) {
+    to=from;
+    while (to[1]) to++; //advance to the last character before the terminator
+    }
+ if (to<from) return newEmptyStr();
+ int newlen=to-from+1; //number of characters in from..to
+ char* subs=NULL;
+ GMALLOC(subs, newlen+1); //+1 for the terminator
+ memcpy(subs, from, newlen); //copy the requested range, not the start of str
+ subs[newlen]='\0';
+ return subs;
+ }
+
+// Frees the string currently held in str and replaces it with a newly
+// allocated copy of newvalue. With a NULL newvalue str is only released and
+// NULL is returned; otherwise the new str is returned.
+char* replaceStr(char* &str, char* newvalue) {
+ if (str!=NULL) { GFREE(str); }
+ if (newvalue!=NULL) {
+   const size_t nbytes=strlen(newvalue)+1; //including the terminator
+   GMALLOC(str, nbytes);
+   memcpy(str, newvalue, nbytes);
+   return str;
+   }
+ return NULL;
+ }
+
+// Finds the first occurrence of the partlen bytes at part within the len
+// bytes at mem. Returns a pointer to the match inside mem, or NULL when there
+// is none. NULL is also returned for NULL pointers, for an empty part and for
+// a part longer than mem (these cases used to make the unsigned length
+// arithmetic wrap around and the scan run past the end of mem).
+void* Gmemscan(void *mem, unsigned int len,
+                   void *part, unsigned int partlen) {
+if (mem==NULL || part==NULL || partlen==0 || partlen>len) return NULL;
+const char* needle=(const char*)part;
+char* cur=(char*)mem;
+unsigned int restlen=len-partlen+1; //number of candidate start positions left
+char* p;
+while (restlen>0 && (p=(char*)memchr(cur, needle[0], restlen))!=NULL) {
+  //located first char, try to match the rest:
+  if (memcmp(p+1, needle+1, partlen-1)==0) return p;
+  //no match here, resume right after this candidate
+  restlen-=(unsigned int)(p+1-cur);
+  cur=p+1;
+  }//while
+return NULL;
+}
+
+//rindex function is missing on some platforms ?
+// Returns a pointer to the rightmost occurrence of ch in str, or NULL if
+// str is NULL or does not contain ch (the terminator itself is never matched).
+char* rstrchr(char* str, char ch) {
+ if (str==NULL) return NULL;
+ for (size_t k=strlen(str); k>0; k--) {
+    if (str[k-1]==ch) return str+(k-1);
+    }
+ return NULL;
+ }
+
+
+/* DOS/UNIX safer fgets : reads a text line from a (binary) file and
+  update the file position accordingly and the buffer capacity accordingly.
+  The given buf is resized to read the entire line in memory
+    -- even when it's abnormally long
+
+  buf, buf_cap : line buffer and its capacity; the buffer is grown in 1024
+                 byte steps (through GREALLOC) as needed
+  f_pos        : if not NULL, advanced by the number of bytes consumed,
+                 line terminator included
+  linelen      : if not NULL, receives the number of characters stored
+  Line ends recognized: "\n", "\r" and "\r\n"; the terminator is not stored.
+  Returns buf (0-terminated), or NULL if end of file was hit before any
+  character was stored.
+  NOTE(review): after a '\r' the look-ahead character is kept in c, so an
+  empty last line ended by a lone '\r' right at end of file appears to be
+  reported as NULL -- confirm whether that matters.
+  */
+char* fgetline(char* & buf, int& buf_cap, FILE *stream, off_t* f_pos, int* linelen) {
+  //reads a char at a time until \n and/or \r are encountered
+  int i=0;
+  int c=0;
+  off_t fpos=(f_pos!=NULL) ? *f_pos : 0;
+  while ((c=getc(stream))!=EOF) {
+    if (i>=buf_cap-1) { //keep room for this char and the terminator
+       buf_cap+=1024;
+       GREALLOC(buf, buf_cap);
+       }
+    if (c=='\n' || c=='\r') {
+       if (c=='\r') {
+         //DOS line end: swallow the '\n' following the '\r', if any
+         if ((c=getc(stream))!='\n') ungetc(c,stream);
+                                else fpos++;
+         }
+       fpos++;
+       break;
+       }
+    fpos++;
+    buf[i]=(char)c;
+    i++;
+    } //while not EOF and no line end found
+  if (linelen!=NULL) *linelen=i;
+  if (f_pos!=NULL) *f_pos=fpos;
+  if (c==EOF && i==0) return NULL;
+  buf[i]='\0';
+  return buf;
+  }
+
+// Reads the next text line from stream into the reader's own buffer (buf),
+// growing it in 1024 byte steps as needed, and returns it 0-terminated with
+// the line terminator ("\n", "\r" or "\r\n") removed.
+// f_pos is advanced by the number of bytes consumed; len holds the length of
+// the line and lcount is incremented for every line returned.
+// If a line was pushed back (pushed is set), that same buffer is returned
+// again without reading anything.
+// Returns NULL at end of file when nothing was read; isEOF is set whenever
+// end of file is reached.
+char* GLineReader::getLine(FILE* stream, off_t& f_pos) {
+   if (pushed) { pushed=false; return buf; }
+   //reads a char at a time until \n and/or \r are encountered
+   len=0;
+   int c=0;
+   while ((c=getc(stream))!=EOF) {
+     if (len>=allocated-1) { //keep room for this char and the terminator
+        allocated+=1024;
+        GREALLOC(buf, allocated);
+     }
+     if (c=='\n' || c=='\r') {
+       buf[len]='\0';
+       if (c=='\r') { //DOS file -- special case
+         //swallow the '\n' following the '\r', if any
+         if ((c=getc(stream))!='\n') ungetc(c,stream);
+                                else f_pos++;
+         }
+       f_pos++;
+       lcount++;
+       return buf;
+       }
+     f_pos++;
+     buf[len]=(char)c;
+     len++;
+     } //while not EOF
+   if (c==EOF) {
+     isEOF=true;
+     if (len==0) return NULL;
+     }
+   //last line of the file, not ended by a line terminator
+   buf[len]='\0';
+   lcount++;
+   return buf;
+}
+
+
+//strchr but with a set of chars instead of only one:
+// returns a pointer to the first character of s that is found in chrs,
+// or NULL if there is none (or if either argument is NULL or empty)
+char* strchrs(const char* s, const char* chrs) {
+  if (s==NULL || chrs==NULL) return NULL;
+  if (*s=='\0' || *chrs=='\0') return NULL;
+  const char* hit=s+strcspn(s, chrs);
+  //landing on the terminator means no character of the set was found
+  return (*hit=='\0') ? NULL : (char*)hit;
+}
+
+// Returns a newly allocated uppercase copy of str (NULL for a NULL str).
+// The caller owns the result.
+char* upCase(const char* str) {
+ if (str==NULL) return NULL;
+ int len=strlen(str);
+ char* upstr=NULL;
+ GMALLOC(upstr, len+1);
+ upstr[len]='\0';
+ //toupper() is only defined for unsigned char values (and EOF), so the
+ //possibly negative plain char is converted first
+ for (int i=0;i<len;i++) upstr[i]=toupper((unsigned char)str[i]);
+ return upstr;
+ }
+
+// Returns a newly allocated lowercase copy of str (NULL for a NULL str).
+// The caller owns the result.
+char* loCase(const char* str) {
+ if (str==NULL) return NULL;
+ int len=strlen(str);
+ char* lostr=NULL;
+ GMALLOC(lostr, len+1);
+ lostr[len]='\0';
+ //tolower() is only defined for unsigned char values (and EOF), so the
+ //possibly negative plain char is converted first
+ for (int i=0;i<len;i++) lostr[i]=tolower((unsigned char)str[i]);
+ return lostr;
+ }
+
+// Converts str to lowercase in place and returns it (NULL for a NULL str).
+char* strlower(char * str) {//changes string in place
+  if (str==NULL) return NULL;
+  //tolower() is only defined for unsigned char values (and EOF), so the
+  //possibly negative plain char is converted first
+  for (char* p=str; *p!=0; p++) *p=(char)tolower((unsigned char)*p);
+  return str;
+}
+
+// Converts str to uppercase in place and returns it (NULL for a NULL str).
+char* strupper(char * str) {//changes string in place
+  if (str==NULL) return NULL;
+  //toupper() is only defined for unsigned char values (and EOF), so the
+  //possibly negative plain char is converted first
+  for (char* p=str; *p!=0; p++) *p=(char)toupper((unsigned char)*p);
+  return str;
+}
+
+
+
+//test if a char is in a given string (set)
+// (a NULL or empty set contains nothing; the terminator is never matched)
+bool chrInStr(char c, const char* str) {
+ if (str==NULL) return false;
+ while (*str!='\0') {
+   if (*str==c) return true;
+   str++;
+   }
+ return false;
+ }
+
+
+
+/* like rindex() for a string: returns a pointer to the rightmost occurrence
+   of substr in str, or NULL if there is none (or if either one is NULL or
+   empty) */
+char* rstrfind(const char* str, const char* substr) {
+ if (str==NULL || *str=='\0') return NULL;
+ if (substr==NULL || *substr=='\0') return NULL;
+ const int sublen=strlen(substr);
+ //try every start offset that leaves room for substr, rightmost first
+ for (int pos=(int)strlen(str)-sublen; pos>=0; pos--) {
+    if (strncmp(str+pos, substr, sublen)==0) return (char*)str+pos; //found!
+    }
+ return NULL;
+}
+
+
+// the case insensitive version of strstr -- finding a string within a string:
+// returns a pointer to the leftmost occurrence of substr in str, or NULL if
+// there is none (or if either one is NULL or empty)
+char* strifind(const char* str,  const char* substr) {
+  if (str==NULL || *str==0) return NULL;
+  if (substr==NULL || *substr==0) return NULL;
+  const int sublen=strlen(substr);
+  const int lastpos=(int)strlen(str)-sublen; //rightmost offset that could match
+  for (int pos=0; pos<=lastpos; pos++) {
+     int k=0;
+     while (k<sublen && tolower(str[pos+k])==tolower(substr[k])) k++;
+     if (k==sublen) return (char*)str+pos; //found!
+     }
+  return NULL;
+}
+
+
+
+// tests if string s has the given prefix
+// (false if either one is NULL; an empty prefix always matches)
+bool startsWith(const char* s, const char* prefix) {
+ if (prefix==NULL || s==NULL) return false;
+ for (; *prefix!='\0'; prefix++, s++) {
+   if (*prefix!=*s) return false; //also covers s being shorter than prefix
+   }
+ return true;
+ }
+
+// tests if string s ends with given suffix
+// (false if either one is NULL; an empty suffix always matches)
+bool endsWith(const char* s, const char* suffix) {
+ if (suffix==NULL || s==NULL) return false;
+ const size_t suflen=strlen(suffix);
+ if (suflen==0) return true; //special case: empty suffix
+ const size_t slen=strlen(s);
+ if (slen<suflen) return false;
+ return memcmp(s+(slen-suflen), suffix, suflen)==0;
+ }
+
+
+// Reverses the first slen characters of str in place and returns str;
+// slen==0 means "the whole string" (its strlen).
+char* reverseChars(char* str, int slen) {
+  if (slen==0) slen=strlen(str);
+  for (int lo=0, hi=slen-1; lo<hi; lo++, hi--) {
+     const char tmp=str[hi];
+     str[hi]=str[lo];
+     str[lo]=tmp;
+     }
+  return str;
+}
+
+
+/* like strstr, but searching backwards: candidate matches end at rstart or
+ to the left of it and may not start before lend. Returns a pointer to the
+ last (rightmost) character of the match found, or NULL if there is none */
+char* rstrstr(const char* rstart, const char *lend, const char* substr) {
+ const int sublen=strlen(substr);
+ //cand is the start of the candidate match; the first one ends at rstart
+ for (char* cand=(char*)rstart-sublen+1; cand>=lend; cand--) {
+    if (strncmp(cand, substr, sublen)==0) return cand+sublen-1;
+    }
+ return NULL;
+ }
+
+
+//hash function used for strings in GHash
+// For every character the accumulator is shifted left by 4 bits and the
+// character is added; any bits landing in the top nibble are folded back
+// in, and the value is then masked to 28 bits, so the result is always in
+// the range 0..0x0fffffff.
+// The `register` storage class was dropped: it is ill-formed since C++17
+// and never affected the computed value.
+// NOTE(review): (h<<4) is evaluated on a signed int that may hold up to
+// 0x0fffffff, so the shift can exceed INT_MAX; the arithmetic is left
+// untouched here so that existing hash values do not change.
+int strhash(const char* str){
+  int h=0;
+  int g;
+  while (*str) {
+    h=(h<<4)+*str++;
+    g=h&0xF0000000;
+    if(g) h^=g>>24;
+    h&=0x0fffffff;
+    }
+  GASSERT(h<=0x0fffffff);
+  return h;
+  }
+
+// Strips the last path component (file or directory name) from filepath,
+// modifying the caller's buffer in place. Everything after the last '/'
+// or '\\' is cut off, so the separator itself is kept; when there is no
+// separator the string becomes empty. A NULL pointer is ignored.
+void delFileName(char* filepath) {
+ if (filepath==NULL) return;
+ char* cut=filepath; //position where the string will be terminated
+ char* cur=filepath;
+ while (*cur!='\0') {
+    const char ch=*cur++;
+    if (ch=='/' || ch=='\\') cut=cur; //just past the latest separator
+    }
+ *cut='\0';
+}
+
+// Returns a pointer (into filepath itself) to whatever follows the last
+// '/' or '\\'; the whole string when there is no separator; NULL for NULL.
+const char* getFileName(const char* filepath) {
+ if (filepath==NULL) return NULL;
+ const char* base=filepath;
+ const char* cur=filepath;
+ while (*cur!='\0') {
+    const char ch=*cur++;
+    if (ch=='/' || ch=='\\') base=cur; //name starts after this separator
+    }
+ return base;
+}
+
+// Returns a pointer (into filepath itself) to the text after the last '.',
+// provided that dot comes after the last '/' or '\\'. Returns NULL when
+// the last path component contains no dot, or when filepath is NULL.
+const char* getFileExt(const char* filepath) {
+ if (filepath==NULL) return NULL;
+ const char* afterDot=filepath;
+ const char* afterSep=filepath;
+ for (const char* cur=filepath; *cur!='\0'; ++cur) {
+     switch (*cur) {
+       case '.':
+            afterDot=cur+1;
+            break;
+       case '/':
+       case '\\':
+            afterSep=cur+1;
+            break;
+       default:
+            break;
+       }
+     }
+ //a dot located before the last separator belongs to a directory name
+ if (afterDot>afterSep) return afterDot;
+ return NULL;
+}
+
+// Classifies the file system entry named fname using stat():
+//   0 = stat() failed, 1 = directory, 2 = regular file, 3 = anything else
+int fileExists(const char* fname) {
+  struct stat info;
+  if (stat(fname,&info)!=0) return 0; //attributes could not be read
+  if (S_ISDIR (info.st_mode)) return 1;
+  if (S_ISREG (info.st_mode)) return 2;
+  return 3; //exists, but neither a directory nor a regular file
+}
+
+/*bool fileExists(const char* filepath) {
+  if (filepath==NULL) return false;
+  FILE* ft=fopen(filepath, "rb");
+  if (ft==NULL) return false;
+  fclose(ft);
+  return true;
+}
+*/
+// Returns the st_size value reported by stat() for fpath, converted to
+// int64. When stat() fails the function returns 0 and prints nothing.
+int64 fileSize(const char* fpath) {
+  struct stat st;
+  const bool ok=(stat(fpath, &st) == 0);
+  return ok ? (int64)st.st_size : 0;
+}
+
+// Parses a floating point number at p, after skipping blanks and tabs.
+// The candidate text is the longest run of characters from the set
+// "0123456789eE.-+"; p is always advanced past that run. The run is
+// temporarily NUL-terminated in place so strtod() cannot read past it,
+// and the overwritten character is restored afterwards.
+// Returns true only when strtod() consumed the whole run (v holds the
+// value); a lone '-' is rejected.
+bool parseNumber(char* &p, double& v) {
+ while (*p==' ' || *p=='\t') ++p;
+ char* const first=p;
+ p=first+strspn(first, "0123456789eE.-+");
+ //p now rests on the first character outside the accepted set
+ if (*first=='-' && p==first+1) return false;
+ const char keep=*p;
+ *p='\0';
+ char* stop=p;
+ v=strtod(first,&stop);
+ *p=keep;
+ return (stop==p);
+}
+
+
+// Alias of parseNumber(): forwards both arguments and returns its result.
+bool parseDouble(char* &p, double& v) {
+ return parseNumber(p,v);
+}
+
+// Parses a decimal integer at p, after skipping blanks and tabs. An
+// optional '+' or '-' sign is accepted; p is advanced past the digits.
+// The digit run is temporarily NUL-terminated in place for strtol() and
+// then restored. Returns false for a lone '-', when strtol() did not
+// consume the whole run, or when the value does not fit in an int.
+bool parseInt(char* &p, int& i) {
+ while (*p==' ' || *p=='\t') ++p;
+ char* first=p;
+ switch (*p) {
+   case '-': ++p; break;          //keep the sign for strtol()
+   case '+': ++p; ++first; break; //drop the explicit plus sign
+   default: break;
+   }
+ while (*p>='0' && *p<='9') ++p;
+ //p now rests on the first non-digit
+ if (*first=='-' && p==first+1) return false;
+ const char keep=*p;
+ *p='\0';
+ char* stop=p;
+ const long parsed=strtol(first,&stop,10);
+ i=(int)parsed;
+ *p=keep;
+ return (stop==p && i==parsed);
+}
+
+// Parses an unsigned decimal integer at p, after skipping blanks and
+// tabs. A leading '-' is rejected at once; a leading '+' is skipped.
+// p is advanced past the digits, which are temporarily NUL-terminated in
+// place for strtoul() and then restored. Returns false when strtoul() did
+// not consume the whole run or when the value does not fit in a uint.
+// (The original's later "lone minus" test could never fire, because a
+// leading '-' has already returned; it is therefore not repeated.)
+bool parseUInt(char* &p, uint& i) {
+ while (*p==' ' || *p=='\t') ++p;
+ if (*p=='-') return false;
+ if (*p=='+') ++p;
+ char* const first=p;
+ while (*p>='0' && *p<='9') ++p;
+ //p now rests on the first non-digit
+ const char keep=*p;
+ *p='\0';
+ char* stop=p;
+ const unsigned long parsed=strtoul(first,&stop,10);
+ i=(uint) parsed;
+ *p=keep;
+ return (stop==p && i==parsed);
+}
+
+// Parses an unsigned hexadecimal integer at p.
+// Leading blanks, tabs and any run of '0' / 'x' characters (the "0x"
+// prefix and leading zeros) are skipped first; a '-' there is rejected
+// and a '+' is skipped. p is advanced past the hex digits, which are
+// temporarily NUL-terminated in place for strtoul() and then restored.
+// Returns false when strtoul() did not consume the whole run or when the
+// value does not fit in a uint.
+// Because leading zeros are skipped, an all-zero input leaves no digits;
+// strtoul() then yields 0 and the function still returns true.
+bool parseHex(char* &p, uint& i) {
+ //skip initial spaces/prefix
+ while (*p==' ' || *p=='\t' || *p=='0' || *p=='x') p++;
+ char* start=p;
+ if (*p=='-') return false;
+       else if (*p=='+') { p++;start++; }
+ //isxdigit() needs a value representable as unsigned char
+ while (isxdigit((unsigned char)*p)) p++;
+ //now p is on a non-hexdigit;
+ //(the former "p==start+1" rejection was removed: it made every
+ // single-digit value such as "0xA" fail)
+ char saved=*p;
+ *p='\0';
+ char* endptr=p;
+ unsigned long l=strtoul(start,&endptr,16);
+ i=(uint) l;
+ *p=saved;
+ if (endptr!=p || i!=l) return false;
+ return true;
+}
+
+// Writes one FASTA record to fw.
+//  seqid, descr: when seqid is not NULL a ">seqid" header line is written,
+//                followed by " descr" if descr is non-empty
+//  seq:          sequence text; nothing more is written when NULL or empty
+//  linelen:      residues per output line; 0 puts the sequence on one line
+//  seqlen:       when > 0 exactly seqlen characters of seq are written,
+//                otherwise writing stops at the terminating NUL
+// The stream is flushed before and after the header and after the body.
+void writeFasta(FILE *fw, const char* seqid, const char* descr,
+        const char* seq, int linelen, int seqlen) {
+  fflush(fw);
+  if (seqid!=NULL) { //header is optional
+    const bool hasDescr=(descr!=NULL && descr[0]!=0);
+    if (hasDescr) fprintf(fw,">%s %s\n",seqid, descr);
+             else fprintf(fw,">%s\n",seqid);
+    }
+  fflush(fw);
+  if (seq==NULL || *seq==0) return; //no sequence to print
+  if (linelen==0) { //no wrapping requested
+     if (seqlen>0) fwrite((const void*)seq, 1, seqlen,fw);
+              else fprintf(fw,"%s",seq);
+     fprintf(fw,"\n");
+     fflush(fw);
+     return;
+     }
+  const bool bounded=(seqlen>0); //explicit length vs. NUL-terminated
+  int col=0; //residues already written on the current output line
+  for (int k=0; bounded ? (k<seqlen) : (seq[k]!=0); k++, col++) {
+     if (col == linelen) { //line is full: start a new one
+          fputc('\n', fw);
+          col = 0;
+          }
+     fputc(seq[k], fw);
+     }
+  fputc('\n', fw);
+  fflush(fw);
+ }
+
+// Formats n in decimal with a ',' inserted between every group of three
+// digits (e.g. 1234567 -> "1,234,567") and returns a pointer to the text.
+// The text lives in a function-local static buffer: the original returned
+// a pointer into a stack array, which dangled as soon as the function
+// returned. As a consequence the result is overwritten by the next call,
+// so copy it if it must survive.
+char* commaprint(uint64 n) {
+  static char retbuf[48];
+  //the separator is fixed; the locale lookup (localeconv()->thousands_sep)
+  //was already disabled in the original code
+  const char comma = ',';
+  char *p = &retbuf[sizeof(retbuf)-1];
+  int i = 0; //number of digits emitted so far
+  *p = '\0';
+  //digits are produced least-significant first, filling the buffer
+  //backwards from its end
+  do {
+    if(i%3 == 0 && i != 0)
+      *--p = comma;
+    *--p = '0' + n % 10;
+    n /= 10;
+    i++;
+  } while(n != 0);
+  return p;
+}
diff --git a/src/GFaSeqGet.cpp b/src/GFaSeqGet.cpp
new file mode 100644
index 0000000..ca722ca
--- /dev/null
+++ b/src/GFaSeqGet.cpp
@@ -0,0 +1,319 @@
+#include "GFaSeqGet.h"
+#include "gdna.h"
+#include <ctype.h>
+
+// (Re)allocates the sequence buffer sq and records its start and length.
+//  sovl==0: the old buffer is dropped and a new one of slen bytes is
+//           allocated; when slen is 0 the size is maxseqlen, or
+//           MAX_FASUBSEQ if maxseqlen is 0.
+//  sovl!=0: a new buffer of slen bytes is allocated and sovl bytes are
+//           copied from old offset qfrom to new offset qto before the old
+//           buffer is released.
+void GSubSeq::setup(uint sstart, int slen, int sovl, int qfrom, int qto, uint maxseqlen) {
+  if (sovl!=0) {
+     //overlap -- carry the shared region over into the new buffer
+     char* fresh=NULL;
+     GMALLOC(fresh, slen);
+     memcpy((void*)&fresh[qto], (void*)&sq[qfrom], sovl);
+     GFREE(sq);
+     sq=fresh;
+     sqstart=sstart;
+     sqlen=slen;
+     return;
+     }
+  //no overlap -- start from scratch
+  GFREE(sq);
+  sqstart=sstart;
+  uint max_len=(maxseqlen>0) ? maxseqlen : MAX_FASUBSEQ;
+  sqlen = (slen==0 ? max_len : slen);
+  GMALLOC(sq, sqlen);
+}
+
+// Opens the FASTA file fn for binary reading (reporting a failure through
+// GError), keeps a copy of the file name, runs initialParse() at file
+// offset fofs and finally creates the empty subsequence buffer object.
+void GFaSeqGet::finit(const char* fn, off_t fofs, bool validate) {
+ fh=fopen(fn,"rb");
+ if (fh==NULL) {
+   GError("Error (GFaSeqGet) opening file '%s'\n",fn);
+   }
+ fname=Gstrdup(fn);
+ initialParse(fofs, validate);
+ lastsub=new GSubSeq();
+}
+
+// Constructor taking the record layout from the caller instead of parsing
+// the file: seqlen is stored as seq_len, l_len as line_len, l_blen as
+// line_blen and fseqofs as fseqstart. initialParse() is not called here.
+GFaSeqGet::GFaSeqGet(const char* faname, uint seqlen, off_t fseqofs, int l_len, int l_blen) {
+//for GFastaIndex use mostly -- the important difference is that
+//the file offset is to the sequence, not to the defline
+  fh=fopen(faname,"rb");
+  if (fh==NULL) {
+    GError("Error (GFaSeqGet) opening file '%s'\n",faname);
+    }
+  fname=Gstrdup(faname);
+  line_len=l_len;
+  line_blen=l_blen;
+  seq_len=seqlen;
+  // the full line length (line_blen) can never be shorter than line_len
+  if (line_blen<line_len)
+       GError("Error (GFaSeqGet): invalid line length info (len=%d, blen=%d)\n",
+              line_len, line_blen);
+  fseqstart=fseqofs;
+  lastsub=new GSubSeq();
+}
+
+// Constructor working on an already opened stream f (no file name is
+// recorded, fname stays NULL). The record layout is discovered by
+// initialParse() starting at file offset fofs.
+GFaSeqGet::GFaSeqGet(FILE* f, off_t fofs, bool validate) {
+  fname=NULL;
+  fseqstart=0;
+  if (f==NULL) GError("Error (GFaSeqGet) : null file handle!\n");
+  seq_len=0;
+  fh=f;
+  initialParse(fofs, validate);
+  lastsub=new GSubSeq();
+}
+
+// Determines the layout of the FASTA record that starts at file offset
+// fofs of fh:
+//   fseqstart - file offset of the first sequence character
+//   line_len  - number of characters on the first sequence line
+//   line_blen - line_len plus the number of end-of-line characters
+// When checkall is true the remainder of the record is also scanned to
+// verify that the sequence lines have the same length and line ending
+// (see the check inside the loop). On normal completion the stream is
+// repositioned at fseqstart.
+void GFaSeqGet::initialParse(off_t fofs, bool checkall) {
+ static const char gfa_ERRPARSE[]="Error (GFaSeqGet): invalid FASTA file format.\n";
+ if (fofs!=0) { fseeko(fh,fofs,SEEK_SET); } //e.g. for offsets provided by cdbyank
+ //read the first two lines to determine fasta parameters
+ fseqstart=fofs;
+ int c=getc(fh);
+ fseqstart++;
+ if (c!='>') GError("Error (GFaSeqGet): not a fasta header?\n");
+ // skip the defline; fseqstart counts every byte consumed, including the
+ // first end-of-line character
+ while ((c=getc(fh))!=EOF) {
+   fseqstart++;
+   if (c=='\n' || c=='\r') { break; } //end of defline
+   }
+
+ if (c==EOF) GError(gfa_ERRPARSE);
+ line_len=0;
+ int lendlen=0; // number of end-of-line characters after the first sequence line
+ while ((c=getc(fh))!=EOF) {
+  if (c=='\n' || c=='\r') { //end of line encountered
+     if (line_len>0) { //end of the first "sequence" line
+        lendlen++;
+        break;
+        }
+      else {// another EoL char at the end of defline
+        fseqstart++;
+        continue;
+        }
+     }// end-of-line characters
+  line_len++;
+  }
+ //we are at the end of first sequence line
+ // count any further end-of-line characters; the first other character
+ // is pushed back onto the stream
+ while ((c=getc(fh))!=EOF) {
+   if (c=='\n' || c=='\r') lendlen++;
+      else {
+       ungetc(c,fh);
+       break;
+       }
+   }
+ line_blen=line_len+lendlen;
+ // NOTE(review): on this early return the stream is left at EOF and is
+ // not repositioned at fseqstart
+ if (c==EOF) return;
+ // -- you don't need to check it all if you're sure it's safe
+ if (checkall) { //validate the rest of the FASTA record
+   int llen=0; //last line length
+   int elen=0; //length of last line ending
+   bool waseol=true;
+   while ((c=getc(fh))!=EOF) {
+     // a '>' at the start of a line begins the next record: stop there
+     if (c=='>' && waseol) { ungetc(c,fh); break; }
+     if (c=='\n' ||  c=='\r') {
+        // eol char
+        elen++;
+        if (waseol) continue; //2nd eol char
+        waseol=true;
+        elen=1;
+        continue;
+        }
+     if (c<=32) GError(gfa_ERRPARSE); //invalid character encountered
+     //--- on a seq char here:
+     if (waseol) {//beginning of a seq line
+       // the line just finished must match the first line in both length
+       // and line-ending size; only a final line is exempt, because the
+       // test runs when the NEXT line starts
+       if (elen && (llen!=line_len || elen!=lendlen))
+           //GError(gfa_ERRPARSE);
+         GError("Error: invalid FASTA format for GFaSeqGet; make sure that\n\
+  the sequence lines have the same length (except for the last line)");
+       waseol=false;
+       llen=0;
+       elen=0;
+       }
+     llen++;
+     } //while reading chars
+   }// FASTA checking was requested
+ fseeko(fh,fseqstart,SEEK_SET);
+}
+
+// Returns a pointer to clen characters of the sequence starting at the
+// 1-based coordinate cstart. The pointer refers to the internal buffer
+// lastsub->sq (it is not a copy). The buffer is reused between calls: a
+// request inside the buffered range is served directly, otherwise the
+// buffer is rebuilt through lastsub->setup() and the missing part is read
+// with loadsubseq(). clen is passed by reference and is reduced when
+// fewer characters than requested could be read.
+// Returns NULL when clen exceeds the maximum allowed length.
+const char* GFaSeqGet::subseq(uint cstart, int& clen) {
+  //cstart is 1-based genomic coordinate within current fasta sequence
+   int maxlen=(seq_len>0)?seq_len : MAX_FASUBSEQ;
+   //GMessage("--> call: subseq(%u, %d)\n", cstart, clen);
+  if (clen>maxlen) {
+    GMessage("Error (GFaSeqGet): subsequence cannot be larger than %d\n", maxlen);
+    return NULL;
+    }
+  // NOTE(review): this case only prints a message; execution continues
+  if (seq_len>0 && clen+cstart-1>seq_len) {
+     GMessage("Error (GFaSeqGet): end coordinate (%d) cannot be larger than sequence length %d\n", clen+cstart-1, seq_len);
+     }
+  // nothing buffered yet: allocate and load exactly the requested range
+  if (lastsub->sq==NULL || lastsub->sqlen==0) {
+    lastsub->setup(cstart, clen, 0,0,0,seq_len);
+    loadsubseq(cstart, clen);
+    lastsub->sqlen=clen;
+    return (const char*)lastsub->sq;
+    }
+  //allow extension up to MAX_FASUBSEQ
+  // b* = coordinates of the currently buffered range, c* = requested range
+  uint bstart=lastsub->sqstart;
+  uint bend=lastsub->sqstart+lastsub->sqlen-1;
+  uint cend=cstart+clen-1;
+  int qlen=0; //only the extra len to be allocated/appended/prepended
+  uint qstart=cstart; //start coordinate of the new seq block of length qlen to be read from file
+  int newlen=0; //the new total length of the buffered sequence lastsub->sq
+  int kovl=0;   //number of already buffered characters to carry over
+  int czfrom=0;//0-based offsets for copying a previously read sequence chunk
+  int czto=0;
+  uint newstart=cstart;
+  if (cstart>=bstart && cend<=bend) { //new reg contained within existing buffer
+     return (const char*) &(lastsub->sq[cstart-bstart]) ;
+    }
+  //extend downward
+  uint newend=GMAX(cend, bend);
+  if (cstart<bstart) { //requested start < old buffer start
+    newstart=cstart;
+    newlen=(newend-newstart+1);
+    if (newlen>MAX_FASUBSEQ) {
+       newlen=MAX_FASUBSEQ;
+       newend=cstart+newlen-1; //keep newstart, set newend
+       }
+    qlen=bstart-cstart;
+    if (newend>bstart) { //overlap
+       if (newend>bend) {// new region is larger & around the old one - so we have two regions to update
+         kovl=bend-bstart+1;
+         czfrom=0;
+         czto=bstart-cstart;
+         lastsub->setup(newstart, newlen, kovl, czfrom, czto, seq_len); //this should realloc and copy the kovl subseq
+         // first read the part before the old buffer ...
+         qlen=bstart-cstart;
+         loadsubseq(newstart, qlen);
+         // ... then the part after it
+         qlen=newend-bend;
+         int toread=qlen;
+         loadsubseq(bend+1, qlen);
+         // shrink clen by the number of characters that could not be read
+         clen-=(toread-qlen);
+         lastsub->sqlen=clen;
+         return (const char*)lastsub->sq;
+         }
+        //newend<=bend
+       kovl=newend-bstart+1;
+       }
+     else { //no overlap with previous buffer
+       if (newend>bend) kovl=bend-bstart+1;
+                   else kovl=newend-bstart+1;
+       }
+     qlen=bstart-cstart;
+     czfrom=0;
+     czto=qlen;
+    } //cstart<bstart
+   else { //cstart>=bstart, possibly extend upwards
+    newstart=bstart;
+    newlen=(newend-newstart+1);
+    if (newlen>MAX_FASUBSEQ) {
+       newstart=bstart+(newlen-MAX_FASUBSEQ);//keep newend, assign newstart
+       newlen=MAX_FASUBSEQ;
+       if (newstart<=bend) { //overlap with old buffer
+          kovl=bend-newstart+1;
+          czfrom=newstart-bstart;
+          czto=0;
+          }
+       else { //not overlapping old buffer
+         kovl=0;
+         }
+       } //newstart reassigned
+    else { //we can extend the buffer to include the old one
+      qlen=newend-bend; //how much to read from file
+      qstart=bend+1;
+      kovl=bend-bstart+1;
+      czfrom=0;
+      czto=0;
+      }
+    }
+  lastsub->setup(newstart, newlen, kovl, czfrom, czto, seq_len); //this should realloc but copy any overlapping region
+  lastsub->sqlen-=qlen; //appending may result in a premature eof
+  int toread=qlen;
+  loadsubseq(qstart, qlen); //read the missing chunk, if any
+  // loadsubseq() stores the count actually read back into qlen
+  clen-=(toread-qlen);
+  lastsub->sqlen+=qlen;
+  return (const char*)(lastsub->sq+(cstart-newstart));
+}
+
+// Returns a newly allocated, NUL-terminated copy of the sequence between
+// coordinates cstart and cend (swapped when given in reverse order).
+// The copy is reverse-complemented when revCmpl is set and upper-cased
+// when upCase is set. Returns NULL when subseq() fails. The buffer is
+// obtained with GMALLOC and belongs to the caller.
+char* GFaSeqGet::copyRange(uint cstart, uint cend, bool revCmpl, bool upCase) {
+  if (cstart>cend) { Gswap(cstart, cend); }
+  int clen=cend-cstart+1;
+  const char* src=subseq(cstart, clen); //may lower clen
+  if (src==NULL) return NULL;
+  char* dup=NULL;
+  GMALLOC(dup,clen+1);
+  memcpy((void*)dup,(void*)src, clen);
+  dup[clen]=0;
+  if (revCmpl) reverseComplement(dup,clen);
+  if (upCase) {
+       for (char* q=dup; q<dup+clen; ++q)
+            *q=toupper(*q);
+       }
+  return dup;
+ }
+
+// Reads clen sequence characters, starting at the 1-based coordinate
+// cstart, from the file into lastsub->sq at offset cstart-lastsub->sqstart
+// (the caller must have allocated enough room). End-of-line bytes are
+// skipped by seeking over them, using the line_len / line_blen layout.
+// A clen of 0 means "as much as allowed" (up to the end of the sequence
+// when seq_len is known, MAX_FASUBSEQ otherwise).
+// On return clen holds the number of characters actually stored; the
+// return value points at the first of them.
+// Fixes relative to the original:
+//  - the loops that strip trailing EOL characters after a short read now
+//    stop at 0, so a read of zero bytes no longer inspects the byte
+//    before the destination buffer;
+//  - the line offset is computed in off_t rather than in 32-bit unsigned
+//    arithmetic.
+const char* GFaSeqGet::loadsubseq(uint cstart, int& clen) {
+  //assumes enough lastsub->sq space allocated previously
+  //only loads the requested clen chars from file, at offset &lastsub->sq[cstart-lastsub->sqstart]
+  int sofs=cstart-lastsub->sqstart;
+  int lendlen=line_blen-line_len; //number of EOL bytes per line
+  char* seqp=lastsub->sq+sofs;
+  //find the proper file offset and read the appropriate lines
+  uint seqofs=cstart-1;
+  uint startlno = seqofs/line_len;
+  int lineofs = seqofs % line_len;
+  off_t fstart=fseqstart + ((off_t)startlno*line_blen);
+  fstart+=lineofs;
+
+  fseeko(fh, fstart, SEEK_SET);
+  int toread=clen;
+  int maxlen=(seq_len>0)? seq_len-cstart+1 : MAX_FASUBSEQ ;
+  if (toread==0) toread=maxlen; //read max allowed, or to the end of file
+  int actualrlen=0;
+  int sublen=0; //characters stored so far
+  if (lineofs>0) { //read the partial first line
+    int reqrlen=line_len-lineofs;
+    if (reqrlen>toread) reqrlen=toread; //in case we need to read just a few chars
+    actualrlen=fread((void*)seqp, 1, reqrlen, fh);
+    if (actualrlen<reqrlen) { //eof reached prematurely
+      while (actualrlen>0 &&
+             (seqp[actualrlen-1]=='\n' || seqp[actualrlen-1]=='\r')) actualrlen--;
+      //check for new sequences in between
+      clen=actualrlen;
+      sublen+=actualrlen;
+      return (const char*)seqp;
+      }
+    toread-=reqrlen;
+    sublen+=reqrlen;
+    fseeko(fh, lendlen, SEEK_CUR); //skip the line ending
+    }
+  //read the rest of the lines
+  while (toread>=line_len) {
+    char* rseqp=&(seqp[sublen]);
+    actualrlen=fread((void*)rseqp, 1, line_len, fh);
+    if (actualrlen<line_len) { //short read: end of data
+      while (actualrlen>0 &&
+             (rseqp[actualrlen-1]=='\n' || rseqp[actualrlen-1]=='\r')) actualrlen--;
+      sublen+=actualrlen;
+      clen=sublen;
+      return (const char*)seqp;
+      }
+    toread-=actualrlen;
+    sublen+=actualrlen;
+    fseeko(fh, lendlen, SEEK_CUR); //skip the line ending
+    }
+  // read the last partial line, if any
+  if (toread>0) {
+    char* rseqp=&(seqp[sublen]);
+    actualrlen=fread((void*)rseqp, 1, toread, fh);
+    if (actualrlen<toread) {
+      while (actualrlen>0 &&
+             (rseqp[actualrlen-1]=='\n' || rseqp[actualrlen-1]=='\r'))
+          actualrlen--;
+      }
+    sublen+=actualrlen;
+    }
+  //lastsub->sqlen+=sublen;
+  clen=sublen;
+
+  return (const char*)seqp;
+  }
+
+
diff --git a/src/GFastaIndex.cpp b/src/GFastaIndex.cpp
new file mode 100644
index 0000000..bc79b66
--- /dev/null
+++ b/src/GFastaIndex.cpp
@@ -0,0 +1,170 @@
+/*
+ * GFastaIndex.cpp
+ *
+ *  Created on: Aug 25, 2010
+ *      Author: gpertea
+ */
+
+#include "GFastaIndex.h"
+#define ERR_FAIDXLINE "Error parsing fasta index line: \n%s\n"
+#define ERR_FALINELEN "Error: sequence lines in a FASTA record must have the same length!\n"
+// Adds an index entry for sequence seqname, or overwrites the stored
+// entry when one with the same name already exists (a warning is
+// printed in that case).
+//  seqlen    - sequence length
+//  foffs     - file offset stored as the record's fpos
+//  llen      - stored as line_len
+//  llen_full - stored as line_blen
+void GFastaIndex::addRecord(const char* seqname, uint seqlen, off_t foffs, int llen, int llen_full) {
+     GFastaRec* farec=records.Find(seqname);
+     if (farec!=NULL) {
+          //the format string has a %s, so the sequence name must be passed
+          //(the original call supplied no argument for it)
+          GMessage("Warning: duplicate sequence ID (%s) added to the fasta index! Only last entry data will be kept.\n",
+                   seqname);
+          farec->seqlen=seqlen;
+          farec->fpos=foffs;
+          farec->line_len=llen;
+          farec->line_blen=llen_full;
+          }
+     else {
+         farec=new GFastaRec(seqlen,foffs,llen,llen_full);
+         records.Add(seqname,farec);
+         //the record's name is taken from the key stored by the hash
+         farec->seqname=records.getLastKey();
+         }
+}
+
+// Loads the records of an existing fasta index file (finame, or the
+// stored fai_name when finame is NULL). Existing records are discarded
+// first. Each non-comment line holds: name, sequence length, file offset,
+// line length and full line length (with line ending).
+// Returns the number of records loaded, or 0 when the file cannot be
+// opened; a malformed line is reported through GError.
+int GFastaIndex::loadIndex(const char* finame) { //load record info from existing fasta index
+    if (finame==NULL) finame=fai_name;
+    if (finame!=fai_name) {
+      //duplicate first, then release the previous name (it used to leak)
+      char* newname=Gstrdup(finame);
+      GFREE(fai_name);
+      fai_name=newname;
+      }
+    if (fai_name==NULL) GError("Error: GFastaIndex::loadIndex() called with no file name!\n");
+    records.Clear();
+    haveFai=false;
+    FILE* fi=fopen(fai_name,"rb");
+    if (fi==NULL) {
+       GMessage("Warning: cannot open fasta index file: %s!\n",fai_name);
+       return 0;
+       }
+    GLineReader fl(fi);
+    char* s=NULL;
+    while ((s=fl.nextLine())!=NULL) {
+      if (*s=='#') continue; //comment line
+      char* p=strchrs(s,"\t ");
+      if (p==NULL) GError(ERR_FAIDXLINE,s);
+      *p=0; //s now holds the genomic sequence name
+      p++;
+      uint len=0;
+      int line_len=0, line_blen=0;
+      //len is unsigned, so it is scanned with %u (was %d)
+#ifdef __WIN32__
+         long offset=-1;
+         sscanf(p, "%u%ld%d%d", &len, &offset, &line_len, &line_blen);
+#else
+         long long offset=-1;
+         sscanf(p, "%u%lld%d%d", &len, &offset, &line_len, &line_blen);
+#endif
+      //fields that failed to parse keep their zero initial value and are
+      //rejected here
+      if (len==0 || line_len==0 || line_blen==0 || line_blen<line_len)
+          GError(ERR_FAIDXLINE,p);
+      addRecord(s,len,offset,line_len, line_blen);
+      }
+    fclose(fi);
+    haveFai=(records.Count()>0);
+    return records.Count();
+}
+
+// Builds the index by reading the whole FASTA file fa_name line by line.
+// For every record it stores the name (defline text up to the first
+// character <= 32), the total sequence length, the file position right
+// after the defline, and the length of the first sequence line without
+// and with its line ending. All sequence lines of a record must have the
+// same length, except that the last one may be shorter.
+// Returns the number of records, or 0 when the file cannot be opened.
+int GFastaIndex::buildIndex() {
+    //this parses the whole fasta file, so it could be slow
+    if (fa_name==NULL)
+       GError("Error: GFastaIndex::buildIndex() called with no fasta file!\n");
+    FILE* fa=fopen(fa_name,"rb");
+    if (fa==NULL) {
+       //this is the FASTA file itself, not an index file
+       GMessage("Warning: cannot open fasta file: %s!\n",fa_name);
+       return 0;
+       }
+    records.Clear();
+    GLineReader fl(fa);
+    char* s=NULL;
+    uint seqlen=0;
+    int line_len=0,line_blen=0;
+    bool newSeq=false; //set to true after defline
+    off_t newSeqOffset=0;
+    off_t prevOffset=0; //file position after the previous line (was int)
+    char* seqname=NULL;
+    int last_len=0;
+    bool mustbeLastLine=false; //true if the line length decreases
+    while ((s=fl.nextLine())!=NULL) {
+     if (s[0]=='>') {
+        //a new defline closes the record collected so far
+        if (seqname!=NULL) {
+         // NOTE(review): the text says "Warning ... skipped" but it is
+         // sent through GError -- confirm which behavior is intended
+         if (seqlen==0)
+            GError("Warning: empty FASTA record skipped (%s)!\n",seqname);
+         else { //seqlen!=0
+           addRecord(seqname, seqlen,newSeqOffset, line_len, line_blen);
+           }
+         }
+        char *p=s;
+        while (*p > 32) p++;
+        *p=0; //cut the defline at the first blank/control character
+        GFREE(seqname);
+        seqname=Gstrdup(&s[1]);
+        newSeq=true;
+        newSeqOffset=fl.getfpos();
+        last_len=0;
+        line_len=0;
+        line_blen=0;
+        seqlen=0;
+        mustbeLastLine=false;
+        } //defline parsing
+     else { //sequence line
+       int llen=fl.length();
+       int lblen=fl.getFpos()-prevOffset; //bytes consumed by this line
+        if (newSeq) { //first sequence line after defline
+          line_len=llen;
+          line_blen=lblen;
+          }
+        else {//next seq lines after first
+          if (mustbeLastLine || llen>last_len)
+             GError(ERR_FALINELEN);
+          if (llen<last_len) mustbeLastLine=true;
+          }
+        seqlen+=llen;
+        last_len=llen;
+        newSeq=false;
+        } //sequence line
+     prevOffset=fl.getfpos();
+     }//for each line of the fasta file
+    if (seqlen>0) //store the final record
+       addRecord(seqname, seqlen, newSeqOffset, line_len, line_blen);
+    GFREE(seqname);
+    fclose(fa);
+    return records.Count();
+}
+
+
+// Writes the index to a new file named finame and remembers that name in
+// fai_name. It is an error (GError) to call this with no records or when
+// the file cannot be created. Returns the number of records written.
+int GFastaIndex::storeIndex(const char* finame) { //write the hash to a file
+    if (records.Count()==0)
+       GError("Error at GFastaIndex:storeIndex(): no records found!\n");
+    FILE* fai=fopen(finame, "w");
+    if (fai==NULL) GError("Error creating fasta index file: %s\n",finame);
+    // the FILE* overload also closes the stream
+    int rcount=storeIndex(fai);
+    GFREE(fai_name);
+    fai_name=Gstrdup(finame);
+    return rcount;
+}
+
+// Writes every record to the stream fai, one tab-separated line each
+// (name, length, file offset, line length, full line length), then closes
+// the stream. Writing stops at the first failed fprintf(). haveFai is set
+// when at least one line was written. Returns the number of lines written.
+int GFastaIndex::storeIndex(FILE* fai) {
+  //collect the records in a sorted, unique, non-owning list
+  //(presumably ordered by file offset -- depends on GFastaRec's comparison)
+  GList<GFastaRec> sorted(true,false,true);
+  records.startIterate();
+  for (GFastaRec* r=records.NextData(); r!=NULL; r=records.NextData()) {
+    sorted.Add(r);
+    }
+  int nwritten=0;
+  const int total=sorted.Count();
+  while (nwritten<total) {
+    GFastaRec* r=sorted[nwritten];
+#ifdef __WIN32__
+    int rc=fprintf(fai, "%s\t%d\t%ld\t%d\t%d\n",
+            r->seqname,r->seqlen,(long)r->fpos,
+              r->line_len, r->line_blen);
+#else
+    int rc=fprintf(fai, "%s\t%d\t%lld\t%d\t%d\n",
+            r->seqname, r->seqlen, (long long)(r->fpos),
+              r->line_len, r->line_blen);
+#endif
+    if (rc<=0) break; //couldn't write anymore
+    nwritten++;
+    }
+  fclose(fai);
+  haveFai=(nwritten>0);
+  return nwritten;
+}
diff --git a/src/GStr.cpp b/src/GStr.cpp
new file mode 100644
index 0000000..4613fa2
--- /dev/null
+++ b/src/GStr.cpp
@@ -0,0 +1,1345 @@
+//---------------------------------------------------------------------------
+#include "GStr.h"
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include "GBase.h"
+#include <stdarg.h>
+#include <errno.h>
+
+//---------------------------------------------------------------------------
+
+GStr::Data GStr::null_data;
+
+//=========================================
+
+GStr::Data * GStr::new_data(int length) {
+//static helper: allocates a Data block able to hold `length` characters;
+//the characters are left uninitialized but the terminator is set.
+//The block starts with ref_count 0. For length <= 0 the shared
+//null_data block is returned instead.
+    if (length <= 0) return &null_data;
+    Data* fresh;
+    GMALLOC(fresh, sizeof(Data)+length);
+    fresh->length = length;
+    fresh->ref_count = 0;
+    fresh->chars[length] = '\0';
+    return fresh;
+ }
+
+GStr::Data* GStr::new_data(const char* str) {
+//static helper: allocates a Data block holding a copy of str, with
+//ref_count 0. A NULL or empty str yields the shared null_data block.
+ if (str==NULL) return &null_data;
+ const int length=strlen(str);
+ if (length <= 0) return &null_data;
+ Data* fresh;
+ GMALLOC(fresh, sizeof(Data)+length);
+ strcpy(fresh->chars, str); //copies the terminating '\0' as well
+ fresh->length = length;
+ fresh->ref_count = 0;
+ return fresh;
+ }
+ 
+// Makes this string own a private buffer of exactly len characters
+// (content uninitialized, terminator set). The current buffer is kept as
+// is when it already has that length and is not shared; otherwise the
+// reference to it is dropped (freeing it on the last reference) and a new
+// block is allocated. len <= 0 switches to the shared null_data block.
+void GStr::replace_data(int len) {
+    const bool reusable = (len == my_data->length && my_data->ref_count <= 1);
+    if (reusable) return;
+
+    if (my_data != &null_data) {
+        if (--my_data->ref_count == 0)
+            GFREE(my_data);
+        }
+
+    if (len <= 0) {
+        my_data = &null_data;
+        return;
+        }
+    GMALLOC(my_data, sizeof(Data) + len);
+    my_data->length = len;
+    my_data->ref_count = 1;
+    my_data->chars[len] = '\0';
+}
+
+// Makes this string refer to the given Data block: the reference to the
+// current block is dropped (freeing it on the last reference) and the
+// new block's reference count is incremented. null_data is never counted
+// or freed.
+void GStr::replace_data(Data *data) {
+    //replacing a block with itself must be a no-op: the original code
+    //released the block when its count was 1 and then went on using the
+    //freed memory (reachable through self-assignment)
+    if (data == my_data) return;
+    if (my_data != &null_data && --my_data->ref_count == 0)
+        GFREE(my_data);
+    if (data != &null_data)
+        data->ref_count++;
+    my_data = data;
+}
+
+// Ensures this string does not share its buffer with another GStr: when
+// the current block has more than one reference, the characters are
+// copied into a new block and the reference to the shared one is dropped.
+void GStr::make_unique() {
+    if (my_data->ref_count <= 1) return; //already exclusive
+    const int len = length();
+    Data *own = new_data(len);
+    ::memcpy(own->chars, chars(), len);
+    my_data->ref_count--;
+    own->ref_count++;
+    my_data = own;
+}
+
+// C string == GStr; a NULL C string equals only an empty GStr
+bool operator==(const char *s1, const GStr& s2){
+  return (s1==NULL) ? s2.is_empty()
+                    : (strcmp(s1, s2.chars()) == 0);
+  }
+
+// C string < GStr; a NULL C string sorts before any non-empty GStr
+bool operator<(const char *s1, const GStr& s2) {
+  return (s1==NULL) ? !s2.is_empty()
+                    : (strcmp(s1, s2.chars()) < 0);
+  }
+
+// C string <= GStr; a NULL C string is <= everything
+bool operator<=(const char *s1, const GStr& s2){
+ return (s1==NULL) || (strcmp(s1, s2.chars()) <= 0);
+ }
+
+// C string > GStr; a NULL C string is never greater
+bool operator>(const char *s1, const GStr& s2) {
+ return (s1!=NULL) && (strcmp(s1, s2.chars()) > 0);
+ }
+
+
+// Default constructor: an empty string referring to the shared null_data
+// block, with no read buffer and the tokenizer state reset.
+GStr::GStr():my_data(&null_data) {
+ readbuf=NULL;
+ readbufsize=0;
+ fTokenDelimiter=NULL;
+ fTokenizeMode=tkCharSet;
+ fLastTokenStart=0;
+ }
+
+// Copy constructor: shares s's character block (its reference count is
+// incremented by replace_data); read buffer and tokenizer state are not
+// copied but reset.
+GStr::GStr(const GStr& s): my_data(&null_data){
+ readbuf=NULL;
+ readbufsize=0;
+ fTokenDelimiter=NULL;
+ fTokenizeMode=tkCharSet;
+ fLastTokenStart=0;
+ replace_data(s.my_data);
+ }
+
+// Constructs a string holding a copy of the C string s.
+// For a NULL or empty s, new_data() returns the shared null_data block,
+// whose ref_count is then also set to 1 by the last statement.
+GStr::GStr(const char *s): my_data(&null_data) {
+  readbuf=NULL;
+  readbufsize=0;
+  fTokenDelimiter=NULL;
+  fTokenizeMode=tkCharSet;
+  fLastTokenStart=0;
+  my_data=new_data(s);
+  my_data->ref_count = 1;
+ }
+
+// Constructs a string holding the decimal text of i ("%d" formatting).
+GStr::GStr(const int i): my_data(&null_data) {
+ readbuf=NULL;
+ readbufsize=0;
+ fTokenDelimiter=NULL;
+ fTokenizeMode=tkCharSet;
+ fLastTokenStart=0;
+ char digits[20];
+ sprintf(digits,"%d",i);
+ const int ndigits = ::strlen(digits);
+ replace_data(ndigits);
+ ::memcpy(chrs(), digits, ndigits);
+ }
+
+// Constructs a string holding f formatted with "%f".
+GStr::GStr(const double f): my_data(&null_data) {
+ fTokenDelimiter=NULL;
+ fTokenizeMode=tkCharSet;
+ fLastTokenStart=0;
+ readbuf=NULL;
+ readbufsize=0;
+ //"%f" prints every integer digit: the largest finite double needs 317
+ //characters plus the terminator, so the former 20-byte buffer could be
+ //overrun. snprintf() additionally bounds the write.
+ char buf[352];
+ snprintf(buf,sizeof(buf),"%f",f);
+ const int len = ::strlen(buf);
+ replace_data(len);
+ ::memcpy(chrs(), buf, len);
+ }
+
+// Constructs a string made of n copies of the character c.
+// For n <= 0 the string is empty.
+GStr::GStr(char c, int n): my_data(&null_data) {
+  fTokenDelimiter=NULL;
+  fTokenizeMode=tkCharSet;
+  fLastTokenStart=0;
+  readbuf=NULL;
+  readbufsize=0;
+  replace_data(n);
+  //a negative n must not reach memset(): converted to size_t it would
+  //become a huge count written over the shared null_data block
+  if (n>0) ::memset(chrs(), c, n);
+  }
+
+// Destructor: drops this string's reference to its character block
+// (freeing it on the last reference; null_data is never freed) and
+// releases the tokenizer delimiter and the read buffer.
+GStr::~GStr() {
+  if (my_data != &null_data) {
+     if (--my_data->ref_count == 0) {
+        GFREE(my_data);
+        }
+     }
+  GFREE(fTokenDelimiter);
+  GFREE(readbuf);
+  }
+
+char& GStr::operator[](int idx){
+//writable access to one character; a negative idx counts from the end.
+//An out-of-range idx is reported through invalid_index_error().
+  const int len=length();
+  if (idx < 0) idx += len;
+  const bool inRange=(idx >= 0 && idx < len);
+  if (!inRange) invalid_index_error("operator[]");
+  make_unique();  //the caller may write through the reference
+  return *(chrs()+idx);
+  }
+
+char GStr::operator[](int idx) const {
+//read-only access: returns a copy of one character; a negative idx
+//counts from the end. An out-of-range idx is reported through
+//invalid_index_error().
+  const int len=length();
+  if (idx < 0) idx += len;
+  const bool inRange=(idx >= 0 && idx < len);
+  if (!inRange) invalid_index_error("operator[]");
+  return *(chars()+idx);
+  }
+
+// Assignment from another GStr: this string ends up sharing s's character
+// block. When both already refer to the same block (which includes
+// self-assignment) nothing needs to change; the original code could free
+// that block and keep using it when its reference count was 1.
+// The former make_unique() call was dropped: it copied the old
+// characters only to discard them right away.
+GStr& GStr::operator=(const GStr& s) {
+  if (my_data != s.my_data) replace_data(s.my_data);
+  return *this;
+  }
+
+// Assignment from a C string; NULL yields an empty string.
+GStr& GStr::operator=(const char *s) {
+  make_unique(); //edit operation ahead
+  const int len = (s==NULL) ? 0 : ::strlen(s);
+  replace_data(len);
+  if (s!=NULL) ::memcpy(chrs(), s, len);
+  return *this;
+  }
+
+// Assignment from a double, formatted with "%f".
+GStr& GStr::operator=(const double f) {
+ make_unique(); //edit operation ahead
+ //"%f" prints every integer digit: the largest finite double needs 317
+ //characters plus the terminator, so the former 20-byte buffer could be
+ //overrun. snprintf() additionally bounds the write.
+ char buf[352];
+ snprintf(buf,sizeof(buf),"%f",f);
+ const int len = ::strlen(buf);
+ replace_data(len);
+ ::memcpy(chrs(), buf, len);
+ return *this;
+}
+
+// Assignment from an int, formatted with "%d".
+GStr& GStr::operator=(const int i) {
+ make_unique(); //edit operation ahead
+ char digits[20];
+ sprintf(digits,"%d",i);
+ const int ndigits = ::strlen(digits);
+ replace_data(ndigits);
+ ::memcpy(chrs(), digits, ndigits);
+ return *this;
+}
+
+// Equality with another GStr: same length and same bytes
+bool GStr::operator==(const GStr& s) const {
+  if (s.is_empty()) return is_empty();
+  if (length() != s.length()) return false;
+  return (memcmp(chars(), s.chars(), length()) == 0);
+  }
+
+// Equality with a C string; NULL is treated like an empty string
+bool GStr::operator==(const char *s) const {
+ return (s==NULL) ? is_empty() : (strcmp(chars(), s) == 0);
+ }
+
+// Less-than against another GStr; nothing is less than an empty string
+bool GStr::operator<(const GStr& s) const {
+ return !s.is_empty() && (strcmp(chars(), s.chars()) < 0);
+ }
+
+// Less-than against a C string; nothing is less than NULL
+bool GStr::operator<(const char *s) const {
+ return (s!=NULL) && (strcmp(chars(), s) < 0);
+ }
+
+// Less-or-equal against another GStr; only an empty string is <= empty
+bool GStr::operator<=(const GStr& s) const {
+ return s.is_empty() ? is_empty()
+                     : (strcmp(chars(), s.chars()) <= 0);
+ }
+
+// Less-or-equal against a C string; only an empty string is <= NULL
+bool GStr::operator<=(const char *s) const {
+ return (s==NULL) ? is_empty() : (strcmp(chars(), s) <= 0);
+ }
+
+// Greater-than against another GStr; any non-empty string is > empty
+bool GStr::operator>(const GStr& s) const {
+ return s.is_empty() ? !is_empty()
+                     : (strcmp(chars(), s.chars()) > 0);
+ }
+
+// Greater-than against a C string; any non-empty string is > NULL
+bool GStr::operator>(const char *s) const {
+ return (s==NULL) ? !is_empty() : (strcmp(chars(), s) > 0);
+ }
+
+// Greater-or-equal against another GStr; everything is >= empty
+bool GStr::operator>=(const GStr& s) const {
+ return s.is_empty() || (strcmp(chars(), s.chars()) >= 0);
+ }
+
+// Greater-or-equal against a C string; everything is >= NULL
+bool GStr::operator>=(const char *s) const {
+ return (s==NULL) || (strcmp(chars(), s) >= 0);
+ }
+
+// Inequality with another GStr: different length or different bytes
+bool GStr::operator!=(const GStr& s) const {
+  if (s.is_empty()) return !is_empty();
+  if (length() != s.length()) return true;
+  return (memcmp(chars(), s.chars(), length()) != 0);
+  }
+
+// Inequality with a C string; NULL is treated like an empty string
+bool GStr::operator!=(const char *s) const {
+ return (s==NULL) ? !is_empty() : (strcmp(chars(), s) != 0);
+ }
+
+// Appends another GStr; s is converted to const char* and passed to append()
+GStr& GStr::operator+=(const GStr& s) {
+ return append((const char *)s);
+ }
+
+// Appends a C string; forwards to append()
+GStr& GStr::operator+=(const char* s) {
+ return append(s);
+ }
+
+// Appends a single character, passed to append() as a one-character
+// C string (so a '\0' character results in an empty string being appended)
+GStr& GStr::operator+=(const char c) {
+ char one[4];
+ one[0]=c;
+ one[1]='\0';
+ return append(one);
+ }
+
+// Appends the decimal text of i ("%d" formatting)
+GStr& GStr::operator+=(const int i) {
+ char buf[20];
+ sprintf(buf,"%d",i);
+ return append(buf);
+ }
+
+
+// Appends f formatted with "%f".
+GStr& GStr::operator+=(const double f) {
+ //"%f" prints every integer digit: the largest finite double needs 317
+ //characters plus the terminator, so the former 30-byte buffer could be
+ //overrun. snprintf() additionally bounds the write.
+ char buf[352];
+ snprintf(buf,sizeof(buf),"%f",f);
+ return append(buf);
+ }
+ 
+// True when the string has length 0. The test is on the length rather
+// than on the identity of the data block (see the disabled line below).
+bool GStr::is_empty() const {
+  //return my_data == &null_data;
+  return (length()==0);
+  }
+
+// Returns a new GStr built from this one with the copy constructor
+GStr GStr::copy() const {
+ GStr newstring(*this);
+ return newstring;
+ }
+
+// Makes this string empty and returns *this
+GStr& GStr::clear() {
+  make_unique(); //edit operation ahead
+  replace_data(0); // length 0 selects the shared null_data block
+  return *this;
+  }
+
+// Forwards to the const char* overload of index(), using s's characters
+int GStr::index(const GStr& s, int start_index) const {
+ return index(s.chars(), start_index);
+ }
+
+// True when index(s, 0) reports a position (a value >= 0)
+bool GStr::contains(const GStr& s) const {
+ return (index(s, 0) >= 0);
+ }
+
+// True when index(s, 0) reports a position (a value >= 0)
+bool GStr::contains(const char *s) const {
+ return (index(s, 0) >= 0);
+ }
+
+// True when this string begins with s; delegates to the global
+// ::startsWith() helper (formerly implemented as index(s, 0) == 0)
+bool GStr::startsWith(const char *s) const {
+ //return (index(s, 0) == 0);
+ return ::startsWith(this->chars(), s);
+ }
+
+// True when this string begins with s; delegates to the global
+// ::startsWith() helper (formerly implemented as index(s, 0) == 0)
+bool GStr::startsWith(const GStr& s) const {
+ //return (index(s, 0) == 0);
+ return ::startsWith(this->chars(), s.chars());
+ }
+
+// True when this string ends with s; delegates to the global
+// ::endsWith() helper
+bool GStr::endsWith(const char *s) const {
+ return ::endsWith(this->chars(), s);
+ }
+
+// True when this string ends with s; delegates to the global
+// ::endsWith() helper
+bool GStr::endsWith(const GStr& s) const {
+ return ::endsWith(this->chars(), s.chars());
+ }
+
+// True when index(c, 0) reports a position (a value >= 0)
+bool GStr::contains(char c) const {
+ return (index(c, 0) >= 0);
+ }
+
+GStr& GStr::format(const char *fmt,...) {
+// Replaces the content of this string with text formatted as by sprintf.
+// The output is measured first and then written into a buffer of exactly
+// the needed size; the original wrote with vsprintf() into a buffer of
+// strlen(fmt)+1024 bytes, which longer expansions would overrun.
+  make_unique(); //edit operation ahead
+  va_list arguments;
+  va_start(arguments,fmt);
+  va_list measure;
+  va_copy(measure, arguments); //a va_list may be traversed only once
+  int len=vsnprintf(NULL, 0, fmt, measure);
+  va_end(measure);
+  if (len<0) { //formatting error: leave the string empty
+     va_end(arguments);
+     replace_data(0);
+     return *this;
+     }
+  //format into a temporary buffer, not straight into chrs(): an argument
+  //may point at this string's own characters, which replace_data() can
+  //release
+  char* buf=NULL;
+  GMALLOC(buf, len+1);
+  vsnprintf(buf, len+1, fmt, arguments);
+  va_end(arguments);
+  replace_data(len); //this also adds the '\0' at the end!
+                     //and sets the right len
+  ::memcpy(chrs(), buf, len);
+  GFREE(buf);
+  return *this;
+  }
+
+// Appends the printf-style expansion of fmt to this string.
+// The output is first measured with a sizing vsnprintf() pass and then
+// written into an exactly sized buffer (the previous fixed
+// strlen(fmt)+1024 buffer could be overrun by vsprintf).
+// If vsnprintf() reports an error nothing is appended.
+GStr& GStr::appendfmt(const char *fmt,...) {
+  make_unique(); //edit operation ahead
+  va_list arguments;
+  va_start(arguments,fmt);
+  va_list measure_args; //each formatting pass consumes its argument list
+  va_copy(measure_args, arguments);
+  int len=vsnprintf(NULL, 0, fmt, measure_args);
+  va_end(measure_args);
+  if (len<0) { //formatting failed
+     va_end(arguments);
+     return *this;
+     }
+  char* buf;
+  GMALLOC(buf, len+1);
+  vsnprintf(buf, len+1, fmt, arguments);
+  va_end(arguments);
+  append(buf);
+  GFREE(buf);
+  return *this;
+  }
+
+// Strips every leading and trailing occurrence of c.
+GStr& GStr::trim(char c) {
+ int first=0;
+ while (first<length() && chars()[first]==c) first++;
+ if (first==length()) {
+       //nothing but c (or already empty): the result is the empty string
+       make_unique(); //edit operation ahead
+       replace_data(0);
+       return *this;
+       }
+ int last=length()-1;
+ while (last>first && chars()[last]==c) last--;
+ const int kept=last-first+1;
+ if (kept==length()) return *this; //nothing to trim
+ make_unique(); //edit operation ahead
+ Data *trimmed = new_data(kept);
+ ::memcpy(trimmed->chars, &chars()[first], kept);
+ replace_data(trimmed);
+ return *this;
+ }
+
+// Strips from both ends every character that appears in the set c.
+GStr& GStr::trim(const char* c) {
+ int first=0;
+ while (first<length() && strchr(c, chars()[first])!=NULL) first++;
+ if (first==length()) {
+        replace_data(0); //string was entirely trimmed
+        return *this;
+        }
+ int last=length()-1;
+ while (last>first && strchr(c, chars()[last])!=NULL) last--;
+ const int kept=last-first+1;
+ if (kept==length()) return *this; //nothing to trim
+ make_unique(); //edit operation ahead
+ Data *trimmed = new_data(kept);
+ ::memcpy(trimmed->chars, &chars()[first], kept);
+ replace_data(trimmed);
+ return *this;
+ }
+
+// Strips trailing occurrences of c; the left end is untouched.
+GStr& GStr::trimR(char c) {
+ int keep=length(); //number of leading characters that survive
+ while (keep>0 && chars()[keep-1]==c) keep--;
+ if (keep==0) {
+       replace_data(0); //string was entirely trimmed
+       return *this;
+       }
+ if (keep==length()) return *this; //nothing to trim
+ make_unique(); //edit operation ahead
+ Data *trimmed = new_data(keep);
+ ::memcpy(trimmed->chars, chars(), keep);
+ replace_data(trimmed);
+ return *this;
+ }
+
+// Strips from the right end every character that appears in the set c.
+GStr& GStr::trimR(const char* c) {
+ int keep=length(); //number of leading characters that survive
+ while (keep>0 && strchr(c,chars()[keep-1])!=NULL) keep--;
+ if (keep==0) {
+       replace_data(0); //string was entirely trimmed
+       return *this;
+       }
+ if (keep==length()) return *this; //nothing to trim
+ make_unique(); //edit operation ahead
+ Data *trimmed = new_data(keep);
+ ::memcpy(trimmed->chars, chars(), keep);
+ replace_data(trimmed);
+ return *this;
+ }
+
+
+// Removes cstr from the end of this string when the string ends with it;
+// otherwise the string is left unchanged. A NULL or empty cstr is a no-op.
+// The suffix must match in full: a string shorter than cstr is never
+// modified (previously a string equal to a tail of cstr, e.g. "\n" against
+// "\r\n", was cleared).
+GStr& GStr::chomp(const char* cstr) {
+ if (cstr==NULL || *cstr==0) return *this;
+ const int clen=strlen(cstr);
+ const int slen=my_data->length;
+ if (clen>slen) return *this; //too short to end with cstr
+ const int newlen=slen-clen;
+ if (memcmp(my_data->chars+newlen, cstr, clen)!=0)
+       return *this; //does not end with cstr
+ if (newlen==0) {
+       replace_data(0); //string will be entirely trimmed
+       return *this;
+       }
+ make_unique(); //edit operation ahead
+ Data *data = new_data(newlen);
+ ::memcpy(data->chars, chars(), newlen);
+ replace_data(data);
+ return *this;
+ }
+
+// Strips leading occurrences of c; the right end is untouched.
+GStr& GStr::trimL(char c) {
+ int skip=0; //number of leading characters to drop
+ while (skip<length() && chars()[skip]==c) skip++;
+ if (skip==length()) {
+       replace_data(0); //string was entirely trimmed
+       return *this;
+       }
+ if (skip==0) return *this; //nothing to trim
+ const int kept=length()-skip;
+ make_unique(); //edit operation ahead
+ Data *trimmed = new_data(kept);
+ ::memcpy(trimmed->chars, &chars()[skip], kept);
+ replace_data(trimmed);
+ return *this;
+ }
+
+// Strips from the left end every character that appears in the set c.
+GStr& GStr::trimL(const char* c) {
+ int skip=0; //number of leading characters to drop
+ while (skip<length() && strchr(c,chars()[skip])!=NULL) skip++;
+ if (skip==length()) {
+       replace_data(0); //string was entirely trimmed
+       return *this;
+       }
+ if (skip==0) return *this; //nothing to trim
+ const int kept=length()-skip;
+ make_unique(); //edit operation ahead
+ Data *trimmed = new_data(kept);
+ ::memcpy(trimmed->chars, &chars()[skip], kept);
+ replace_data(trimmed);
+ return *this;
+ }
+
+// Right-aligns the string in a field of len characters by prepending c.
+// A string already len characters or longer is returned unchanged.
+GStr& GStr::padR(int len, char c) {
+ if (length()>=len) return *this; //no room for padding
+ make_unique(); //edit operation ahead
+ const int fill=len-length();
+ Data *padded = new_data(len);
+ ::memset(padded->chars, c, fill);
+ ::memcpy(padded->chars+fill, chars(), length());
+ replace_data(padded);
+ return *this;
+ }
+
+// Left-aligns the string in a field of len characters by appending c.
+// A string already len characters or longer is returned unchanged.
+GStr& GStr::padL(int len, char c) {
+ if (length()>=len) return *this; //no room for padding
+ make_unique(); //edit operation ahead
+ const int fill=len-length();
+ Data *padded = new_data(len);
+ ::memcpy(padded->chars, chars(), length());
+ ::memset(padded->chars+length(), c, fill);
+ replace_data(padded);
+ return *this;
+ }
+
+// Centers the string in a field of len characters, filling both sides
+// with c. When the padding is odd the extra character goes on the right.
+// A string already len characters or longer is returned unchanged.
+GStr& GStr::padC(int len, char c) {
+ if (length()>=len) return *this; //no room for padding
+ make_unique(); //edit operation ahead
+ const int left=(len-length())/2;
+ Data *padded = new_data(len);
+ if (left>0)
+      ::memset(padded->chars, c, left);
+ ::memcpy(padded->chars+left, chars(), length());
+ const int right_start=left+length();
+ if (right_start<len)
+      ::memset(padded->chars+right_start, c, len-right_start);
+ replace_data(padded);
+ return *this;
+ }
+
+// Concatenation with a C string on the left: returns s1 followed by s2.
+// An empty s1 yields a copy of s2.
+GStr operator+(const char *s1, const GStr& s2) {
+    const int prefix_len = ::strlen(s1);
+    if (prefix_len != 0) {
+        GStr joined;
+        joined.replace_data(prefix_len + s2.length());
+        char* out = joined.chrs();
+        ::memcpy(out, s1, prefix_len);
+        ::memcpy(out + prefix_len, s2.chars(), s2.length());
+        return joined;
+        }
+    return s2;
+}
+
+//=========================================
+
+// Returns this string followed by s. When either side is empty the other
+// one is returned as is.
+GStr GStr::operator+(const GStr& s) const {
+    if (length() == 0)
+        return s;
+    if (s.length() == 0)
+        return *this;
+    GStr joined;
+    joined.replace_data(length() + s.length());
+    char* out = joined.chrs();
+    ::memcpy(out, chars(), length());
+    ::memcpy(out + length(), s.chars(), s.length());
+    return joined;
+}
+
+//=========================================
+
+// Returns this string followed by the C string s. An empty s yields a
+// copy of this string.
+GStr GStr::operator+(const char *s) const {
+    const int tail_len = ::strlen(s);
+    if (tail_len != 0) {
+        GStr joined;
+        joined.replace_data(length() + tail_len);
+        char* out = joined.chrs();
+        ::memcpy(out, chars(), length());
+        ::memcpy(out + length(), s, tail_len);
+        return joined;
+        }
+    return *this;
+}
+
+// Returns this string followed by the decimal text of i.
+GStr GStr::operator+(const int i) const {
+    char digits[20];
+    sprintf(digits, "%d", i);
+    const int tail_len = ::strlen(digits);
+    GStr joined;
+    joined.replace_data(length() + tail_len);
+    char* out = joined.chrs();
+    ::memcpy(out, chars(), length());
+    ::memcpy(out + length(), digits, tail_len);
+    return joined;
+}
+
+// Returns this string followed by c, formatted as a one-character C string.
+GStr GStr::operator+(const char c) const {
+    char onechar[4];
+    sprintf(onechar, "%c", c);
+    const int tail_len = ::strlen(onechar);
+    GStr joined;
+    joined.replace_data(length() + tail_len);
+    char* out = joined.chrs();
+    ::memcpy(out, chars(), length());
+    ::memcpy(out + length(), onechar, tail_len);
+    return joined;
+}
+
+// Returns this string followed by f formatted with "%f".
+// "%f" prints every integer digit, so a value near DBL_MAX needs about
+// 309 digits plus sign, '.', six decimals and the NUL. The buffer is sized
+// for that and snprintf() bounds the write, where the former 30-byte
+// buffer could be overrun.
+GStr GStr::operator+(const double f) const {
+    char buf[400];
+    snprintf(buf, sizeof(buf), "%f", f);
+    const int s_length = ::strlen(buf);
+    GStr newstring;
+    newstring.replace_data(length() + s_length);
+    ::memcpy(newstring.chrs(), chars(), length());
+    ::memcpy(&(newstring.chrs())[length()], buf, s_length);
+    return newstring;
+}
+
+
+//=========================================
+
+// True when every character of the string is whitespace according to
+// isspace(). A string backed by the shared null_data block reports false.
+bool GStr::is_space() const {
+
+    if (my_data == &null_data)
+        return false;
+
+    // isspace() requires a value representable as unsigned char; passing a
+    // negative plain char (bytes >= 0x80 where char is signed) is undefined.
+    for (const char *p = chars(); *p; p++)
+        if (!isspace((unsigned char)*p))
+            return false;
+
+    return true;
+}
+
+//=========================================
+
+// Returns the len characters starting at idx as a new string.
+// A negative idx counts from the right end. A negative len, or one that
+// would run past the end, means "up to the end of the string".
+// An idx outside [0, length()) is reported via invalid_args_error().
+GStr GStr::substr(int idx, int len) const {
+    const int total = length();
+    if (idx < 0)
+        idx += total;
+
+    const int available = total - idx;
+    if (len < 0 || len > available)
+        len = available;
+
+    const bool bad_args = (idx < 0) || (idx >= total) || (len < 0);
+    if (bad_args)
+        invalid_args_error("substr()");
+
+    GStr piece;
+    piece.replace_data(len);
+    ::memcpy(piece.chrs(), chars() + idx, len);
+    return piece;
+}
+
+// Reverses the characters of this string in place.
+GStr& GStr::reverse() {
+  make_unique();
+  char* text=my_data->chars;
+  //swap characters pairwise, walking inward from both ends
+  for (int lo=0, hi=my_data->length-1; lo<hi; lo++, hi--) {
+     const char saved=text[lo];
+     text[lo]=text[hi];
+     text[hi]=saved;
+     }
+  return *this;
+}
+
+
+//transform: any character from 'rfrom' is replaced with the corresponding
+//char from 'rto' (same position); when 'rto' is NULL every character found
+//in 'rfrom' is deleted instead.
+//rfrom and rto must have the same length, otherwise invalid_args_error()
+//is raised. An empty string or a NULL/empty rfrom is a no-op.
+
+GStr&  GStr::tr(const char *rfrom, const char* rto) {
+ if (length() == 0 || rfrom==NULL || strlen(rfrom)==0)
+        return *this;
+ unsigned int l=strlen(rfrom);
+ if (rto!=NULL && strlen(rto)!=l) {
+      invalid_args_error("tr()");
+      return *this; //do not index rto out of range if the handler returns
+      }
+ make_unique(); //edit operation ahead
+
+ if (rto!=NULL) { //char substitution case: same length, edit in place
+   // The substitution is applied directly to my_data and kept. Previously
+   // the edited buffer was then replaced by a separately allocated block
+   // that had never been filled in, discarding the result.
+   for (int i=0; i<length(); i++) {
+    const char* p=strchr(rfrom, my_data->chars[i]);
+    if (p!=NULL)
+         my_data->chars[i]=rto[p-rfrom];
+    }
+   return *this;
+   }
+
+ //deletion case: copy the runs between matches into a new block
+ Data *data = new_data(length());
+ const char* s = my_data->chars;
+ const char* p;
+ char* dest = data->chars;
+ while ((p=strpbrk(s,rfrom))!=NULL) {
+    memcpy(dest,s,p-s);
+    dest+=p-s;
+    s=p+1; //skip the deleted character
+    }
+ strcpy(dest, s); //remainder after the last match, with its terminator
+ data->length=strlen(data->chars);
+ replace_data(data);
+ return *this;
+}
+
+
+// Search and replace all the occurrences of a string with another string,
+// or just remove the given string (if the replacement is NULL).
+// An empty string or a NULL/empty rfrom is a no-op. Matching proceeds left
+// to right and resumes after each match, so replaced text is not rescanned.
+GStr&  GStr::replace(const char *rfrom, const char* rto) {
+ if (length() == 0 || rfrom==NULL || strlen(rfrom)==0)
+        return *this;
+ unsigned int l=strlen(rfrom);                 //length of the search string
+ unsigned int tl= (rto==NULL)?0:strlen(rto);   //length of the replacement (0 = delete)
+ make_unique(); //edit operation ahead
+ char* p;
+ char* dest;
+ char* newdest=NULL;
+ char* s = my_data->chars;
+ if (tl!=l) { //lengths differ: build the result in a temporary buffer
+   if (tl>l) {  //possible enlargement
+       // upper bound used here: every character grows by (tl-l)
+       GMALLOC(newdest, length()*(tl-l+1)+1);
+       }
+      else  {//delete or replace with a shorter string
+       // the result cannot be longer than the original
+       GMALLOC(newdest, length() + 1);
+       }
+      dest=newdest; 
+      if (tl==0) {//deletion
+           while ((p=strstr(s,rfrom))!=NULL) {
+               //rfrom found at position p
+                memcpy(dest,s,p-s);
+                dest+=p-s;
+                s+=p-s+l; //s positioned in string after rfrom
+                }
+           //no more occurrences, copy the remaining string
+           strcpy(dest, s);
+          }
+        else { //replace with another string
+          while ((p=strstr(s,rfrom))!=NULL) {
+              memcpy(dest,s,p-s); //copy up to the match
+              dest+=p-s;
+              memcpy(dest,rto,tl); //put the replacement string
+              dest+=tl;
+              s+=p-s+l; //continue after the matched text
+              }
+          //not found any more, copy to end of string
+          strcpy(dest, s);
+          }
+       // new_data() is given the NUL-terminated result and the temporary
+       // buffer is freed afterwards
+       Data* data=new_data(newdest);
+       replace_data(data);
+       GFREE(newdest);
+       }
+  else { //in-place editing: same length, no need to reallocate
+    while ((p=strstr(s,rfrom))!=NULL) {
+        memcpy(p,rto,l);
+        s+=p-s+l;
+        }    
+   }
+ return *this;
+}
+
+
+
+// Removes len characters starting at idx.
+// A negative idx counts from the right end; len == -1 means "through the
+// end of the string"; len == 0 is a no-op. Out-of-range arguments are
+// reported via invalid_args_error().
+GStr&  GStr::cut(int idx, int len) {
+    if (len == 0)
+        return *this;
+    make_unique(); //edit operation ahead
+
+    if (idx < 0)
+        idx += length();
+    if (len == -1)
+        len = length() - idx;
+
+    const bool bad_args = (idx < 0) || (idx >= length()) ||
+                          (len < 0) || (len > length() - idx);
+    if (bad_args)
+        invalid_args_error("cut()");
+
+    Data *shorter = new_data(length() - len);
+    if (idx > 0)
+        ::memcpy(shorter->chars, chars(), idx); //part before the cut
+    //part after the cut, including its terminator
+    ::strcpy(shorter->chars + idx, chars() + idx + len);
+    replace_data(shorter);
+
+    return *this;
+}
+
+//=========================================
+
+// Replaces the len characters starting at idx with the string s.
+// A negative idx counts from the right end; len == -1 means "through the
+// end of the string". Out-of-range arguments are reported via
+// invalid_args_error(), now naming paste() (the message used to say
+// "replace()", pointing at the wrong method).
+GStr&  GStr::paste(const GStr& s, int idx, int len) {
+    // A negative idx specifies an idx from the right of the string.
+    if (idx < 0)
+        idx += length();
+    make_unique(); //edit operation ahead
+
+    // A length of -1 specifies the rest of the string.
+    if (len == -1)
+        len = length() - idx;
+
+    if (idx<0 || idx>=length() || len<0 || len>length()-idx)
+        invalid_args_error("paste()");
+
+    if (len == s.length() && my_data->ref_count == 1)
+        //same size and not shared: overwrite in place
+        ::memcpy(&chrs()[idx], s.chars(), len);
+    else {
+        //assemble head + s + tail in a new block
+        Data *data = new_data(length() - len + s.length());
+        if (idx > 0)
+            ::memcpy(data->chars, chars(), idx);
+        if (s.length() > 0)
+            ::memcpy(&data->chars[idx], s.chars(), s.length());
+        ::strcpy(&data->chars[idx+s.length()], &chars()[idx+len]);
+        replace_data(data);
+    }
+
+    return *this;
+}
+
+//=========================================
+
+// Replaces the len characters starting at idx with the C string s.
+// A negative idx counts from the right end; len == -1 means "through the
+// end of the string". Out-of-range arguments are reported via
+// invalid_args_error(), now naming paste() (the message used to say
+// "replace()", pointing at the wrong method).
+GStr& GStr::paste(const char *s, int idx, int len) {
+
+    make_unique(); //edit operation ahead
+    // A negative idx specifies an idx from the right of the string.
+    if (idx < 0)
+        idx += length();
+
+    // A length of -1 specifies the rest of the string.
+    if (len == -1)
+        len = length() - idx;
+
+    if (idx<0 || idx>=length() || len<0 || len>length()-idx)
+        invalid_args_error("paste()");
+
+    const int s_length = ::strlen(s);
+
+    if (len == s_length && my_data->ref_count == 1)
+        //same size and not shared: overwrite in place
+        ::memcpy(&chrs()[idx], s, len);
+    else {
+        //assemble head + s + tail in a new block
+        Data *data = new_data(length() - len + s_length);
+        if (idx > 0)
+            ::memcpy(data->chars, chars(), idx);
+        if (s_length > 0)
+            ::memcpy(&data->chars[idx], s, s_length);
+        ::strcpy(&data->chars[idx+s_length], &chars()[idx+len]);
+        replace_data(data);
+    }
+
+    return *this;
+}
+
+//=========================================
+
+// Inserts s before the character at idx. A negative idx counts from the
+// right end. An idx outside [0, length()) is reported via
+// invalid_index_error(). Inserting an empty string changes nothing.
+GStr& GStr::insert(const GStr& s, int idx) {
+    make_unique(); //edit operation ahead
+
+    if (idx < 0)
+        idx += length();
+
+    const bool bad_index = (idx < 0) || (idx >= length());
+    if (bad_index)
+        invalid_index_error("insert()");
+
+    const int added = s.length();
+    if (added <= 0)
+        return *this;
+
+    Data *grown = new_data(length() + added);
+    if (idx > 0)
+        ::memcpy(grown->chars, chars(), idx);          //head
+    ::memcpy(grown->chars + idx, s.chars(), added);    //inserted text
+    ::strcpy(grown->chars + idx + added, chars() + idx); //tail with terminator
+    replace_data(grown);
+
+    return *this;
+}
+
+//=========================================
+
+// Inserts the C string s before the character at idx. A negative idx
+// counts from the right end. An idx outside [0, length()) is reported via
+// invalid_index_error(). Inserting an empty string changes nothing.
+GStr& GStr::insert(const char *s, int idx) {
+    make_unique(); //edit operation ahead
+    if (idx < 0)
+        idx += length();
+
+    const bool bad_index = (idx < 0) || (idx >= length());
+    if (bad_index)
+        invalid_index_error("insert()");
+
+    const int added = ::strlen(s);
+    if (added <= 0)
+        return *this;
+
+    Data *grown = new_data(length() + added);
+    if (idx > 0)
+        ::memcpy(grown->chars, chars(), idx);          //head
+    ::memcpy(grown->chars + idx, s, added);            //inserted text
+    ::strcpy(grown->chars + idx + added, chars() + idx); //tail with terminator
+    replace_data(grown);
+
+    return *this;
+}
+//=========================================
+
+// Appends the C string s to this string and returns it for chaining.
+// A NULL s is treated as empty (nothing is appended), in line with how
+// operator!=(const char*) treats NULL; previously NULL reached strlen().
+// NOTE(review): s must not point into this string's own buffer, because
+// GREALLOC may move my_data before s is copied - confirm callers such as
+// "a += a" are not relied upon.
+GStr& GStr::append(const char* s) {
+  if (s==NULL) return *this; //nothing to append
+  make_unique(); //edit operation ahead
+  int len=::strlen(s);
+  int newlength=len+my_data->length;
+  if (newlength<=my_data->length) return *this; //empty s (or int overflow)
+  if (my_data->length==0) {
+    //currently empty: just take a block of the right size and copy s in
+    replace_data(len);
+    ::memcpy(my_data->chars, s, len);
+    return *this;
+   }
+  //faster solution with realloc
+  GREALLOC(my_data, sizeof(Data)+newlength);
+  ::strcpy(&my_data->chars[my_data->length], s);
+  my_data->length=newlength;
+  my_data->chars[newlength]='\0';
+  return *this;
+}
+
+// Appends another GStr by handing its C-string view to append(const char*).
+GStr& GStr::append(const GStr& s) {
+ const char* tail=(const char *)s;
+ return append(tail);
+}
+
+
+// Converts every character to upper case in place (per toupper()).
+GStr& GStr::upper() {
+  make_unique(); //edit operation ahead
+  // toupper() requires a value representable as unsigned char; passing a
+  // negative plain char (bytes >= 0x80 where char is signed) is undefined.
+  for (char *p = chrs(); *p; p++)
+            *p = (char) toupper((unsigned char)*p);
+
+    return *this;
+}
+
+//=========================================
+
+// Converts every character to lower case in place (per tolower()).
+GStr& GStr::lower() {
+    make_unique(); //edit operation ahead
+
+    // tolower() requires a value representable as unsigned char; passing a
+    // negative plain char (bytes >= 0x80 where char is signed) is undefined.
+    for (char *p = chrs(); *p; p++)
+          *p = (char) tolower((unsigned char)*p);
+
+    return *this;
+}
+
+//=========================================
+
+// Position of the first occurrence of s at or after start_index, or -1.
+// A negative start_index counts from the right end. A start_index outside
+// [0, length()) is reported via invalid_index_error().
+int GStr::index(const char *s, int start_index) const {
+    //a needle longer than the whole string can never match
+    const size_t needle_len = strlen(s);
+    if (needle_len > (size_t)length()) return -1;
+    if (start_index < 0)
+        start_index += length();
+
+    const bool bad_index = (start_index < 0) || (start_index >= length());
+    if (bad_index)
+        invalid_index_error("index()");
+    const char* hit = strstr(chars() + start_index, s);
+    return (hit == NULL) ? -1 : (hit - chars());
+}
+
+//=========================================
+
+// Position of the first occurrence of c at or after start_index, or -1.
+// A negative start_index counts from the right end. A start_index outside
+// [0, length()) is reported via invalid_index_error(). An empty string or
+// c == '\0' never matches.
+int GStr::index(char c, int start_index) const {
+    if (length()==0) return -1;
+    if (start_index < 0)
+        start_index += length();
+
+    const bool bad_index = (start_index < 0) || (start_index >= length());
+    if (bad_index)
+        invalid_index_error("index()");
+
+    if (c == '\0')
+        return -1;
+    const char* from = chars() + start_index;
+    const char* hit = (char *) ::memchr(from, c, length()-start_index);
+    return (hit == NULL) ? -1 : (hit - chars());
+}
+
+// Position of the last occurrence of c at or before end_index, or -1.
+// A negative end_index means "start at the last character". Returns -1
+// for c == 0, an empty string, or an end_index at or past the length.
+int GStr::rindex(char c, int end_index) const {
+    if (c == 0 || length()==0 || end_index>=length()) return -1;
+    int pos = (end_index<0) ? my_data->length-1 : end_index;
+    while (pos>=0) {
+      if (my_data->chars[pos]==c) return pos;
+      pos--;
+      }
+    return -1;
+}
+
+// Position of the last occurrence of str that ends at or before end_index,
+// or -1. A negative end_index means "the last character". Returns -1 for a
+// NULL/empty str, an empty string, or an end_index at or past the length.
+int GStr::rindex(const char* str, int end_index) const {
+    if (str==NULL || *str == '\0' || length()==0 || end_index>=length())
+        return -1;
+    const int needle_len=strlen(str);
+    if (end_index<0) end_index=my_data->length-1;
+    //end_index is the right-side boundary of a candidate match;
+    //there must be room for the whole needle to its left
+    if (end_index>=0 && end_index<needle_len-1) return -1;
+    int pos=end_index-needle_len+1;
+    while (pos>=0) {
+       if (memcmp((void*)(my_data->chars+pos),(void*)str, needle_len)==0)
+           return pos;
+       pos--;
+       }
+    return -1;
+}
+
+GStr GStr::split(const char* delim) {
+           /* splits "this" in two parts, at the first (left)
+                 encounter of delim:
+                 1st would stay in "this",
+                 2nd part will be returned
+                 as a new string!
+              If delim is not found, "this" is unchanged and an
+              empty string is returned.
+           */
+ GStr result;
+ int i=index(delim);
+ if (i>=0){
+      // When delim ends the string there is nothing after it: return an
+      // empty second part instead of calling substr() with an index equal
+      // to length(), which substr() reports as invalid arguments.
+      int rest=i+(int)strlen(delim);
+      if (rest<length())
+          result=substr(rest);
+      cut(i);
+      }
+ return result;
+}
+
+GStr GStr::split(char c) {
+           /* splits "this" in two parts, at the first (left)
+                 encounter of c:
+                 1st would stay in "this",
+                 2nd part will be returned
+                 as a new string!
+              If c is not found, "this" is unchanged and an
+              empty string is returned.
+           */
+ GStr result;
+ int i=index(c);
+ if (i>=0){
+      // When c is the last character there is nothing after it: return an
+      // empty second part instead of calling substr() with an index equal
+      // to length(), which substr() reports as invalid arguments.
+      if (i+1<length())
+          result=substr(i+1);
+      cut(i);
+      }
+ return result;
+}
+
+// Splits "this" in two parts at the last (rightmost) occurrence of delim:
+// the part before it stays in "this", the part after it is returned.
+// If delim is not found, "this" is unchanged and an empty string is returned.
+GStr GStr::splitr(const char* delim) {
+ GStr result;
+ int i=rindex(delim);
+ if (i>=0){
+      // When delim ends the string there is nothing after it: return an
+      // empty second part instead of calling substr() with an index equal
+      // to length(), which substr() reports as invalid arguments.
+      int rest=i+(int)strlen(delim);
+      if (rest<length())
+          result=substr(rest);
+      cut(i);
+      }
+ return result;
+}
+
+// Splits "this" in two parts at the last (rightmost) occurrence of c:
+// the part before it stays in "this", the part after it is returned.
+// If c is not found, "this" is unchanged and an empty string is returned.
+GStr GStr::splitr(char c) {
+ GStr result;
+ int i=rindex(c);
+ if (i>=0){
+      // When c is the last character there is nothing after it: return an
+      // empty second part instead of calling substr() with an index equal
+      // to length(), which substr() reports as invalid arguments.
+      if (i+1<length())
+          result=substr(i+1);
+      cut(i);
+      }
+ return result;
+}
+
+
+// Prepares this string for nextToken(): stores a private copy of the
+// delimiter (any previous one is freed), rewinds the token position and
+// records the tokenizing mode.
+void GStr::startTokenize(const char* delimiter, enTokenizeMode tokenizemode) {
+ GFREE(fTokenDelimiter);
+ if (delimiter!=NULL) {
+    const size_t nbytes=strlen(delimiter)+1; //including the terminator
+    GMALLOC(fTokenDelimiter,nbytes);
+    memcpy(fTokenDelimiter, delimiter, nbytes);
+    }
+ fTokenizeMode=tokenizemode;
+ fLastTokenStart=0;
+}
+
+// Extracts the next token into 'token', continuing from fLastTokenStart.
+// Returns true when a token was produced, false when the string is
+// exhausted (the delimiter is then freed and the position reset, so
+// startTokenize() must be called again before reuse).
+// Raises GError if startTokenize() has not set a delimiter.
+// In tkFullString mode the whole delimiter string separates tokens and
+// empty tokens are returned; otherwise (character-set mode) any character
+// of the delimiter separates tokens and empty tokens are never returned.
+bool GStr::nextToken(GStr& token) {
+ if (fTokenDelimiter==NULL) {
+    GError("GStr:: no token delimiter; use StartTokenize first\n");
+    }
+ if (fLastTokenStart>=length()) {//no more
+    GFREE(fTokenDelimiter);
+    fLastTokenStart=0;
+    return false;
+    }
+ int dlen=strlen(fTokenDelimiter);
+ char* delpos=NULL; //delimiter position
+ int tlen=0;        //length of the token being extracted
+ if (fTokenizeMode==tkFullString) { //exact string as a delimiter
+   delpos=(char*)strstr(chars()+fLastTokenStart,fTokenDelimiter);
+   //no further delimiter: the token runs to the end of the string
+   if (delpos==NULL) delpos=(char*)(chars()+length());
+   //empty records may be returned
+   if (chars()+fLastTokenStart == delpos) { //empty token
+     fLastTokenStart=(delpos-chars())+dlen;
+     token="";
+     return true;
+     }
+    else {
+     tlen=delpos-(chars()+fLastTokenStart);
+     token.replace_data(tlen);
+     ::memcpy(token.chrs(), &chars()[fLastTokenStart], tlen);
+     //resume right after the delimiter on the next call
+     fLastTokenStart=(delpos-chars())+dlen;
+     return true;
+     } 
+   }
+  else { //tkCharSet - any character is a delimiter
+   //empty records are never returned !
+   if (fLastTokenStart==0) {//skip any starting delimiters
+     delpos=(char*)chars();   
+     while (*delpos!='\0' && strchr(fTokenDelimiter, *delpos)!=NULL) 
+       delpos++;
+     if (*delpos!='\0')
+         fLastTokenStart = delpos-chars();
+       else { //only delimiters here,no tokens
+         GFREE(fTokenDelimiter);
+         fLastTokenStart=0;
+         return false;
+         }
+     }
+   //now fLastTokenStart is on a non-delimiter char
+   //GMessage("String at fLastTokenStart=%d is %s\n", fLastTokenStart, delpos);
+   char* token_end=NULL;
+   delpos=(char*)strpbrk(chars()+fLastTokenStart,fTokenDelimiter);
+   if (delpos==NULL) delpos=(char*)(chars()+length());
+   token_end=delpos-1; //last character of the token
+   while (*delpos!='\0' && strchr(fTokenDelimiter, *delpos)!=NULL) 
+      delpos++; //skip any other delimiters in the set!
+   //now we know that delpos is on the beginning of next token
+   tlen=(token_end-chars())-fLastTokenStart+1;
+   if (tlen==0) {
+       GFREE(fTokenDelimiter);
+       fLastTokenStart=0;
+       return false;
+       }
+   token.replace_data(tlen);
+   ::memcpy(token.chrs(), &chars()[fLastTokenStart], tlen);
+   fLastTokenStart=delpos-chars();
+   return true;
+   }
+ //return true;
+}
+
+// Reads from stream up to (and including) the given delimiter string and
+// stores what was read as the new content of this string. Data is read in
+// chunks of bufsize bytes through the member buffer readbuf, which is
+// (re)allocated when bufsize changes. Calling with bufsize == 0 clears the
+// string, releases the buffer and returns 0.
+// Returns the number of bytes accumulated.
+// The stream is repositioned with fseek(), so it is presumably expected to
+// be seekable - confirm against callers.
+size_t GStr::read(FILE* stream, const char* delimiter, size_t bufsize) {
+ if (readbuf==NULL) {
+    GMALLOC(readbuf, bufsize);
+    readbufsize=bufsize;
+    }
+  else if (bufsize!=readbufsize) {
+            GFREE(readbuf);
+            if (bufsize>0) {
+              GMALLOC(readbuf, bufsize);
+              }
+            readbufsize=bufsize;
+            }
+ if (bufsize==0) {
+    replace_data(0);
+    return 0; //clear the string and free the buffer
+    }
+ size_t numread;
+ size_t acc_len=0; //accumulated length
+ int seplen=strlen(delimiter);
+ void* p=NULL;
+ Data *data = new_data(0);
+ do {
+   numread=fread(readbuf, 1, bufsize, stream);
+   if (numread) {
+     // Scan only the bytes that were just read. After a short read the
+     // rest of readbuf still holds older data, and scanning all bufsize
+     // bytes could report a delimiter that is not in the stream.
+     p=Gmemscan(readbuf, numread, (void*) delimiter, seplen);
+     if (p!=NULL) {//found the delimiter
+           //position the stream after it
+           int l = (char*)p-(char*)readbuf;
+           // computed in signed arithmetic: the offset is zero or negative
+           fseek(stream, (long)l+seplen-(long)numread, SEEK_CUR);
+           numread=l+seplen;
+           }
+        else {//not found, go back if not eof
+           if (numread==bufsize) {
+               // re-read the last seplen bytes next time so a delimiter
+               // straddling two chunks is still found
+               fseek(stream, -seplen, SEEK_CUR); //check if this works!
+               numread-=seplen;
+               }
+           }
+      if (data==&null_data) {
+        data=new_data(numread);
+        ::memcpy(data->chars, readbuf, numread);
+        acc_len+=numread;
+        }
+       else {
+        GREALLOC(data, sizeof(Data)+acc_len+numread);
+        memcpy(&data->chars[acc_len], readbuf, numread);
+        acc_len+=numread;
+        data->length=acc_len;
+        data->chars[acc_len]='\0';
+        }
+      } //if something read
+   } while (p==NULL && numread!=0);
+  replace_data(data); 
+  return acc_len;
+}
+
+
+// Parses the string with strtol() in the given base (default 10) and
+// returns the result; no error reporting.
+int GStr::asInt(int base /*=10 */) {
+ const long parsed=strtol(text(), NULL, base);
+ return parsed;
+}
+
+// Parses the string with strtol() in the given base. On success stores
+// the value in r and returns true. Returns false, leaving r untouched,
+// when strtol() sets errno or no characters could be parsed.
+bool GStr::asInt(int& r, int base) {
+ char* stop;
+ errno=0;
+ const long parsed=strtol(text(), &stop, base);
+ if (errno!=0 || stop==text())
+    return false;
+ /* strtol() successfully parsed a number */
+ r=parsed;
+ return true;
+}
+
+// Parses the string with strtod() and returns the result; no error
+// reporting.
+double GStr::asReal() {
+ const double parsed=strtod(text(), NULL);
+ return parsed;
+}
+
+// Parses the string with strtod(). On success stores the value in r and
+// returns true. Returns false, leaving r untouched, when strtod() sets
+// errno or no characters could be parsed.
+bool GStr::asReal(double& r) {
+  char* stop;
+  errno=0;
+  const double parsed=strtod(text(), &stop);
+  if (errno!=0 || stop==text()) //conversion error, or no digits to parse
+     return false;
+  r=parsed;
+  return true;
+}
+
+
+// Returns the value of the first (leftmost) run of decimal digits found in
+// the string, or 0 when the string is empty or has no digits.
+// The digits are parsed in place with strtol(), which stops at the first
+// non-digit. The previous version copied the run into a 24-byte buffer,
+// which a longer run of digits would overrun.
+int GStr::peelInt() const {
+ if (is_empty()) return 0;
+ const char* s=my_data->chars;
+ const int n=length();
+ int i=0;
+ // isdigit() needs an unsigned char value; a negative plain char is undefined
+ while (i<n && !isdigit((unsigned char)s[i])) i++;
+ if (i==n) return 0; //no digits at all
+ return strtol(s+i, NULL, 10);
+}
+
+// Returns the value of the last (rightmost) run of decimal digits found in
+// the string, or 0 when the string is empty or has no digits.
+// The digits are parsed in place with strtol(), which stops at the first
+// non-digit after the run. The previous version copied the run into a
+// 24-byte buffer, which a longer run of digits would overrun.
+int GStr::peelIntR() const {
+ if (is_empty()) return 0;
+ const char* s=my_data->chars;
+ int i=length()-1;
+ // isdigit() needs an unsigned char value; a negative plain char is undefined
+ while (i>=0 && !isdigit((unsigned char)s[i])) i--; //find the run's last digit
+ if (i<0) return 0; //no digits at all
+ while (i>=0 && isdigit((unsigned char)s[i])) i--;  //walk to just before the run
+ return strtol(s+i+1, NULL, 10);
+}
+
+//Returns the part of the string before the first occurrence of c,
+//or the whole string if c is not found.
+GStr GStr::to(char c) {
+ const int pos=index(c);
+ if (pos<0) return (*this);
+ return substr(0,pos);
+}
+//Returns the part of the string after the last occurrence of c,
+//or the whole string if c is not found.
+GStr GStr::from(char c) {
+ int i=rindex(c);
+ if (i<0) return (*this);
+ // When c is the last character nothing follows it: return an empty
+ // string instead of calling substr() with an index equal to length(),
+ // which substr() reports as invalid arguments.
+ if (i+1>=length()) return GStr();
+ return substr(i+1);
+}
+
+//Returns the number of occurrences of char c within the string.
+int GStr::count(char c){
+ int hits=0;
+ const char* p=my_data->chars;
+ for (int left=length(); left>0; left--, p++)
+    if (*p==c) hits++;
+ return hits;
+ }
+
+//=========================================
+
+// Reports, through GError(), that the GStr method named by fname was
+// called with invalid arguments.
+void GStr::invalid_args_error(const char *fname) {
+    const char* method = fname;
+    GError("GStr:: %s  - invalid arguments\n", method);
+}
+
+//****************************************************************************
+
+// Reports, through GError(), that the GStr method named by fname was
+// called with an invalid index.
+void GStr::invalid_index_error(const char *fname) {
+    const char* method = fname;
+    GError("GStr:: %s  - invalid index\n", method);
+}
+//****************************************************************************
+
diff --git a/src/TestGFFParse.cpp b/src/TestGFFParse.cpp
new file mode 100644
index 0000000..3354ca6
--- /dev/null
+++ b/src/TestGFFParse.cpp
@@ -0,0 +1,34 @@
+#include <iostream>
+#include <cstdio>
+#include <cstdlib>
+
+#include "gff.h"
+
+// Test driver: loads the annotation file named on the command line with
+// GffReader and prints, for every transcript, its ID, its gene ID and all
+// attribute name/value pairs as one tab-separated line.
+int main(int argc, char* argv[]) {
+
+    // exactly one argument (the input file) is expected
+    const bool bad_usage = (argc == 1) || (argc > 2);
+    if (bad_usage) {
+        std::cerr << "Usage: TestGFFParse input\n";
+        std::exit(1);
+    }
+
+    GffReader reader(argv[1]);
+    reader.readAll(true);
+
+    std::cerr << "had count of " << reader.gflst.Count() << "\n";
+    const size_t total = reader.gflst.Count();
+    for (size_t idx = 0; idx < total; ++idx) {
+       GffObj* feat = reader.gflst[idx];
+       if (!feat->isTranscript())
+           continue; // only transcripts are reported
+
+       std::cout << feat->getID() << '\t' << feat->getGeneID() << '\t';
+       if (feat->attrs) {
+           for (size_t a = 0; a < feat->attrs->Count(); ++a) {
+               std::cout << feat->getAttrName(a) << "\t" << feat->getAttrValue(a) << "\t";
+           }
+       }
+       std::cout << "\n";
+    }
+    std::exit(0);
+}
+
diff --git a/src/codons.cpp b/src/codons.cpp
new file mode 100644
index 0000000..a459250
--- /dev/null
+++ b/src/codons.cpp
@@ -0,0 +1,90 @@
+#include "codons.h"
+
+// Lookup table indexed by the 15-bit code produced by packCodon(); each
+// entry holds the one-letter translation for that codon. Filled in by
+// codonTableInit(), which presets every entry to 'X'.
+static char codonTable[32768]; //32K table for fasta codon decoding
+       // codons are encoded as triplets of 5-bit-encoded nucleotides
+       // (so any codon can be encoded/decoded as a unique 15-bit value)
+
+// Source data for codonTable: groups of four characters, the first three
+// being the codon (upper case, IUPAC ambiguity letters included) and the
+// fourth its one-letter translation. '.' is the translation given to
+// TAA, TAG, TGA (and the ambiguous TAR, TRA).
+static char codonData[]={ //long list of 3+1 characters (codon+translation)
+'A','A','A','K', 'A','A','C','N', 'A','A','G','K', 'A','A','R','K', 'A','A','T','N',
+'A','A','Y','N', 'A','C','A','T', 'A','C','B','T', 'A','C','C','T', 'A','C','D','T',
+'A','C','G','T', 'A','C','H','T', 'A','C','K','T', 'A','C','M','T', 'A','C','N','T',
+'A','C','R','T', 'A','C','S','T', 'A','C','T','T', 'A','C','V','T', 'A','C','W','T',
+'A','C','Y','T', 'A','G','A','R', 'A','G','C','S', 'A','G','G','R', 'A','G','R','R',
+'A','G','T','S', 'A','G','Y','S', 'A','T','A','I', 'A','T','C','I', 'A','T','G','M',
+'A','T','H','I', 'A','T','M','I', 'A','T','T','I', 'A','T','W','I', 'A','T','Y','I',
+'C','A','A','Q', 'C','A','C','H', 'C','A','G','Q', 'C','A','R','Q', 'C','A','T','H',
+'C','A','Y','H', 'C','C','A','P', 'C','C','B','P', 'C','C','C','P', 'C','C','D','P',
+'C','C','G','P', 'C','C','H','P', 'C','C','K','P', 'C','C','M','P', 'C','C','N','P',
+'C','C','R','P', 'C','C','S','P', 'C','C','T','P', 'C','C','V','P', 'C','C','W','P',
+'C','C','Y','P', 'C','G','A','R', 'C','G','B','R', 'C','G','C','R', 'C','G','D','R',
+'C','G','G','R', 'C','G','H','R', 'C','G','K','R', 'C','G','M','R', 'C','G','N','R',
+'C','G','R','R', 'C','G','S','R', 'C','G','T','R', 'C','G','V','R', 'C','G','W','R',
+'C','G','Y','R', 'C','T','A','L', 'C','T','B','L', 'C','T','C','L', 'C','T','D','L',
+'C','T','G','L', 'C','T','H','L', 'C','T','K','L', 'C','T','M','L', 'C','T','N','L',
+'C','T','R','L', 'C','T','S','L', 'C','T','T','L', 'C','T','V','L', 'C','T','W','L',
+'C','T','Y','L', 'G','A','A','E', 'G','A','C','D', 'G','A','G','E', 'G','A','R','E',
+'G','A','T','D', 'G','A','Y','D', 'G','C','A','A', 'G','C','B','A', 'G','C','C','A',
+'G','C','D','A', 'G','C','G','A', 'G','C','H','A', 'G','C','K','A', 'G','C','M','A',
+'G','C','N','A', 'G','C','R','A', 'G','C','S','A', 'G','C','T','A', 'G','C','V','A',
+'G','C','W','A', 'G','C','Y','A', 'G','G','A','G', 'G','G','B','G', 'G','G','C','G',
+'G','G','D','G', 'G','G','G','G', 'G','G','H','G', 'G','G','K','G', 'G','G','M','G',
+'G','G','N','G', 'G','G','R','G', 'G','G','S','G', 'G','G','T','G', 'G','G','V','G',
+'G','G','W','G', 'G','G','Y','G', 'G','T','A','V', 'G','T','B','V', 'G','T','C','V',
+'G','T','D','V', 'G','T','G','V', 'G','T','H','V', 'G','T','K','V', 'G','T','M','V',
+'G','T','N','V', 'G','T','R','V', 'G','T','S','V', 'G','T','T','V', 'G','T','V','V',
+'G','T','W','V', 'G','T','Y','V', 'M','G','A','R', 'M','G','G','R', 'M','G','R','R',
+'N','N','N','X', 'R','A','Y','B', 'S','A','R','Z', 'T','A','A','.', 'T','A','C','Y',
+'T','A','G','.', 'T','A','R','.', 'T','A','T','Y', 'T','A','Y','Y', 'T','C','A','S',
+'T','C','B','S', 'T','C','C','S', 'T','C','D','S', 'T','C','G','S', 'T','C','H','S',
+'T','C','K','S', 'T','C','M','S', 'T','C','N','S', 'T','C','R','S', 'T','C','S','S',
+'T','C','T','S', 'T','C','V','S', 'T','C','W','S', 'T','C','Y','S', 'T','G','A','.',
+'T','G','C','C', 'T','G','G','W', 'T','G','T','C', 'T','G','Y','C', 'T','R','A','.',
+'T','T','A','L', 'T','T','C','F', 'T','T','G','L', 'T','T','R','L', 'T','T','T','F',
+'T','T','Y','F', 'X','X','X','X', 'Y','T','A','L', 'Y','T','G','L', 'Y','T','R','L'
+};
+
+
+// The initializer makes codonTableInit() run when this file's statics are
+// initialized, so codonTable is filled before main() starts.
+static bool isCodonTableReady=codonTableInit();
+
+// Packs three nucleotide letters into a 15-bit code: 5 bits per letter
+// (letter - 'A'), first letter in the lowest bits. The letters are assumed
+// to be upper case already.
+// Any character outside 'A'..'Z' yields 0x7FFF, a code no real codon uses
+// (its 5-bit fields are all 31), so it stays inside the 32768-entry codon
+// table and keeps that table's default entry. Previously such characters
+// could produce codes of up to 0xFFFF, or alias the code of a valid codon.
+unsigned short packCodon(char n1, char n2, char n3) {
+ const unsigned int v1=(unsigned char)(n1-'A');
+ const unsigned int v2=(unsigned char)(n2-'A');
+ const unsigned int v3=(unsigned char)(n3-'A');
+ if (v1>25 || v2>25 || v3>25)
+     return 0x7FFF; //not a letter triplet
+ return (unsigned short)(v1 | (v2 << 5) | (v3 << 10));
+ }
+
+// Fills codonTable: every entry is preset to 'X', then each 4-character
+// group of codonData (codon + translation) sets the entry for its codon.
+// Always returns true.
+bool codonTableInit() {
+ memset((void*)codonTable, 'X', sizeof(codonTable));
+ const int nbytes=sizeof(codonData);
+ int pos=0;
+ while (pos<nbytes) {
+   const char* entry=codonData+pos; //three codon letters, then the translation
+   const unsigned short code=packCodon(entry[0], entry[1], entry[2]);
+   codonTable[code]=entry[3];
+   pos+=4;
+   }
+ return true;
+ }
+
+
+// Upper-cases the three stored nucleotides in place and returns their
+// translation from codonTable.
+char Codon::translate() {
+ for (int k=0;k<3;k++)
+    nuc[k]=toupper(nuc[k]);
+ const unsigned short code=packCodon(nuc[0], nuc[1], nuc[2]);
+ return codonTable[code];
+ }
+
+//Simple first-frame forward translation of a given DNA string.
+//Allocates and returns the NUL-terminated translation string; aalen
+//receives its length (dnalen/3). When dnalen is 0 the length of dnastr is
+//used. Returns NULL (aalen untouched) for a NULL or empty dnastr.
+char* translateDNA(const char* dnastr, int& aalen, int dnalen) {
+ if (dnastr==NULL || *dnastr==0) return NULL;
+ if (dnalen==0) dnalen=strlen(dnastr);
+ aalen=dnalen/3;
+ char* protein=NULL;
+ GMALLOC(protein, aalen+1);
+ protein[aalen]=0;
+ int out=0;
+ int pos=0;
+ //only complete codons are translated
+ while (pos+2<dnalen) {
+   const char* codon=dnastr+pos;
+   protein[out]=codonTable[packCodon(toupper(codon[0]),toupper(codon[1]),toupper(codon[2]))];
+   pos+=3;
+   out++;
+   }
+ return protein;
+}
diff --git a/src/gdna.cpp b/src/gdna.cpp
new file mode 100644
index 0000000..4d8a4b0
--- /dev/null
+++ b/src/gdna.cpp
@@ -0,0 +1,90 @@
+#include "gdna.h"
+#include <string.h>
+
+// Parallel IUPAC lookup strings: the character at index i of IUPAC_DEFS has its
+// complement at index i of IUPAC_COMP and its 2-bit code (as a digit) at index i
+// of IUPAC_2BITN; gDnaInit() expands them into the byte-indexed tables below.
+// NOTE(review): IUPAC_2BIT appears to be the letter form of IUPAC_2BITN; it is
+// not referenced by the code in this file section.
+const char* IUPAC_2BIT  ="AACCTTGGTTAAAAAACCCCGGAAAAAACCAAAAAA";
+const char* IUPAC_2BITN ="001133223300000011112200000011000000";
+const char* IUPAC_DEFS  ="AaCcTtGgUuMmRrWwSsYyKkVvHhDdBbNnXx-*";
+const char* IUPAC_COMP  ="TtGgAaCcAaKkYyWwSsRrMmBbDdHhVvNnXx-*";
+
+// 2-bit codes of the four plain bases (same values as the digits in IUPAC_2BITN)
+#define A_2BIT 0 // 00
+#define C_2BIT 1 // 01
+#define G_2BIT 2 // 10
+#define T_2BIT 3 // 11
+
+static byte ntCompTable[256]; //complement for every byte value, filled by gDnaInit()
+static byte nt2bit[256]; //maps any character to a 2bit base value (with N = A)
+static char v_2bit2nt[4] = {'A','C','G','T'}; //inverse mapping: 2-bit code -> base letter
+
+//----------------------
+
+// Runs gDnaInit() while this file's statics are initialized. gDnaInit() itself
+// tests gdna_Ready, which is still zero-initialized (false) during that first
+// call, so the tables do get built.
+static bool gdna_Ready=gDnaInit();
+
+//----------------------
+
+// Pack n bases into a byte (n can be 1..4), 2 bits per base, with the first
+// base read ending up in the most significant position.
+// nt is advanced past every base consumed; packing stops early at the
+// terminating NUL (reported through GError only when GDEBUG is defined).
+byte gdna2bit(char* &nt, int n) {
+byte out = 0;
+while (n && *nt) {
+    n--;
+    out <<= 2;
+    //index through unsigned char: a plain char above 0x7F may be negative
+    //and would then read before the start of nt2bit[]
+    out += nt2bit[(unsigned char)*nt];
+    nt++;
+    }
+#ifdef GDEBUG
+if (n) {
+     GError("Error: attempt to read 6-mer beyond the end of the string!\n");
+     }
+#endif
+return out;
+}
+
+
+// Returns the complement of nucleotide code c as stored in ntCompTable
+// (gDnaInit() stores 'N' for characters it does not find in IUPAC_DEFS).
+char ntComplement(char c) {
+ //index through unsigned char so bytes above 0x7F cannot become a negative index
+ return ntCompTable[(unsigned char)c];
+ }
+
+// Converts a 2-bit base code to its letter via v_2bit2nt (0=A, 1=C, 2=G, 3=T);
+// only the two lowest bits of v2bit are looked at.
+char g2bit2base(byte v2bit) {
+ const int code=v2bit & 0x03;
+ return v_2bit2nt[code];
+}
+
+//in place reverse complement of nucleotide (sub)sequence
+//  seq  : buffer to modify; a NULL pointer is returned untouched
+//  slen : number of characters to process, 0 means strlen(seq)
+// Returns seq.
+char* reverseComplement(char* seq, int slen) {
+   if (seq==NULL) return seq; //nothing to do, and strlen(NULL) must be avoided
+   if (slen==0) slen=strlen(seq);
+   //reverseChars(seq,len);
+   int l=0;
+   int r=slen-1;
+   char c; //the 'register' specifier was dropped: it is no longer valid in C++17
+   while (l<r) {
+      c=seq[l];seq[l]=seq[r];
+      seq[r]=c;   //this was: Gswap(str[l],str[r]);
+      l++;r--;
+      }
+   //second pass: replace every base by its complement
+   for (int i=0;i<slen;i++) seq[i]=ntComplement(seq[i]);
+   return seq;
+ }
+
+// Fills ntCompTable and nt2bit from the IUPAC_* strings.
+// For each byte value 1..255 the first matching position in IUPAC_DEFS selects
+// the complement (IUPAC_COMP) and the 2-bit code (IUPAC_2BITN digit); values
+// without a complement get 'N' and keep 2-bit code 0. Entry 0 stays 0 in both
+// tables. Returns immediately when gdna_Ready is already set; always returns true.
+bool gDnaInit() {
+       if (gdna_Ready) return true;
+       ntCompTable[0]=0;
+       nt2bit[0]=0;
+       for (int ch=1;ch<256;ch++) {
+          ntCompTable[ch]=0;
+          nt2bit[ch]=0;
+          const char* hit=NULL;
+          for (const char* d=IUPAC_DEFS; *d!=0 && hit==NULL; d++) {
+                if (ch==*d) hit=d;
+                }
+          if (hit!=NULL) {
+              const int pos=(int)(hit-IUPAC_DEFS);
+              ntCompTable[ch]=IUPAC_COMP[pos];
+              nt2bit[ch] = IUPAC_2BITN[pos]-'0';
+              }
+          if (ntCompTable[ch]==0) ntCompTable[ch]='N';
+          }
+      gdna_Ready=true;
+      return true;
+     }
+
+
+
+
diff --git a/src/gff.cpp b/src/gff.cpp
new file mode 100644
index 0000000..cd57de6
--- /dev/null
+++ b/src/gff.cpp
@@ -0,0 +1,2125 @@
+#include "gff.h"
+
+//GffNames* GffReader::names=NULL;
+GffNames* GffObj::names=NULL;
+//global set of feature names, attribute names etc.
+// -- common for all GffObjs in current application!
+
+// Size limits for loci, exons and introns (not referenced in this part of the file)
+const uint GFF_MAX_LOCUS = 7000000; //longest known gene in human is ~2.2M, UCSC claims a gene for mouse of ~ 3.1 M
+const uint GFF_MAX_EXON  =   30000; //longest known exon in human is ~11K
+const uint GFF_MAX_INTRON= 6000000; //Ensembl shows a >5MB human intron
+bool gff_show_warnings = false; //global setting, set by GffReader->showWarnings()
+// Fixed feature-name ids; the code below uses gff_fid_transcript / gff_fid_exon
+// directly as ftype_id / exon_ftype_id values.
+// NOTE(review): presumably these match the order in which GffNames registers
+// the names "mRNA", "transcript", "exon" -- confirm in gff.h.
+const int gff_fid_mRNA=0;
+const int gff_fid_transcript=1;
+const int gff_fid_exon=2;
+
+// Bit masks for the GffObj flags word
+const uint gfo_flag_HAS_ERRORS       = 0x00000001;
+const uint gfo_flag_CHILDREN_PROMOTED= 0x00000002;
+const uint gfo_flag_IS_GENE          = 0x00000004;
+const uint gfo_flag_IS_TRANSCRIPT    = 0x00000008;
+const uint gfo_flag_HAS_GFF_ID       = 0x00000010; //found GFF3 feature line with its own ID
+const uint gfo_flag_BY_EXON          = 0x00000020; //created by subfeature (exon) directly
+const uint gfo_flag_DISCARDED        = 0x00000100;
+const uint gfo_flag_LST_KEEP         = 0x00000200;
+const uint gfo_flag_LEVEL_MSK        = 0x00FF0000; //mask covering bits 16..23
+const byte gfo_flagShift_LEVEL           = 16; //bit position of the lowest bit in gfo_flag_LEVEL_MSK
+
+// Takes one more reference on the GffNames object n points to, creating the
+// object first when n is still NULL.
+void gffnames_ref(GffNames* &n) {
+  if (n==NULL) {
+     n=new GffNames();
+     }
+  ++(n->numrefs);
+}
+
+// Drops one reference from the GffNames object; when the count reaches zero the
+// object is deleted and n is reset to NULL. A NULL n is reported through GError.
+void gffnames_unref(GffNames* &n) {
+  if (n==NULL) GError("Error: attempt to remove reference to null GffNames object!\n");
+  n->numrefs--;
+  if (n->numrefs!=0) return; //still referenced elsewhere
+  delete n;
+  n=NULL;
+}
+
+
+// Returns a printable name for an exon type code.
+// Codes 1..6 yield their feature name from the table; every other value
+// (including 0 and negative codes) yields "NULL".
+const char* strExonType(char xtype) {
+	static const char* extbl[7]={"None", "start_codon", "stop_codon", "CDS", "UTR", "CDS_UTR", "exon"};
+	const bool known = !(xtype<=0 || xtype>=7);
+	return known ? extbl[(int)xtype] : "NULL";
+}
+
+// Comparison callback for two GffObj pointers: orders by gseq_id, then start,
+// then level, then end, and finally by strcmp() of the IDs.
+// Returns the difference of the first pair of keys that differ.
+int gfo_cmpByLoc(const pointer p1, const pointer p2) {
+ GffObj& a=*((GffObj*)p1);
+ GffObj& b=*((GffObj*)p2);
+ if (a.gseq_id!=b.gseq_id)
+     return (int)(a.gseq_id-b.gseq_id);
+ //same genomic sequence from here on
+ if (a.start!=b.start)
+     return (int)(a.start-b.start);
+ if (a.getLevel()!=b.getLevel())
+     return (int)(a.getLevel()-b.getLevel());
+ if (a.end!=b.end)
+     return (int)(a.end-b.end);
+ return strcmp(a.getID(), b.getID());
+}
+
+// Looks up attribute `attr` in the info string, returns a copy of its value
+// (made with Gstrdup) and removes the attribute from info in place.
+// Returns NULL when the attribute is not found.
+//  attr         : attribute name, optionally ending in '=' or ' '
+//  caseStrict   : true selects Gstrcmp, false selects Gstricmp for the match
+//  enforce_GTF2 : when set, GError is called unless the value is enclosed in
+//                 double quotes
+// A match only counts outside double-quoted text and when it is preceded by
+// the start of info, a space or a ';'.
+char* GffLine::extractAttr(const char* attr, bool caseStrict, bool enforce_GTF2) {
+ //parse a key attribute and remove it from the info string
+ //(only works for attributes that have values following them after ' ' or '=')
+ static const char GTF2_ERR[]="Error parsing attribute %s ('\"' required) at GTF line:\n%s\n";
+ int attrlen=strlen(attr);
+ char cend=attr[attrlen-1];
+ //char* pos = (caseStrict) ? strstr(info, attr) : strifind(info, attr);
+ //must make sure attr is not found in quoted text
+ char* pos=info;
+ char prevch=0;
+ bool in_str=false;
+ bool notfound=true;
+ int (*strcmpfn)(const char*, const char*, int) = caseStrict ? Gstrcmp : Gstricmp;
+ while (notfound && *pos) {
+   char ch=*pos;
+   if (ch=='"') {
+     //toggle the "inside quoted text" state on every double quote
+     in_str=!in_str;
+     pos++;
+     prevch=ch;
+     continue;
+     }
+   if (!in_str && (prevch==0 || prevch==' ' || prevch == ';')
+          && strcmpfn(attr, pos, attrlen)==0) {
+      //attr match found
+      //check for word boundary on right
+      char* epos=pos+attrlen;
+      if (cend=='=' || cend==' ' || *epos==0 || *epos==' ') {
+        notfound=false;
+        break;
+        }
+      //not a perfect match, move on
+      pos=epos;
+      prevch=*(pos-1);
+      continue;
+      }
+   //not a match or in_str
+   prevch=ch;
+   pos++;
+   }
+ if (notfound) return NULL;
+ //vp: start of the value, after the attribute name and any spaces
+ char* vp=pos+attrlen;
+ while (*vp==' ') vp++;
+ if (*vp==';' || *vp==0)
+      GError("Error parsing value of GFF attribute \"%s\", line:\n%s\n", attr, dupline);
+ bool dq_enclosed=false; //value string enclosed by double quotes
+ if (*vp=='"') {
+     dq_enclosed=true;
+     vp++;
+     }
+ if (enforce_GTF2 && !dq_enclosed)
+      GError(GTF2_ERR,attr, dupline);
+ //vend: first character after the value (closing quote, ';' or end of string)
+ char* vend=vp;
+ if (dq_enclosed) {
+    while (*vend!='"' && *vend!=';' && *vend!=0) vend++;
+    }
+ else {
+    while (*vend!=';' && *vend!=0) vend++;
+    }
+ if (enforce_GTF2 && *vend!='"')
+     GError(GTF2_ERR, attr, dupline);
+ //presumably Gstrdup(from, to) copies the inclusive range from..to -- confirm in GBase
+ //NOTE(review): for an empty quoted value ("") vend-1 lies before vp
+ char *r=Gstrdup(vp, vend-1);
+ //-- now remove this attribute from the info string
+ while (*vend!=0 && (*vend=='"' || *vend==';' || *vend==' ')) vend++;
+ if (*vend==0) vend--;
+ //shift the remainder of info left over the removed attribute, including the NUL
+ for (char *src=vend, *dest=pos;;src++,dest++) {
+   *dest=*src;
+   if (*src==0) break;
+   }
+ return r;
+}
+
+//scratch buffer holding the lower-cased feature type of the line being parsed
+static char fnamelc[128];
+
+// Parses one GFF/GTF text line l.
+// The line is copied twice: `line` is split in place into its tab-separated
+// columns, `dupline` keeps the original text for error messages.
+// `skip` stays true unless the line was parsed successfully and an ID or at
+// least one parent could be determined.
+//  reader : supplies the transcriptsOnly and gff_warns settings and collects
+//           discarded IDs
+//  l      : NUL-terminated input line
+// Malformed score/strand/Target/Qreg/Qlen values are reported through GError.
+GffLine::GffLine(GffReader* reader, const char* l) {
+ llen=strlen(l);
+ GMALLOC(line,llen+1);
+ memcpy(line, l, llen+1);
+ GMALLOC(dupline, llen+1);
+ memcpy(dupline, l, llen+1);
+ skip=true;
+ gseqname=NULL;
+ track=NULL;
+ ftype=NULL;
+ info=NULL;
+ _parents=NULL;
+ _parents_len=0;
+ num_parents=0;
+ parents=NULL;
+ is_gff3=false;
+ is_cds=false;
+ is_transcript=false;
+ is_exon=false;
+ is_gene=false;
+ exontype=0;
+ gene_id=NULL;
+ gene_name=NULL;
+ qstart=0;
+ qend=0;
+ qlen=0;
+ ID=NULL;
+ char* t[9];
+ int i=0;
+ int tidx=1;
+ t[0]=line;
+
+ //split the line at tabs; tidx ends up as the number of columns found (max 9)
+ while (line[i]!=0) {
+  if (line[i]=='\t') {
+   line[i]=0;
+   t[tidx]=line+i+1;
+   tidx++;
+   if (tidx>8) break;
+   }
+  i++;
+  }
+
+ //all 9 columns are required: t[8] (the attributes column) is read below and
+ //would be an uninitialized pointer for a line with only 8 columns
+ if (tidx<9) { // ignore non-GFF lines
+  // GMessage("Warning: error parsing GFF/GTF line:\n%s\n", l);
+  return;
+  }
+ gseqname=t[0];
+ track=t[1];
+ ftype=t[2];
+ info=t[8];
+ char* p=t[3];
+ if (!parseUInt(p,fstart)) {
+   //chromosome_band entries in Flybase
+   GMessage("Warning: invalid start coordinate at line:\n%s\n",l);
+   return;
+   }
+ p=t[4];
+ if (!parseUInt(p,fend)) {
+   GMessage("Warning: invalid end coordinate at line:\n%s\n",l);
+   return;
+   }
+ if (fend<fstart) Gswap(fend,fstart); //make sure fstart<=fend, always
+ p=t[5];
+ if (p[0]=='.' && p[1]==0) {
+  score=0;
+  }
+ else {
+  if (!parseDouble(p,score))
+       GError("Error parsing feature score from GFF line:\n%s\n",l);
+  }
+ strand=*t[6];
+ if (strand!='+' && strand!='-' && strand!='.')
+     GError("Error parsing strand (%c) from GFF line:\n%s\n",strand,l);
+ phase=*t[7]; // must be '.', '0', '1' or '2'
+ ID=NULL;
+ // exon/CDS/mrna filter
+ strncpy(fnamelc, ftype, 127);
+ fnamelc[127]=0;
+ strlower(fnamelc); //convert to lower case
+ bool is_t_data=false;
+ if (strstr(fnamelc, "utr")!=NULL) {
+   exontype=exgffUTR;
+   is_exon=true;
+   is_t_data=true;
+   }
+  else if (endsWith(fnamelc, "exon")) {
+   exontype=exgffExon;
+   is_exon=true;
+   is_t_data=true;
+   }
+  else if (strstr(fnamelc, "stop") &&
+      (strstr(fnamelc, "codon") || strstr(fnamelc, "cds"))){
+   exontype=exgffStop;
+   is_cds=true; //though some place it outside the last CDS segment
+   is_t_data=true;
+   }
+  else if (strstr(fnamelc, "start") &&
+      ((strstr(fnamelc, "codon")!=NULL) || strstr(fnamelc, "cds")!=NULL)){
+   exontype=exgffStart;
+   is_cds=true;
+   is_t_data=true;
+   }
+ else if (strcmp(fnamelc, "cds")==0) {
+   exontype=exgffCDS;
+   is_cds=true;
+   is_t_data=true;
+   }
+ else if (startsWith(fnamelc, "intron") || endsWith(fnamelc, "intron")) {
+	 exontype=exgffIntron;
+ }
+ else if (endsWith(fnamelc, "gene") || startsWith(fnamelc, "gene")) {
+   is_gene=true;
+   is_t_data=true; //because its name will be attached to parented transcripts
+   }
+ else if (endsWith(fnamelc,"rna") || endsWith(fnamelc,"transcript")) {
+   is_transcript=true;
+   is_t_data=true;
+   }
+
+if (reader->transcriptsOnly && !is_t_data) {
+        char* id=extractAttr("ID=");
+        if (id==NULL) id=extractAttr("transcript_id");
+        //GMessage("Discarding non-transcript line:\n%s\n",l);
+        if (id!=NULL) {
+          reader->discarded_ids.Add(id, new int(1));
+          GFREE(id);
+          }
+        return; //skip this line, unwanted feature name
+        }
+ ID=extractAttr("ID=",true);
+ char* Parent=extractAttr("Parent=",true);
+ is_gff3=(ID!=NULL || Parent!=NULL);
+ if (is_gff3) {
+   //parse as GFF3
+    if (ID!=NULL) {
+       //has ID attr so it's likely to be a parent feature
+       //look for explicit gene name
+       gene_name=extractAttr("gene_name=");
+       if (gene_name==NULL) {
+           gene_name=extractAttr("geneName=");
+           if (gene_name==NULL) {
+               gene_name=extractAttr("gene_sym=");
+               if (gene_name==NULL) {
+                   gene_name=extractAttr("gene=");
+                   }
+               }
+           }
+       gene_id=extractAttr("geneID=");
+       if (gene_id==NULL) {
+          gene_id=extractAttr("gene_id=");
+          }
+       if (is_gene) {
+         //special case: keep the Name and ID attributes of the gene feature
+         if (gene_name==NULL)
+              gene_name=extractAttr("Name=");
+         if (gene_id==NULL) //the ID is also gene_id in this case
+              gene_id=Gstrdup(ID);
+         //skip=false;
+         //return;
+         GFREE(Parent); //TMI, we really don't care about gene Parents?
+         } //gene feature
+       }// has GFF3 ID
+   if (Parent!=NULL) {
+        //keep Parent attr
+         //parse multiple parents
+         num_parents=1;
+         p=Parent;
+         int last_delim_pos=-1;
+         while (*p!=';' && *p!=0) {
+             if (*p==',' && *(p+1)!=0 && *(p+1)!=';') {
+                 num_parents++;
+                 last_delim_pos=(p-Parent);
+                 }
+             p++;
+             }
+         _parents_len=p-Parent+1;
+         _parents=Parent;
+         GMALLOC(parents, num_parents*sizeof(char*));
+         parents[0]=_parents;
+         int i=1;
+         if (last_delim_pos>0) {
+           //cut the comma-separated list in place; parents[] points into _parents
+           for (p=_parents+1;p<=_parents+last_delim_pos;p++) {
+              if (*p==',') {
+                 char* ep=p-1;
+                 while (*ep==' ' && ep>_parents) ep--;
+                 *(ep+1)=0; //end the string there
+                 parents[i]=p+1;
+                 i++;
+                 }
+              }
+           }
+         } //has Parent field
+   } //GFF3
+  else { // GTF-like expected
+   Parent=extractAttr("transcript_id",true);
+   if (Parent!=NULL) { //GTF2 format detected
+     if (is_transcript) {
+         // atypical GTF with a parent transcript line declared
+         ID=Parent;
+         Parent=NULL;
+         }
+     gene_id=extractAttr("gene_id"); // for GTF this is the only attribute accepted as geneID
+     if (gene_id==NULL)
+       gene_id=extractAttr("geneid");
+     gene_name=extractAttr("gene_name");
+     if (gene_name==NULL) {
+
+           gene_name=extractAttr("gene_sym");
+           if (gene_name==NULL) {
+               gene_name=extractAttr("gene");
+               if (gene_name==NULL)
+                  gene_name=extractAttr("genesymbol");
+               }
+           }
+     //prepare for parseAttr by adding '=' character instead of spaces for all attributes
+     //after the attribute name
+     p=info;
+     bool noed=true; //not edited after the last delim
+     bool nsp=false; //non-space found after last delim
+     while (*p!=0) {
+       if (*p==' ') {
+          if (nsp && noed) {
+             *p='=';
+             noed=false;
+             p++;
+             continue;
+             }
+           }
+         else nsp=true; //non-space
+       if (*p==';') { noed=true; nsp=false; }
+       p++;
+       }
+     } //GTF2 detected (no parent line)
+    else {// Parent is NULL, check for jigsaw format or other pre-GTF2 format
+     //char* fexon=strstr(fnamelc, "exon");
+     //if (fexon!=NULL) {
+     if (exontype==exgffExon) {
+       if (startsWith(track,"jigsaw")) {
+          is_cds=true;
+          strcpy(track,"jigsaw");
+          p=strchr(info,';');
+          if (p==NULL) { Parent=Gstrdup(info); info=NULL; }
+           else { Parent=Gstrdup(info,p-1);
+                  info=p+1;
+                }
+          }
+        } //exon feature?
+        if (Parent==NULL && exontype>=exgffCDS &&
+               (i=strcspn(info,"; \t\n\r"))<=(int)(strlen(info)+1)) {
+          //one word ID ? really desperate attempt to parse it here
+          Parent=Gstrdup(info,info+i-1);
+          info=NULL; //discard anything else on the line
+          }
+     }
+   if (Parent!=NULL) { //GTF transcript_id for exon/CDS feature
+      _parents=Parent;
+      GMALLOC(parents,sizeof(char*));
+      num_parents=1;
+      parents[0]=_parents;
+      }
+   } //GTF-like
+
+ //parse other potentially useful features
+ if (is_gff3) {
+   if ((p=strstr(info,"Target="))!=NULL) { //has Target attr
+      p+=7;
+      while (*p!=';' && *p!=0 && *p!=' ') p++;
+      if (*p!=' ') {
+         GError("Error parsing target coordinates from GFF line:\n%s\n",l);
+         }
+      if (!parseUInt(p,qstart))
+         GError("Error parsing target start coordinate from GFF line:\n%s\n",l);
+      if (*p!=' ') {
+         GError("Error parsing next target coordinate from GFF line:\n%s\n",l);
+         }
+      p++;
+      if (!parseUInt(p,qend))
+         GError("Error parsing target end coordinate from GFF line:\n%s\n",l);
+      }
+   if ((p=strifind(info,"Qreg="))!=NULL) { //has Qreg attr
+       p+=5;
+       if (!parseUInt(p,qstart))
+         GError("Error parsing target start coordinate from GFF line:\n%s\n",l);
+       if (*p!='-') {
+          GError("Error parsing next target coordinate from GFF line:\n%s\n",l);
+          }
+       p++;
+       if (!parseUInt(p,qend))
+         GError("Error parsing target end coordinate from GFF line:\n%s\n",l);
+       if (*p=='|' || *p==':') {
+         p++;
+         if (!parseUInt(p,qlen))
+           GError("Error parsing target length from GFF Qreg|: \n%s\n",l);
+         }
+       }//has Qreg attr
+   if (qlen==0 && (p=strifind(info,"Qlen="))!=NULL) {
+     p+=5;
+     if (!parseUInt(p,qlen))
+         GError("Error parsing target length from GFF Qlen:\n%s\n",l);
+     }
+   }//parsing some useful attributes in GFF3 records
+ if (ID==NULL && parents==NULL) {
+      if (reader->gff_warns)
+          GMessage("Warning: could not parse ID or Parent from GFF line:\n%s\n",dupline);
+      return; //skip
+      }
+ skip=false;
+}
+
+
+// Sets the CDS boundaries of this object, clamped to its own start..end range.
+// The phase is recorded only when the boundary that was given lies inside the
+// object: at cd_start for '+' strand features, at cd_end for '-' strand ones.
+// The object is then flagged as a transcript with "exon" subfeatures; a
+// mono-feature object gets a single covering exon (or has its first exon
+// retyped as a plain exon).
+void GffObj::addCDS(uint cd_start, uint cd_end, char phase) {
+  const bool startInside = (cd_start>=this->start);
+  this->CDstart = startInside ? cd_start : this->start;
+  if (startInside && strand=='+') this->CDphase=phase;
+
+  const bool endInside = (cd_end<=this->end);
+  this->CDend = endInside ? cd_end : this->end;
+  if (endInside && strand=='-') this->CDphase=phase;
+
+  isTranscript(true);
+  exon_ftype_id=gff_fid_exon;
+  if (!monoFeature()) return;
+  if (exons.Count()==0) {
+     addExon(this->start, this->end,0,'.',0,0,false,exgffExon);
+     }
+  else {
+     exons[0]->exontype=exgffExon;
+     }
+}
+
+// Adds the segment described by GFF line gl as an exon/subfeature of this object.
+// First decides whether the line's feature type is acceptable as a subfeature of
+// this object (returning -1 when it is discarded), then delegates to the
+// coordinate-based addExon() overload.
+//  reader     : only its gff_warns setting is read here, to enable warnings
+//  keepAttr   : when true, the attributes in gl->info are parsed and kept
+//  noExonAttr : with keepAttr, store them at this object's level (only if attrs
+//               is still NULL) instead of on the exon itself
+// Returns the exon index given by the coordinate-based overload, or -1.
+int GffObj::addExon(GffReader* reader, GffLine* gl, bool keepAttr, bool noExonAttr) {
+  //this will make sure we have the right subftype_id!
+  //int subf_id=-1;
+  if (!isTranscript() && gl->is_cds) {
+          //a CDS-type segment promotes this object to a transcript
+          isTranscript(true);
+          exon_ftype_id=gff_fid_exon;
+          if (exons.Count()==1) exons[0]->exontype=exgffExon;
+          }
+  if (isTranscript()) {
+     if (exon_ftype_id<0) {//exon_ftype_id=gff_fid_exon;
+          if (gl->exontype>0) exon_ftype_id=gff_fid_exon;
+                         else exon_ftype_id=names->feats.addName(gl->ftype);
+          }
+     //any recognized mRNA segment gets the generic "exon" type (also applies to CDS)
+     if (gl->exontype==0 && !gl->is_transcript) {
+          //extraneous mRNA feature, discard
+          if (reader->gff_warns)
+            GMessage("Warning: discarding unrecognized transcript subfeature '%s' of %s\n",
+                gl->ftype, gffID);
+          return -1;
+          }
+     }
+  else { //non-mRNA parent feature, check this subf type
+    int subf_id=names->feats.addName(gl->ftype);
+    if (exon_ftype_id<0 || exons.Count()==0) //never assigned a subfeature type before (e.g. first exon being added)
+       exon_ftype_id=subf_id;
+     else {
+       if (exon_ftype_id!=subf_id) {
+         //
+         if (exon_ftype_id==ftype_id && exons.Count()==1 && exons[0]->start==start && exons[0]->end==end) {
+            //the existing exon was just a dummy one created by default, discard it
+            exons.Clear();
+            covlen=0;
+            exon_ftype_id=subf_id; //allow the new subfeature to completely takeover
+            }
+         else { //multiple subfeatures, prefer those with
+             //the warning is printed in two parts: the part below names both
+             //subfeature types, the branch taken next appends the discarded one
+             if (reader->gff_warns)
+               GMessage("GFF Warning: multiple subfeatures (%s and %s) found for %s, discarding ",
+                  names->feats.getName(subf_id), names->feats.getName(exon_ftype_id),gffID);
+            if (gl->exontype!=0) { //new feature is an exon, discard previously parsed subfeatures
+               if (reader->gff_warns) GMessage("%s.\n", names->feats.getName(exon_ftype_id));
+               exon_ftype_id=subf_id;
+               exons.Clear();
+               covlen=0;
+               }
+              else { //discard new feature
+               if (reader->gff_warns) GMessage("%s.\n", names->feats.getName(subf_id));
+               return -1; //skip this 2nd subfeature type for this parent!
+               }
+            }
+         } //incoming subfeature is of different type
+       } //new subfeature type
+    } //non-mRNA parent
+  int eidx=addExon(gl->fstart, gl->fend, gl->score, gl->phase,
+         gl->qstart,gl->qend, gl->is_cds, gl->exontype);
+  if (eidx<0) return eidx; //this should never happen
+  if (keepAttr) {
+     if (noExonAttr) {
+         if (attrs==NULL) //place the parsed attributes directly at transcript level
+           parseAttrs(attrs, gl->info);
+         }
+       else { //need all exon-level attributes
+         parseAttrs(exons[eidx]->attrs, gl->info, true);
+         }
+      }
+  return eidx;
+}
+
+
+// Adds (or merges) the segment segstart..segend into this object's exon list.
+//  sc, fr   : score and phase of the segment
+//  qs, qe   : query coordinates (swapped if reversed; a 0 start becomes 1)
+//  iscds    : segment is CDS-type, so CDstart/CDend/CDphase are updated from it
+//  exontype : exgff* code of the segment; overlap handling only runs for codes > 0
+// Returns the index of the exon that now represents the segment (an existing
+// exon when the segment was merged into it or already contained in it), or -1
+// when exons.Add() fails.
+int GffObj::addExon(uint segstart, uint segend, double sc, char fr, int qs, int qe, bool iscds, char exontype) {
+  if (exons.Count()==0) {
+      if (iscds) isCDS=true; //for now, assume CDS only if first "exon" given is a CDS
+      if (exon_ftype_id<0) {
+         exon_ftype_id = isTranscript() ? gff_fid_exon : ftype_id;
+         }
+      }
+  //special treatment of start/stop codon features, they might be broken/split between exons
+  //and in that case some providers will still give the wrong end coordinate as start+2 (e.g. UCSC)
+  //so we should not trust the end coordinate for such features
+  if (exontype==exgffStart || exontype==exgffStop) {
+     //collapse the codon to a single position: its end on '-', its start otherwise
+     if (strand=='-') segstart=segend;
+                else  segend=segstart;
+     if (exontype==exgffStart) {
+           if (CDstart==0 || segstart<CDstart) CDstart=segstart;
+           }
+         else {
+           if (segstart>CDend) CDend=segstart;
+           }
+     }
+    else if (iscds) { //update CDS anchors:
+     if (CDstart==0 || segstart<CDstart)  {
+           CDstart=segstart;
+           if (exontype==exgffCDS && strand=='+') CDphase=fr;
+           }
+     if (segend>CDend) {
+           if (exontype==exgffCDS && strand=='-') CDphase=fr;
+           CDend=segend;
+           }
+     }
+   else { // not a CDS/start/stop
+     isCDS=false;
+     }
+  if (qs || qe) {
+    if (qs>qe) Gswap(qs,qe);
+    if (qs==0) qs=1;
+	}
+  int ovlen=0;
+  if (exontype>0) { //check for overlaps between exon-type segments
+      int oi=exonOverlapIdx(segstart, segend, &ovlen);
+      if (oi>=0) { //overlap existing segment
+         if (ovlen==0) {
+			  //adjacent segments will be merged
+			  //e.g. CDS to (UTR|exon)
+			  if ((exons[oi]->exontype>=exgffUTR && exontype==exgffCDS) ||
+				  (exons[oi]->exontype==exgffCDS && exontype>=exgffUTR)) {
+					expandExon(oi, segstart, segend, exgffCDSUTR, sc, fr, qs, qe);
+					return oi;
+					}
+			  //CDS adjacent to stop_codon: UCSC does (did?) this
+			  if ((exons[oi]->exontype==exgffStop && exontype==exgffCDS) ||
+				  (exons[oi]->exontype==exgffCDS && exontype==exgffStop)) {
+					expandExon(oi, segstart, segend, exgffCDS, sc, fr, qs, qe);
+					return oi;
+					}
+        }
+		 //only allow this for CDS within exon, stop_codon within (CDS|UTR|exon),
+         //                   start_codon within (CDS|exon)
+        if (exons[oi]->start<=segstart && exons[oi]->end>=segend) {
+          //larger segment given first, now the smaller included one is redundant
+           if (exons[oi]->exontype>exontype &&
+             !(exons[oi]->exontype==exgffUTR && exontype==exgffCDS)) {
+              return oi; //only used to store attributes from current GffLine
+              }
+           else {
+          	 if (gff_show_warnings && (exons[oi]->start<segstart || exons[oi]->end>segend)) {
+          		 GMessage("GFF Warning: unusual segment inclusion: %s(%d-%d) within %s(%d-%d) (ID=%s)\n",
+          				 strExonType(exontype), segstart, segend, strExonType(exons[oi]->exontype),
+          				 exons[oi]->start, exons[oi]->end, this->gffID);
+          	 }
+            return oi;
+           }
+        }
+        if (exontype>exons[oi]->exontype &&
+             segstart<=exons[oi]->start && segend>=exons[oi]->end &&
+             !(exontype==exgffUTR && exons[oi]->exontype==exgffCDS)) {
+               //smaller segment given first, so we have to enlarge it
+			  expandExon(oi, segstart, segend, exontype, sc, fr, qs, qe);
+				//this should also check for overlapping next exon (oi+1) ?
+              return oi;
+              }
+        //there is also the special case of "ribosomal slippage exception" (programmed frameshift)
+        //where two CDS segments may actually overlap for 1 or 2 bases, but there should be only one encompassing exon
+		//if (ovlen>2 || exons[oi]->exontype!=exgffCDS || exontype!=exgffCDS) {
+		// had to relax this because of some weird UCSC annotations with exons partially overlapping the CDS segments
+		/*
+		if (ovlen>2 && exons[oi]->exontype!=exgffUTR && exontype!=exgffUTR) {
+		   if (gff_show_warnings)
+			   GMessage("GFF Warning: discarding overlapping feature segment (%d-%d) (vs %d-%d (%s)) for GFF ID %s on %s\n",
+			   segstart, segend, exons[oi]->start, exons[oi]->end, getSubfName(), gffID, getGSeqName());
+		   hasErrors(true);
+		   return -1; //segment NOT added
+		   }
+		*/
+
+		 if ((ovlen>2 || ovlen==0) || exons[oi]->exontype!=exgffCDS || exontype!=exgffCDS) {
+		  if (gff_show_warnings)
+			 GMessage("GFF Warning: merging overlapping/adjacent feature segment %s (%d-%d) with %s (%d-%d) for GFF ID %s on %s\n",
+					 strExonType(exontype), segstart, segend, strExonType(exons[oi]->exontype), exons[oi]->start, exons[oi]->end, gffID, getGSeqName());
+			expandExon(oi, segstart, segend, exontype, sc, fr, qs, qe);
+			return oi;
+		 }
+		// else add the segment if the overlap is small and between two CDS segments
+		//TODO: we might want to add an attribute here with the slippage coordinate and size?
+        //the overlapping bases will be counted again when the new exon is added below
+        covlen-=ovlen;
+		}//overlap or adjacent to existing segment
+	 } //check for overlap
+   // --- no overlap, or accepted micro-overlap (ribosomal slippage)
+   // create & add the new segment
+   /*
+   if (start>0 && exontype==exgffCDS && exons.Count()==0) {
+      //adding a CDS directly as the first subfeature of a declared parent
+      segstart=start;
+      segend=end;
+      }
+   */
+   GffExon* enew=new GffExon(segstart, segend, sc, fr, qs, qe, exontype);
+   int eidx=exons.Add(enew);
+   if (eidx<0) {
+    //this would actually be acceptable if the object is a "Gene" and "exons" are in fact isoforms
+     if (gff_show_warnings)
+       GMessage("GFF Warning: failed adding segment %d-%d for %s (discarded)!\n",
+            segstart, segend, gffID);
+     delete enew;
+     hasErrors(true);
+     return -1;
+     }
+   covlen+=(int)(exons[eidx]->end-exons[eidx]->start)+1;
+   //adjust parent feature coordinates to contain this exon
+   if (start==0 || start>exons.First()->start) {
+     start=exons.First()->start;
+     }
+   if (end<exons.Last()->end) end=exons.Last()->end;
+
+   return eidx;
+}
+
+// Enlarges exon oi so that it covers segstart..segend, takes over the given
+// exontype (and the phase, for CDS), absorbs following exons that are fully
+// included and of a lower exontype, and finally refreshes this object's
+// start/end and the GSeqStat record referenced by uptr (when set).
+//  sc     : replaces the exon score unless it is 0
+//  qs, qe : widen the exon's query range when non-zero
+void GffObj::expandExon(int oi, uint segstart, uint segend, char exontype, double sc, char fr, int qs, int qe) {
+  //oi is the index of the *first* overlapping segment found that must be enlarged
+  covlen-=exons[oi]->len(); //the exon's new length is added back further down
+  if (segstart<exons[oi]->start)
+    exons[oi]->start=segstart;
+  if (qs && qs<exons[oi]->qstart) exons[oi]->qstart=qs;
+  if (segend>exons[oi]->end)
+    exons[oi]->end=segend;
+  if (qe && qe>exons[oi]->qend) exons[oi]->qend=qe;
+  //warning: score cannot be properly adjusted! e.g. if it's a p-value it's just going to get worse
+  if (sc!=0) exons[oi]->score=sc;
+  covlen+=exons[oi]->len();
+  //if (exons[oi]->exontype< exontype) -- always true
+  exons[oi]->exontype = exontype;
+  if (exontype==exgffCDS) exons[oi]->phase=fr;
+  //we must check if any more exons are also overlapping this
+  int ni=oi+1; //next exon index after oi
+  while (ni<exons.Count() && segend>=exons[ni]->start) { // next segment overlaps new enlarged segment
+     //only allow this if next segment is fully included, and a subordinate
+     if (exons[ni]->exontype<exontype && exons[ni]->end<=segend) {
+/* I guess we have to relax this due to stupid UCSC hg18 files having a start_codon sticking out
+chr1	hg18_knownGene	start_codon	69806911	69806913	0.000000	+	.
+chr1	hg18_knownGene	CDS	69806911	69806912	0.000000	+	0
+chr1	hg18_knownGene	exon	69805456	69806912	0.000000	+	.
+*/
+         if (exons[ni]->qstart<exons[oi]->qstart) exons[oi]->qstart=exons[ni]->qstart;
+         if (exons[ni]->qend>exons[oi]->qend) exons[oi]->qend=exons[ni]->qend;
+         //ni is not advanced: after the delete the next exon is found at index ni
+         exons.Delete(ni);
+         }
+      else {
+         if (gff_show_warnings) GMessage("GFF Warning: overlapping existing exon(%d-%d) while expanding to %d-%d for GFF ID %s\n",
+                exons[ni]->start, exons[ni]->end, segstart, segend, gffID);
+         //hasErrors(true);
+         break;
+         }
+     }
+  // -- make sure any other related boundaries are updated:
+  start=exons.First()->start;
+  end=exons.Last()->end;
+  if (uptr!=NULL) { //collect stats about the underlying genomic sequence
+    GSeqStat* gsd=(GSeqStat*)uptr;
+    if (start<gsd->mincoord) gsd->mincoord=start;
+    if (end>gsd->maxcoord) gsd->maxcoord=end;
+    if (this->len()>gsd->maxfeat_len) {
+        gsd->maxfeat_len=this->len();
+        gsd->maxfeat=this;
+        }
+    }
+}
+
+// Removes the exon at index idx (out-of-range indexes are ignored), subtracts
+// its length from covlen and re-derives start/end (plus CDstart/CDend for a
+// CDS-only object) from the exons that remain.
+void GffObj::removeExon(int idx) {
+  /*
+   if (idx==0 && segs[0].start==gstart)
+                  gstart=segs[1].start;
+   if (idx==segcount && segs[segcount].end==gend)
+                  gend=segs[segcount-1].end;
+  */
+  if (idx<0 || idx>=exons.Count()) return;
+  int segstart=exons[idx]->start;
+  int segend=exons[idx]->end;
+  exons.Delete(idx);
+  covlen -= (int)(segend-segstart)+1;
+  //when the last exon was just removed there is no First()/Last() to read;
+  //the boundaries are left untouched, as removeExon(GffExon*) does
+  if (exons.Count() > 0) {
+    start=exons.First()->start;
+    end=exons.Last()->end;
+    if (isCDS) { CDstart=start; CDend=end; }
+    }
+}
+
+// Removes the exon object p from this feature, if it is one of its exons.
+// covlen is reduced by the exon's length; while exons remain, start/end (and
+// CDstart/CDend for a CDS-only object) are re-derived from the first and last one.
+void GffObj::removeExon(GffExon* p) {
+  //locate p in the exon list
+  int pos=0;
+  while (pos<exons.Count() && exons[pos]!=p) pos++;
+  if (pos>=exons.Count()) return; //not found, nothing to remove
+
+  //read the coordinates before the exon is deleted
+  const int segstart=exons[pos]->start;
+  const int segend=exons[pos]->end;
+  exons.Delete(pos);
+  covlen -= (int)(segend-segstart)+1;
+
+  if (exons.Count() <= 0) return;
+  start=exons.First()->start;
+  end=exons.Last()->end;
+  if (isCDS) {
+     CDstart=start;
+     CDend=end;
+     }
+}
+
+
+
+// Builds a GffObj from a parsed GFF/GTF line.
+//  gfrd       : reader that owns the parse; must not be NULL (GError otherwise).
+//               Its names pointer is set to the shared GffNames if still NULL and
+//               its gseqstats collection receives a GSeqStat for this sequence.
+//  gffline    : the parsed line; coordinates, strand, names and flags come from it
+//  keepAttr   : parse and keep the attributes found in gffline->info
+//  noExonAttr : passed through to addExon() when the line itself is an exon
+// Two cases are handled: a parented line that is not a transcript (the object is
+// created from its subfeature) and a feature that stands on its own, which must
+// carry an ID.
+GffObj::GffObj(GffReader *gfrd, GffLine* gffline, bool keepAttr, bool noExonAttr):
+     GSeg(0,0), exons(true,true,false), children(1,false) {
+  xstart=0;
+  xend=0;
+  xstatus=0;
+  partial=false;
+  isCDS=false;
+  uptr=NULL;
+  ulink=NULL;
+  parent=NULL;
+  udata=0;
+  flags=0;
+  CDstart=0;
+  CDend=0;
+  CDphase=0;
+  geneID=NULL;
+  gene_name=NULL;
+  attrs=NULL;
+  gffID=NULL;
+  track_id=-1;
+  gseq_id=-1;
+  ftype_id=-1;
+  exon_ftype_id=-1;
+  strand='.';
+  if (gfrd==NULL)
+    GError("Cannot use this GffObj constructor with a NULL GffReader!\n");
+  //take a reference on the shared name tables (created on first use)
+  gffnames_ref(names);
+  if (gfrd->names==NULL) gfrd->names=names;
+  //qlen=0;qstart=0;qend=0;
+  gscore=0;
+  uscore=0;
+  covlen=0;
+  qcov=0;
+  start=gffline->fstart;
+  end=gffline->fend;
+  gseq_id=names->gseqs.addName(gffline->gseqname);
+  track_id=names->tracks.addName(gffline->track);
+  strand=gffline->strand;
+  qlen=gffline->qlen;
+  qstart=gffline->qstart;
+  qend=gffline->qend;
+  //setup flags from gffline
+  isCDS=gffline->is_cds; //for now
+  isGene(gffline->is_gene);
+  isTranscript(gffline->is_transcript || gffline->exontype!=0);
+  //fromGff3(gffline->is_gff3);
+
+  if (gffline->parents!=NULL && !gffline->is_transcript) {
+    //GTF style -- create a GffObj directly by subfeature
+    //(also possible orphan GFF3 exon line, or an exon given before its parent (chado))
+    if (gffline->exontype!=0) { //recognized exon-like feature
+       ftype_id=gff_fid_transcript; //so this is some sort of transcript
+       exon_ftype_id=gff_fid_exon; //subfeatures MUST be exons
+       }
+     else {//unrecognized subfeatures
+       //make this GffObj of the same feature type
+       ftype_id=names->feats.addName(gffline->ftype);
+       }
+    if (gffline->ID==NULL) { //typical GTF2 without "transcript" line
+        //the object takes the ID of the line's first parent
+        gffID=Gstrdup(gffline->parents[0]);
+        this->createdByExon(true);
+        //this is likely the first exon/segment of the feature
+        addExon(gfrd, gffline, keepAttr, noExonAttr);
+        }
+      else { //a parented feature with an ID: orphan or premature GFF3 subfeature line
+        if (gffline->is_gff3 && gffline->exontype!=0) {
+             //premature exon given before its parent transcript
+             //create the transcript entry here
+             gffID=Gstrdup(gffline->parents[0]);
+             this->createdByExon(true);
+             //this is the first exon/segment of the transcript
+             addExon(gfrd, gffline, keepAttr, noExonAttr);
+             }
+        else { //unrecognized non-exon feature ? use the ID instead
+             this->hasGffID(true);
+             gffID=Gstrdup(gffline->ID);
+             if (keepAttr) this->parseAttrs(attrs, gffline->info);
+             }
+        }
+    } //non-transcript parented subfeature given directly
+  else {
+	//non-parented feature OR a recognizable transcript
+    //create a parent feature in its own right
+    gscore=gffline->score;
+    if (gffline->ID==NULL || gffline->ID[0]==0)
+      GError("Error: no ID found for GFF record start\n");
+    this->hasGffID(true);
+    gffID=Gstrdup(gffline->ID); //there must be an ID here
+    //if (gffline->is_transcript) ftype_id=gff_fid_mRNA;
+      //else
+    ftype_id=names->feats.addName(gffline->ftype);
+    if (gffline->is_transcript)
+      exon_ftype_id=gff_fid_exon;
+    if (keepAttr) this->parseAttrs(attrs, gffline->info);
+    }//no parent
+
+  if (gffline->gene_name!=NULL) {
+     gene_name=Gstrdup(gffline->gene_name);
+     }
+  if (gffline->gene_id) {
+     geneID=Gstrdup(gffline->gene_id);
+     }
+  else if (gffline->is_transcript && gffline->parents) {
+	 //no explicit gene id: a transcript's first parent is used as its gene ID
+	 geneID=Gstrdup(gffline->parents[0]);
+     }
+
+  //GSeqStat* gsd=gfrd->gseqstats.AddIfNew(new GSeqStat(gseq_id,names->gseqs.lastNameUsed()),true);
+  GSeqStat* gsd=gfrd->gseqstats.AddIfNew(new GSeqStat(gseq_id,gffline->gseqname), true);
+  //uptr is read again by expandExon() to update the per-sequence statistics
+  uptr=gsd;
+  /*
+  if (start<gsd->mincoord) gsd->mincoord=start;
+  if (end>gsd->maxcoord) gsd->maxcoord=end;
+    if (this->len()>gsd->maxfeat_len) {
+        gsd->maxfeat_len=this->len();
+        gsd->maxfeat=this;
+        }
+  */
+}
+
+//Returns the next usable input line as a GffLine kept in this->gffline, or NULL at end of input.
+//A line that is still pending (gffline!=NULL) is returned again; the caller is expected to delete
+//it and reset gffline to NULL after processing. Passed over: lines whose first non-blank character
+//is '#', lines shorter than 10 characters, lines flagged with 'skip' by the GffLine constructor and
+//lines having neither an ID nor a parent (the latter with a warning when gff_warns is set).
+GffLine* GffReader::nextGffLine() {
+ if (gffline!=NULL) return gffline; //caller should free gffline after processing
+ while (gffline==NULL) {
+    int llen=0;
+    buflen=GFF_LINELEN-1;
+    char* l=fgetline(linebuf, buflen, fh, &fpos, &llen);
+    if (l==NULL) {
+         return NULL; //end of file
+         }
+
+
+    // _crc_result.process_bytes( linebuf, llen );
+    int ns=0; //first nonspace position
+    //isspace() is undefined for negative char values, so go through unsigned char
+    while (l[ns]!=0 && isspace(static_cast<unsigned char>(l[ns]))) ns++;
+    if (l[ns]=='#' || llen<10) continue;
+    gffline=new GffLine(this, l);
+    if (gffline->skip) {
+       delete gffline;
+       gffline=NULL;
+       continue;
+       }
+    if (gffline->ID==NULL && gffline->parents==NULL)  { //it must have an ID
+        //this might not be needed, already checked in the GffLine constructor
+        if (gff_warns)
+            GMessage("Warning: malformed GFF line, no parent or record Id (skipping)\n");
+        delete gffline;
+        gffline=NULL;
+        //no explicit continue needed: gffline==NULL keeps the loop going
+        }
+    }
+return gffline;
+}
+
+
+char* GffReader::gfoBuildId(const char* id, const char* ctg) {
+//builds the key "<id>~<ctg>" in a buffer obtained with GMALLOC;
+//caller must free the returned pointer
+ const size_t id_len=strlen(id);
+ const size_t ctg_len=strlen(ctg);
+ char* key=NULL;
+ GMALLOC(key, id_len+ctg_len+2); //+2: the '~' separator and the terminating NUL
+ memcpy(key, id, id_len);
+ key[id_len]='~';
+ memcpy(key+id_len+1, ctg, ctg_len+1); //copies the terminating NUL as well
+ return key;
+}
+/*
+void GffReader::gfoRemove(const char* id, const char* ctg) {
+ char* buf=gfoBuildId(id,ctg);
+ phash.Remove(buf);
+ GFREE(buf);
+}
+*/
+GffObj* GffReader::gfoAdd(GffObj* gfo) {
+ //records sharing an ID are kept together in one vector stored in phash under that ID
+ GPVec<GffObj>* sameId=phash.Find(gfo->gffID);
+ if (!sameId) {
+	 sameId=new GPVec<GffObj>(false);
+ }
+ const int slot=sameId->Add(gfo);
+ //the vector is (re-)registered under this ID whether or not it was found above
+ phash.Add(gfo->gffID, sameId);
+ return sameId->Get(slot);
+}
+
+GffObj* GffReader::gfoAdd(GPVec<GffObj>& glst, GffObj* gfo) {
+ //append to an already known same-ID list and hand back the stored entry
+ return glst[glst.Add(gfo)];
+}
+
+GffObj* GffReader::gfoFind(const char* id, const char* ctg,
+	            GPVec<GffObj>** glst, char strand, uint start, uint end) {
+ //Looks up the records stored in phash under 'id' and returns the first one compatible with the
+ //given filters (NULL if none). When glst is not NULL it receives the whole same-ID list
+ //(possibly NULL), whether or not a compatible record was found.
+ GPVec<GffObj>* candidates=phash.Find(id);
+ if (glst!=NULL) *glst=candidates;
+ if (candidates==NULL) return NULL;
+ for (int k=0;k<candidates->Count();k++) {
+    GffObj* cand=candidates->Get(k);
+    //filter: reference sequence name (only if ctg was given)
+    if (ctg!=NULL && strcmp(ctg, cand->getGSeqName())!=0) continue;
+    //filter: strand (only if given, and the stored record has a defined strand)
+    if (strand && cand->strand!='.' && strand!=cand->strand) continue;
+    //filter: location (only if start was given)
+    if (start>0) {
+       const int startDelta=abs((int)start-(int)cand->start);
+       if (startDelta>(int)GFF_MAX_LOCUS) continue; //not in the same locus
+       const bool outsideRange = (end>0) && (cand->start>end || cand->end<start);
+       if (outsideRange) continue; //no overlap with start..end
+       }
+    //must be the same transcript, according to given comparison criteria
+    return cand;
+    }
+ return NULL;
+}
+/*
+GffObj* GffReader::replaceGffRec(GffLine* gffline, bool keepAttr, bool noExonAttr, int replaceidx) {
+  GffObj* newgfo=new GffObj(this, gffline, keepAttr, noExonAttr);
+  GffObj* r=NULL;
+  if (replaceidx>=0) {
+     gflst.Put(replaceidx,newgfo);
+     r=gfoAdd(newgfo);
+     }
+   else {
+     int gfoidx=gflst.Add(newgfo);
+     r=gfoAdd(newgfo);
+     }
+  return r;
+} */
+
+GffObj* GffReader::updateParent(GffObj* newgfo, GffObj* parent) {
+  //Registers newgfo as a child of parent (both must be non-NULL) and returns newgfo.
+  parent->children.Add(newgfo);
+  if (!newgfo->parent) newgfo->parent=parent; //an already assigned parent is kept
+  newgfo->setLevel(parent->getLevel()+1);
+  if (!parent->isGene()) return newgfo;
+  //a gene parent passes down its gene name / gene ID when the child has none
+  if (newgfo->gene_name==NULL && parent->gene_name!=NULL) {
+      newgfo->gene_name=Gstrdup(parent->gene_name);
+      }
+  if (newgfo->geneID==NULL && parent->geneID!=NULL) {
+      newgfo->geneID=Gstrdup(parent->geneID);
+      }
+  return newgfo;
+}
+
+GffObj* GffReader::newGffRec(GffLine* gffline, bool keepAttr, bool noExonAttr,
+                          GffObj* parent, GffExon* pexon, GPVec<GffObj>* glst) {
+  //Creates a GffObj from gffline, appends it to gflst and registers it by ID: in the given
+  //same-ID list glst when provided, otherwise through phash. With a parent, the new record is
+  //linked under it and pexon (if given) is removed from the parent's exons.
+  //Duplicate-ID counting is not performed here.
+  GffObj* created=new GffObj(this, gffline, keepAttr, noExonAttr);
+  gflst.Add(created);
+  GffObj* stored=NULL;
+  if (glst) stored=gfoAdd(*glst, created);
+       else stored=gfoAdd(created);
+  if (parent==NULL) return stored;
+  updateParent(stored, parent);
+  if (pexon!=NULL) {
+    parent->removeExon(pexon);
+    }
+  return stored;
+}
+
+GffObj* GffReader::updateGffRec(GffObj* prevgfo, GffLine* gffline,
+                                         bool keepAttr) {
+ //Overwrites feature type, coordinates and flags of an existing record with those of gffline
+ //and, with keepAttr, replaces its attributes. Returns prevgfo (NULL is passed through).
+ if (!prevgfo) return NULL;
+ GffObj& rec=*prevgfo;
+ rec.ftype_id=rec.names->feats.addName(gffline->ftype);
+ rec.start=gffline->fstart;
+ rec.end=gffline->fend;
+ rec.isGene(gffline->is_gene);
+ const bool transcriptLike = gffline->is_transcript || gffline->exontype!=0;
+ rec.isTranscript(transcriptLike);
+ const bool lineHasID = (gffline->ID!=NULL);
+ rec.hasGffID(lineHasID);
+ if (!keepAttr) return prevgfo;
+ //drop whatever attributes were collected so far, then parse the ones on this line
+ if (rec.attrs!=NULL) rec.attrs->Clear();
+ rec.parseAttrs(rec.attrs, gffline->info);
+ return prevgfo;
+}
+
+
+//Adds gffline as an exon/segment of prevgfo.
+//  - a strand mismatch is resolved by adopting the line's strand when prevgfo has none ('.');
+//    otherwise the segment is reported and dropped (the call still returns true)
+//  - a segment farther than GFF_MAX_LOCUS from prevgfo is reported as a probable duplicate ID:
+//    the process exits unless gff_warns is set, in which case false is returned (the segment is
+//    still added)
+//  - a successfully added segment that has its own ID and is not a recognized exon type is also
+//    recorded in pex as a potential future parent
+//Returns false only for the "too far apart" case.
+bool GffReader::addExonFeature(GffObj* prevgfo, GffLine* gffline, GHash<CNonExon>& pex, bool noExonAttr) {
+  bool r=true;
+  if (gffline->strand!=prevgfo->strand) {
+        if (prevgfo->strand=='.') {
+            prevgfo->strand=gffline->strand;
+        }
+     else {
+       //the format has exactly five conversions; a sixth argument would never be printed
+       GMessage("GFF Error at %s (%c): exon %d-%d (%c) found on different strand; discarded.\n",
+       prevgfo->gffID, prevgfo->strand,
+       gffline->fstart, gffline->fend, gffline->strand);
+       //r=false;
+       return true;
+       }
+   }
+  //gap between the new segment and the span covered so far (0 when they touch or overlap)
+  int gdist=(gffline->fstart>prevgfo->end) ? gffline->fstart-prevgfo->end :
+                      ((gffline->fend<prevgfo->start)? prevgfo->start-gffline->fend :
+                         0 );
+  if (gdist>(int)GFF_MAX_LOCUS) { //too far apart, most likely this is a duplicate ID
+    GMessage("Error: duplicate GFF ID '%s' (or exons too far apart)!\n",prevgfo->gffID);
+    //validation_errors = true;
+    r=false;
+    if (!gff_warns) exit(1);
+    }
+  int eidx=prevgfo->addExon(this, gffline, !noExonAttr, noExonAttr);
+  if (eidx>=0 && gffline->ID!=NULL && gffline->exontype==0)
+      subfPoolAdd(pex, prevgfo);
+  return r;
+}
+
+CNonExon* GffReader::subfPoolCheck(GffLine* gffline, GHash<CNonExon>& pex, char*& subp_name) {
+  //Checks whether any parent named on gffline is a subfeature stored earlier in pex.
+  //On a hit the entry is returned and subp_name keeps the lookup key (to be released by the
+  //caller); otherwise NULL is returned and every key built along the way has been released.
+  subp_name=NULL;
+  for (int p=0;p<gffline->num_parents;++p) {
+    const char* parentId=gffline->parents[p];
+    const bool parentDiscarded = transcriptsOnly && discarded_ids.Find(parentId)!=NULL;
+    if (parentDiscarded) continue;
+    subp_name=gfoBuildId(parentId, gffline->gseqname); //e.g. mRNA name
+    CNonExon* hit=pex.Find(subp_name);
+    if (hit!=NULL) {
+       return hit;
+       }
+    GFREE(subp_name);
+    }
+  return NULL;
+}
+
+void GffReader::subfPoolAdd(GHash<CNonExon>& pex, GffObj* newgfo) {
+//this might become a parent feature later
+//the pool key is built from the reader's current line (member gffline), not from newgfo
+if (newgfo->exons.Count()<=0) return;
+char* poolKey=gfoBuildId(gffline->ID, gffline->gseqname);
+CNonExon* entry=new CNonExon(newgfo, newgfo->exons[0], gffline);
+pex.Add(poolKey, entry);
+GFREE(poolKey);
+}
+
+GffObj* GffReader::promoteFeature(CNonExon* subp, char*& subp_name, GHash<CNonExon>& pex,
+    bool keepAttr, bool noExonAttr) {
+  //Turns a pooled subfeature into a record of its own, created from the line stored with it.
+  GffObj* owner=subp->parent; //record currently holding the subfeature (e.g. gene)
+  GffLine* storedLine=subp->gffline;
+  storedLine->discardParent();
+  //the new record becomes a child of 'owner' and the old exon entry is removed from it
+  GffObj* promoted=newGffRec(storedLine, keepAttr, noExonAttr, owner, subp->exon);
+  //no longer a potential parent, it is registered in phash by now
+  pex.Remove(subp_name);
+  owner->promotedChildren(true);
+  return promoted; //the newly promoted feature
+}
+
+//Loads every remaining record of the input: each line obtained from nextGffLine() either starts a new
+//GffObj, updates a record seen earlier under the same ID, or is attached to its parent(s) as exon/child.
+//The whole file has to be parsed because exons and other subfeatures can be scattered, unordered in the input.
+//Features sharing an ID are kept as separate (discontinuous) records if they do NOT overlap, *except* that
+//same-strand transcript segments less than 1000 bases apart are merged as exons (e.g. TRNAR15 (rna1940) in RefSeq)
+void GffReader::readAll(bool keepAttr, bool mergeCloseExons, bool noExonAttr) {
+  bool validation_errors = false; //set when addExonFeature() fails; makes this function exit(1) at the end
+  //keepAttr and noExonAttr are forwarded to the record-building helpers; mergeCloseExons only to gflst.finalize()
+  GHash<CNonExon> pex; //keep track of any "exon"-like features that have an ID
+                     //and thus could become promoted to parent features
+  while (nextGffLine()!=NULL) { //the member gffline now holds the line to process
+     GffObj* prevseen=NULL; //record found earlier under the same ID, if any
+     GPVec<GffObj>* prevgflst=NULL; //same-ID list filled in by gfoFind()
+     if (gffline->ID && gffline->exontype==0) {
+	 //>> for a parent-like IDed feature (mRNA, gene, etc.)
+		 //look for same ID on the same chromosome/strand/locus
+		 prevseen=gfoFind(gffline->ID, gffline->gseqname, &prevgflst, gffline->strand, gffline->fstart);
+		 if (prevseen!=NULL) {
+				//same ID/chromosome combo encountered before
+				if (prevseen->createdByExon()) {
+					  if (gff_show_warnings && (prevseen->start<gffline->fstart ||
+					        prevseen->end>gffline->fend))
+					  	GMessage("GFF Warning: invalid coordinates for %s parent feature (ID=%s)\n", gffline->ftype, gffline->ID);
+					//an exon of this ID was given before
+					//this line has the main attributes for this ID
+					  updateGffRec(prevseen, gffline, keepAttr);
+					  }
+				 else {
+					//- duplicate ID -- this must be a discontinuous feature according to GFF3 specs
+				   //   e.g. a trans-spliced transcript
+				   if (prevseen->overlap(gffline->fstart, gffline->fend)) {
+					  //overlapping with same ID not allowed
+					 GMessage("GFF Error: duplicate/invalid '%s' feature ID=%s\n", gffline->ftype, gffline->ID);
+					 //validation_errors = true;
+					 if (gff_warns) { //tolerant mode: drop this line and go on
+					 	   delete gffline;
+					 	   gffline=NULL;
+					 	   continue;
+					 	   }
+					 else exit(1);
+				     }
+				    //create a new entry with the same ID
+				   int distance=INT_MAX; //gap to the previous same-ID transcript; stays INT_MAX if not comparable
+				   if (prevseen->isTranscript() && prevseen->strand==gffline->strand) {
+				  	 if (prevseen->start>=gffline->fstart)
+				  		    distance=prevseen->start-gffline->fend;
+				  	 else
+				  		 distance=gffline->fstart-prevseen->end;
+				   }
+				   if (distance<1000) {//FIXME: arbitrary proximity threshold (yuck)
+				  	 //exception: make this an exon of previous ID
+				  	 //addExonFeature(prevseen, gffline, pex, noExonAttr);
+				  	 prevseen->addExon(this, gffline, false, true);
+				   }
+				   else { //create a separate entry (true discontinuous feature)
+				  	   prevseen=newGffRec(gffline, keepAttr, noExonAttr,
+				  	          prevseen->parent, NULL, prevgflst);
+				        }
+					} //duplicate ID on the same chromosome
+				} //prevseen != NULL
+       } //parent-like ID feature
+    if (gffline->parents==NULL) {//start GFF3-like record with no parent (mRNA, gene)
+       if (!prevseen) newGffRec(gffline, keepAttr, noExonAttr, NULL, NULL, prevgflst);
+       }
+    else { //--- it's a child feature (exon/CDS but could still be a mRNA with gene(s) as parent)
+       //updates all the declared parents with this child
+       bool found_parent=false;
+       GffObj* newgfo=prevseen; //record for this line, if one exists already
+       GPVec<GffObj>* newgflst=NULL; //same-ID list of the last parent looked up
+       for (int i=0;i<gffline->num_parents;i++) {
+            if (transcriptsOnly && discarded_ids.Find(gffline->parents[i])!=NULL)
+                continue; //skipping discarded parent feature
+            GffObj* parentgfo=NULL;
+            if (gffline->is_transcript || gffline->exontype==0) {//possibly a transcript
+               parentgfo=gfoFind(gffline->parents[i], gffline->gseqname,
+                                    &newgflst, gffline->strand, gffline->fstart, gffline->fend);
+               }
+            else {
+               //for exon-like entities we only need a parent to be in locus distance,
+               //on the same strand
+               parentgfo=gfoFind(gffline->parents[i], gffline->gseqname,
+                                     &newgflst, gffline->strand, gffline->fstart);
+               }
+            if (parentgfo!=NULL) { //parent GffObj parsed earlier
+                   found_parent=true;
+                   if (parentgfo->isGene() && gffline->is_transcript
+                                   && gffline->exontype==0) {
+                       //not an exon, but a transcript parented by a gene
+                       if (newgfo) {
+                           updateParent(newgfo, parentgfo);
+                           }
+                         else {
+                           newgfo=newGffRec(gffline, keepAttr, noExonAttr, parentgfo);
+                           }
+                   }
+                   else { //potential exon subfeature?
+                  	 //always discards dummy "intron" features
+                       if (!(gffline->exontype==exgffIntron && (parentgfo->isTranscript() || parentgfo->exons.Count()>0))) {
+                        if (!addExonFeature(parentgfo, gffline, pex, noExonAttr))
+                          validation_errors=true;
+                       }
+                   }
+                } //overlapping parent feature found
+            } //for each parsed parent Id
+       if (!found_parent) { //new GTF-like record starting here with a subfeature directly
+             //or it could be some chado GFF3 barf with exons coming BEFORE their parent :(
+            //check if this feature isn't parented by a previously stored "exon" subfeature
+            char* subp_name=NULL; //pool key set by subfPoolCheck() on a hit; released below
+            CNonExon* subp=subfPoolCheck(gffline, pex, subp_name);
+            if (subp!=NULL) { //found a subfeature that is the parent of this gffline
+               //promote that subfeature to a full GffObj
+               GffObj* gfoh=promoteFeature(subp, subp_name, pex, keepAttr, noExonAttr);
+               //add current gffline as an exon of the newly promoted subfeature
+               if (!addExonFeature(gfoh, gffline, pex, noExonAttr))
+                      validation_errors=true;
+               }
+              else { //no parent seen before,
+               //reuse the record found above under the same ID, if any
+               GffObj* ngfo=prevseen;
+               if (ngfo==NULL) {
+                   //if it's an exon type, create directly the parent with this exon
+                   //but if it's recognized as a transcript, the object itself is created
+                   ngfo=newGffRec(gffline, keepAttr, noExonAttr, NULL, NULL, newgflst);
+                   }
+               if (!ngfo->isTranscript() &&
+                     gffline->ID!=NULL && gffline->exontype==0)
+                     subfPoolAdd(pex, ngfo);
+               //even those with errors will be added here!
+               }
+            GFREE(subp_name);
+            } //no previous parent found
+       } //parented feature
+        //-- done with this line: release it so that nextGffLine() reads a new one
+      delete gffline;
+      gffline=NULL;
+      }//while gff lines
+  if (gflst.Count()>0) {
+    gflst.finalize(this, mergeCloseExons, keepAttr, noExonAttr); //force sorting by locus if so constructed
+    gseqStats.setCount(gseqstats.Last()->gseqid+1); //sized from the gseqid of the last gseqstats entry
+    for (int gi=0;gi<gseqstats.Count();gi++) {
+        gseqStats.Put(gseqstats[gi]->gseqid, gseqstats[gi]); //copy the pointer only
+    }
+  }
+ // all gff records are now loaded in GList gflst
+ // so we can free the hash
+  phash.Clear();
+  //tids.Clear();
+  if (validation_errors) { //at least one addExonFeature() call reported a failure
+    exit(1);
+    }
+}
+
+void GfList::finalize(GffReader* gfr, bool mergeCloseExons,
+             bool keepAttrs, bool noExonAttr) {
+  //Finalizes every record in this list; records that end up flagged as discarded are detached
+  //from their children, moved to a local 'discarded' list and removed from this list.
+  if (mustSort) { //enforce (re-)sorting by locus
+     this->setSorted(false);
+     this->setSorted((GCompareProc*)gfo_cmpByLoc);
+     }
+  GList<GffObj> discarded(false,true,false);
+  for (int idx=0;idx<Count();idx++) {
+    GffObj* rec=fList[idx];
+    //finish the parsing for this GffObj
+    rec->finalize(gfr, mergeCloseExons, keepAttrs, noExonAttr);
+    if (!rec->isDiscarded()) continue;
+    discarded.Add(rec);
+    for (int c=0;c<rec->children.Count();c++) {
+       GffObj* child=rec->children[c];
+       child->parent=NULL;
+       //inherit the attributes of the discarded parent (e.g. pseudo=true; )
+       if (keepAttrs) child->copyAttrs(rec);
+       }
+    this->Forget(idx);
+    }
+  //close the gaps left behind by Forget()
+  if (discarded.Count()>0) this->Pack();
+}
+
+GffObj* GffObj::finalize(GffReader* gfr, bool mergeCloseExons, bool keepAttrs, bool noExonAttr) {
+ //Post-processing of a loaded record; returns this. Flags the record as discarded when
+ //gfr->transcriptsOnly excludes it, merges exons at most 5 bases apart (ONLY when mergeCloseExons is set),
+ //shrinks start/end to the exon span, and moves attributes shared by all exons up to the record itself.
+ udata=0;
+ uptr=NULL; //NOTE(review): cleared here, so the "uptr!=NULL" statistics block below looks unreachable
+ if (gfr->transcriptsOnly && !(isTranscript() || (isGene() && children.Count()==0))) {
+       isDiscarded(true);
+       }
+ if (ftype_id==gff_fid_transcript && CDstart>0) { //a generic transcript carrying a CDS is relabeled as mRNA
+    ftype_id=gff_fid_mRNA;
+    //exon_ftype_id=gff_fid_exon;
+    }
+ if (exons.Count()>0 && (isTranscript() || exon_ftype_id==gff_fid_exon)) {
+   if (mergeCloseExons) {
+     int mindist=mergeCloseExons ? 5:1; //always 5 here, given the enclosing if
+     for (int i=0;i<exons.Count()-1;i++) {
+       int ni=i+1;
+       uint mend=exons[i]->end; //current end of the merged segment
+       while (ni<exons.Count()) {
+         int dist=(int)(exons[ni]->start-mend);
+         if (dist>mindist) break; //no merging with next segment
+         if (gfr!=NULL && gfr->gff_warns && dist!=0 && (exons[ni]->exontype!=exgffUTR && exons[i]->exontype!=exgffUTR)) {
+            GMessage("GFF warning: merging adjacent/overlapping segments of %s on %s (%d-%d, %d-%d)\n",
+                 gffID, getGSeqName(), exons[i]->start, exons[i]->end,exons[ni]->start, exons[ni]->end);
+            }
+         mend=exons[ni]->end; //NOTE(review): takes the next exon's end even if it is smaller than the current one
+         covlen-=exons[i]->len(); //covlen is re-based on the new length of exon i ...
+         exons[i]->end=mend;
+         covlen+=exons[i]->len();
+         covlen-=exons[ni]->len(); //... and the absorbed exon no longer counts
+         if (exons[ni]->attrs!=NULL && (exons[i]->attrs==NULL ||
+              exons[i]->attrs->Count()<exons[ni]->attrs->Count())) {
+                //use the other exon attributes, if more
+                delete(exons[i]->attrs);
+                exons[i]->attrs=exons[ni]->attrs;
+                exons[ni]->attrs=NULL; //ownership moved to exon i
+                }
+         exons.Delete(ni); //ni is not advanced: the following exon takes this slot
+         } //check for merge with next exon
+     } //for each exon
+   } //merge close exons
+   //shrink transcript to the exons' span
+   this->start=exons.First()->start;
+   this->end=exons.Last()->end;
+   //also update the stats for the reference sequence
+   if (uptr!=NULL) { //collect stats about the underlying genomic sequence
+       GSeqStat* gsd=(GSeqStat*)uptr;
+       if (start<gsd->mincoord) gsd->mincoord=start;
+       if (end>gsd->maxcoord) gsd->maxcoord=end;
+       if (this->len()>gsd->maxfeat_len) {
+          gsd->maxfeat_len=this->len();
+          gsd->maxfeat=this;
+          }
+       }
+   this->uptr=NULL;
+   this->udata=0;
+ }
+ //attribute reduction for GTF records (those without a GFF ID): exon[0]'s attributes are the candidates
+ if (keepAttrs && !noExonAttr && !hasGffID()
+          && exons.Count()>0 && exons[0]->attrs!=NULL) {
+   bool attrs_discarded=false; //set once an entry of exons[0]->attrs was freed, so Pack() is needed
+   for (int a=0;a<exons[0]->attrs->Count();a++) {
+      int attr_name_id=exons[0]->attrs->Get(a)->attr_id;
+      char* attr_name=names->attrs.getName(attr_name_id);
+      char* attr_val =exons[0]->attrs->Get(a)->attr_val;
+      bool sameExonAttr=true; //stays true if every other exon has this attribute with the same value
+      for (int i=1;i<exons.Count();i++) {
+         char* ov=exons[i]->getAttr(attr_name_id);
+         if (ov==NULL || (strcmp(ov,attr_val)!=0)) {
+             sameExonAttr=false;
+             break;
+             }
+         }
+      if (sameExonAttr) {
+             //delete this attribute from exons level
+             attrs_discarded=true;
+             this->addAttr(attr_name, attr_val); //must happen before the exon-level copy is freed below
+             for (int i=1;i<exons.Count();i++) {
+                 removeExonAttr(*(exons[i]), attr_name_id);
+                 }
+             exons[0]->attrs->freeItem(a); //Pack() is deferred until the loop over exon 0's attributes is done
+             }
+      }
+   if (attrs_discarded) exons[0]->attrs->Pack();
+   }
+ return this;
+}
+
+void GffObj::parseAttrs(GffAttrs*& atrlist, char* info, bool isExon) {
+  //Splits 'info' in place (NUL bytes are written over ';' and '=') into name=value fields and
+  //stores them in atrlist, which is created on demand and deleted again if it ends up empty.
+  //Fields without '=' are ignored, as are exon_number, exon and exon_id.
+  if (names==NULL)
+     GError(ERR_NULL_GFNAMES, "parseAttrs()");
+  if (atrlist==NULL)
+      atrlist=new GffAttrs();
+  char* const infoEnd=info+strlen(info);
+  char* field=info;
+  while (field<infoEnd) {
+    //skip spaces
+    while (*field==' ' && field<infoEnd) field++;
+    //cut the current field at the next ';' (or at the end of the string)
+    char* next=strchr(field, ';');
+    if (next!=NULL) {
+         *next='\0';
+         next++;
+         }
+      else next=infoEnd;
+    char* value=strchr(field,'=');
+    if (value==NULL) { //not an attr=value format
+       field=next;
+       continue;
+       }
+    *value='\0';
+    value++;
+    const bool exonCounter = strcmp(field, "exon_number")==0 || strcmp(field, "exon")==0 ||
+                             strcmp(field, "exon_id")==0;
+    if (!exonCounter) {
+       while (*value==' ' && value<infoEnd) value++;//skip extra spaces after the '='
+       //make sure we don't add the same attribute more than once
+       if (isExon && strcmp(field, "protein_id")==0) {
+             //Ensembl special case: stored on the record itself, not in atrlist
+             this->addAttr(field, value);
+             }
+         else {
+             atrlist->add_or_update(names, field, value);
+             }
+       }
+    field=next;
+    }
+  if (atrlist->Count()==0) {
+     delete atrlist;
+     atrlist=NULL;
+     }
+}
+
+void GffObj::addAttr(const char* attrname, const char* attrvalue) {
+  //sets an attribute on the record itself; the attribute list is created on first use
+  if (!attrs) {
+      attrs=new GffAttrs();
+      }
+  attrs->add_or_update(names, attrname, attrvalue);
+}
+
+//Copies to this record every attribute of 'from' that this record does not have yet;
+//attributes already present here are never replaced. Typically 'from' is the parent gene
+//and this is a transcript. Nothing happens when 'from' is NULL or has no attributes.
+//RefSeq special case: the "description" of 'from' is not copied when this record already
+//has a "product" attribute with the same text.
+void GffObj::copyAttrs(GffObj* from) {
+	if (from==NULL || from->attrs==NULL) return;
+	if (this->attrs==NULL) {
+		this->attrs=new GffAttrs();
+	}
+	//special RefSeq case
+	int desc_attr_id=names->attrs.getId("description"); //from gene
+	int prod_attr_id=names->attrs.getId("product"); //from transcript (this)
+	char* prod = (prod_attr_id>=0) ? this->attrs->getAttr(prod_attr_id) : NULL;
+
+	for (int i=0;i<from->attrs->Count();++i) {
+		int aid=from->attrs->Get(i)->attr_id;
+		//special case for GenBank refseq genes vs transcripts:
+		if (prod && aid==desc_attr_id && strcmp(from->attrs->getAttr(desc_attr_id), prod)==0)
+			continue; //skip description if product already there and the same
+		bool haveit=false;
+		for (int ai=0;ai<this->attrs->Count();++ai) {
+			//do we have it already?
+			//(index with ai: the outer index i belongs to from->attrs and may be out of range here)
+			if (aid==this->attrs->Get(ai)->attr_id) {
+				haveit=true;
+				break; //skip this, don't replace
+			}
+		}
+		if (!haveit)
+			this->attrs->Add(new GffAttr(aid, from->attrs->Get(i)->attr_val));
+	}
+}
+
+void GffObj::setFeatureName(const char* feature) {
+ //change the feature name/type for a transcript
+ const int featId=names->feats.addName(feature);
+ //a mono-feature record with exons gets its exon feature type renamed too
+ const bool renameExons = monoFeature() && exons.Count()>0;
+ if (renameExons) {
+    this->exon_ftype_id=featId;
+    }
+ this->ftype_id=featId;
+}
+
+void GffObj::setRefName(const char* newname) {
+ //point this record to the reference sequence called newname,
+ //using the id that names->gseqs.addName() returns for it
+ this->gseq_id=names->gseqs.addName(newname);
+}
+
+
+
+int GffObj::removeAttr(const char* attrname, const char* attrval) {
+  //Removes the record-level attribute(s) called attrname (only those with value attrval, if
+  //given) and returns how many were removed; 0 for a missing/empty/unknown name.
+  if (this->attrs==NULL) return 0;
+  if (attrname==NULL || attrname[0]==0) return 0;
+  const int attrId=this->names->attrs.getId(attrname);
+  if (attrId<0) return 0; //name was never registered
+  //the actual removal is the same as for the id-based overload
+  return this->removeAttr(attrId, attrval);
+}
+
+int GffObj::removeAttr(int aid, const char* attrval) {
+  //Removes the record-level attribute(s) with id aid (only those with value attrval, if given)
+  //and returns how many were removed.
+  if (aid<0 || this->attrs==NULL) return 0;
+  int removed=0;  //could be more than one ?
+  for (int k=0;k<this->attrs->Count();k++) {
+     if (this->attrs->Get(k)->attr_id!=aid) continue;
+     const bool valueMatches = attrval==NULL ||
+                    strcmp(attrval, this->attrs->Get(k)->attr_val)==0;
+     if (!valueMatches) continue;
+     this->attrs->freeItem(k);
+     removed++;
+     }
+  //compact the list only if something was actually freed
+  if (removed>0) this->attrs->Pack();
+  return removed;
+}
+
+
+int GffObj::removeExonAttr(GffExon& exon, const char* attrname, const char* attrval) {
+  //Removes from 'exon' the attribute(s) called attrname (only those with value attrval, if
+  //given) and returns how many were removed; 0 for a missing/empty/unknown name.
+  if (exon.attrs==NULL) return 0;
+  if (attrname==NULL || attrname[0]==0) return 0;
+  const int attrId=this->names->attrs.getId(attrname);
+  if (attrId<0) return 0; //name was never registered
+  //the actual removal is the same as for the id-based overload
+  return this->removeExonAttr(exon, attrId, attrval);
+}
+
+int GffObj::removeExonAttr(GffExon& exon, int aid, const char* attrval) {
+  //Removes from 'exon' the attribute(s) with id aid (only those with value attrval, if given)
+  //and returns how many were removed.
+  if (aid<0 || exon.attrs==NULL) return 0;
+  int removed=0;  //could be more than one
+  for (int k=0;k<exon.attrs->Count();k++) {
+     if (exon.attrs->Get(k)->attr_id!=aid) continue;
+     const bool valueMatches = attrval==NULL ||
+                    strcmp(attrval, exon.attrs->Get(k)->attr_val)==0;
+     if (!valueMatches) continue;
+     exon.attrs->freeItem(k);
+     removed++;
+     }
+  //compact the list only if something was actually freed
+  if (removed>0) exon.attrs->Pack();
+  return removed;
+}
+
+
+void GffObj::getCDS_ends(uint& cds_start, uint& cds_end) {
+  //Returns CDstart/CDend adjusted by the CDS phase ('1' or '2'): the phase is subtracted from
+  //the end on the '-' strand and added to the start otherwise. Both are 0 without CDS info.
+  cds_start=0;
+  cds_end=0;
+  if (CDstart==0 || CDend==0) return; //no CDS info
+  const int phaseShift = (CDphase=='1' || CDphase=='2') ? CDphase-'0' : 0;
+  cds_start=CDstart;
+  cds_end=CDend;
+  if (strand=='-') {
+      cds_end-=phaseShift;
+      }
+    else {
+      cds_start+=phaseShift;
+      }
+  }
+
+void GffObj::mRNA_CDS_coords(uint& cds_mstart, uint& cds_mend) {
+  //sets cds_mstart and cds_mend to the CDS start,end coordinates on the spliced mRNA transcript (both 0 if no CDS)
+  cds_mstart=0;
+  cds_mend=0;
+  if (CDstart==0 || CDend==0) return; //no CDS info
+  //restore normal coordinates, just in case
+  unxcoord();
+  int cdsadj=0; //CDS phase ('1' or '2') as a number of bases, else 0
+  if (CDphase=='1' || CDphase=='2') {
+      cdsadj=CDphase-'0';
+      }
+  /*
+   uint seqstart=CDstart;
+   uint seqend=CDend;
+  */
+  uint seqstart=exons.First()->start; //the whole exon span is walked, not just the CDS
+  uint seqend=exons.Last()->end;
+  int s=0; //resulting nucleotide counter
+  if (strand=='-') { //exons are walked from the last to the first one
+    for (int x=exons.Count()-1;x>=0;x--) {
+       uint sgstart=exons[x]->start;
+       uint sgend=exons[x]->end;
+       if (seqend<sgstart || seqstart>sgend) continue;
+       if (seqstart>=sgstart && seqstart<=sgend)
+             sgstart=seqstart; //seqstart within this segment
+       if (seqend>=sgstart && seqend<=sgend)
+             sgend=seqend; //seqend within this segment
+       s+=(int)(sgend-sgstart)+1; //s now counts all bases up to and including this segment
+       if (CDstart>=sgstart && CDstart<=sgend) {
+             //CDstart in this segment
+             //and we are getting the whole transcript
+             cds_mend=s-(int)(CDstart-sgstart);
+             }
+       if (CDend>=sgstart && CDend<=sgend) {
+             //CDend in this segment
+             //and we are getting the whole transcript
+             cds_mstart=s-(int)(CDend-cdsadj-sgstart);
+             }
+      } //for each exon
+    } // - strand
+   else { // + strand
+    for (int x=0;x<exons.Count();x++) {
+      uint sgstart=exons[x]->start;
+      uint sgend=exons[x]->end;
+      if (seqend<sgstart || seqstart>sgend) continue;
+      if (seqstart>=sgstart && seqstart<=sgend)
+            sgstart=seqstart; //seqstart within this segment
+      if (seqend>=sgstart && seqend<=sgend)
+            sgend=seqend; //seqend within this segment
+      s+=(int)(sgend-sgstart)+1; //s now counts all bases up to and including this segment
+      /* for (uint i=sgstart;i<=sgend;i++) {
+          spliced[s]=gsubseq[i-gstart];
+          s++;
+          }//for each nt
+          */
+      if (CDstart>=sgstart && CDstart<=sgend) {
+            //CDstart in this segment
+            cds_mstart=s-(int)(sgend-CDstart-cdsadj);
+            }
+      if (CDend>=sgstart && CDend<=sgend) {
+            //CDend in this segment
+            cds_mend=s-(int)(sgend-CDend);
+            }
+      } //for each exon
+    } // + strand
+  //spliced[s]=0;
+  //if (rlen!=NULL) *rlen=s;
+  //return spliced;
+}
+
+char* GffObj::getUnspliced(GFaSeqGet* faseq, int* rlen, GList<GSeg>* seglst)
+{
+    //Returns a newly allocated, NUL-terminated copy of the genomic sequence spanning the first
+    //exon's start to the last exon's end; on the '-' strand the bases are emitted in reverse
+    //order, each passed through ntComplement(). NULL without a sequence source or without exons.
+    //If given, *rlen receives the length and seglst one segment covering the whole result.
+    if (faseq==NULL) {
+        GMessage("Warning: getUnspliced(NULL,.. ) called!\n");
+        return NULL;
+    }
+    //restore normal coordinates:
+    unxcoord();
+    if (exons.Count()==0) return NULL;
+    int fspan=end-start+1;
+    const char* gsubseq=faseq->subseq(start, fspan);
+    if (gsubseq==NULL) {
+        GError("Error getting subseq for %s (%d..%d)!\n", gffID, start, end);
+    }
+
+    const int firstPos=exons.First()->start;
+    const int lastPos=exons.Last()->end;
+    const int seqLen=lastPos-firstPos+1;
+
+    char* out=NULL;
+    GMALLOC(out, seqLen+1); //+1 for the terminating NUL
+
+    //a single segment, whichever the strand
+    if (seglst!=NULL)
+        seglst->Add(new GSeg(1, 1+lastPos-firstPos));
+
+    int n=0; //resulting nucleotide counter
+    if (strand=='-')
+    {
+        for (int pos=lastPos;pos>=firstPos;pos--)
+            out[n++]=ntComplement(gsubseq[pos-start]);
+    }
+    else
+    {
+        for (int pos=firstPos;pos<=lastPos;pos++)
+            out[n++]=gsubseq[pos-start];
+    }
+    out[n]=0;
+    if (rlen!=NULL) *rlen=n;
+    return out;
+}
+
+char* GffObj::getSpliced(GFaSeqGet* faseq, bool CDSonly, int* rlen, uint* cds_start, uint* cds_end,
+          GList<GSeg>* seglst) {
+  //Returns a newly allocated (GMALLOC), 0-terminated spliced sequence built from the exons
+  //(reverse-complemented for '-' strand); with CDSonly only the CDstart..CDend part is extracted.
+  //  rlen              - if not NULL, receives the number of nucleotides written
+  //  cds_start/cds_end - if not NULL (and !CDSonly, CDstart>0), receive the 1-based CDS bounds
+  //                      within the returned sequence; either pointer may be NULL independently
+  //  seglst            - if not NULL, receives one GSeg per extracted exon segment (local coords)
+  if (CDSonly && CDstart==0) return NULL;
+  if (faseq==NULL) { GMessage("Warning: getSpliced(NULL,.. ) called!\n");
+              return NULL;
+              }
+  unxcoord(); //restore normal coordinates
+  if (exons.Count()==0) return NULL;
+  int fspan=end-start+1;
+  const char* gsubseq=faseq->subseq(start, fspan);
+  if (gsubseq==NULL) {
+        GError("Error getting subseq for %s (%d..%d)!\n", gffID, start, end);
+        }
+  if (fspan<(int)(end-start+1)) { //special case: stop coordinate was extended past the gseq length, must adjust
+     int endadj=end-start+1-fspan;
+     uint prevend=end;
+     end-=endadj;
+     if (CDend>end) CDend=end;
+     if (exons.Last()->end>end) {
+         exons.Last()->end=end; //this could get us into trouble if exon start is also > end
+         if (exons.Last()->start>exons.Last()->end) {
+            GError("GffObj::getSpliced() error: improper genomic coordinate %d on %s for %s\n",
+                  prevend,getGSeqName(), getID());
+            }
+         covlen-=endadj;
+         }
+     }
+  char* spliced=NULL;
+  GMALLOC(spliced, covlen+1); //+1 for the terminating 0
+  uint seqstart, seqend;
+  int cdsadj=0; //number of leading CDS bases to skip, taken from CDphase
+  if (CDphase=='1' || CDphase=='2') {
+      cdsadj=CDphase-'0';
+      }
+  if (CDSonly) {
+     seqstart=CDstart;
+     seqend=CDend;
+     if (strand=='-') seqend-=cdsadj;
+           else seqstart+=cdsadj;
+     }
+   else {
+     seqstart=exons.First()->start;
+     seqend=exons.Last()->end;
+     }
+  int s=0; //resulting nucleotide counter
+  if (strand=='-') {
+    for (int x=exons.Count()-1;x>=0;x--) {
+       uint sgstart=exons[x]->start;
+       uint sgend=exons[x]->end;
+       if (seqend<sgstart || seqstart>sgend) continue;
+       if (seqstart>=sgstart && seqstart<=sgend)
+             sgstart=seqstart; //seqstart within this segment
+       if (seqend>=sgstart && seqend<=sgend)
+             sgend=seqend; //seqend within this segment
+       if (seglst!=NULL)
+           seglst->Add(new GSeg(s+1,s+1+sgend-sgstart));
+       for (uint i=sgend;i>=sgstart;i--) {
+            spliced[s] = ntComplement(gsubseq[i-start]);
+            s++;
+            }//for each nt
+       if (!CDSonly && CDstart>0) { //whole transcript requested: report local CDS bounds
+          if (cds_end!=NULL && CDstart>=sgstart && CDstart<=sgend) {
+             //CDstart is in this segment; on '-' strand it is the local CDS end
+             *cds_end=s-(CDstart-sgstart);
+             }
+          if (cds_start!=NULL && CDend>=sgstart && CDend<=sgend) {
+             //CDend is in this segment; on '-' strand it is the local CDS start (phase-shifted)
+             *cds_start=s-(CDend-cdsadj-sgstart);
+             }
+         }//update local CDS coordinates
+      } //for each exon
+    } // - strand
+   else { // + strand
+    for (int x=0;x<exons.Count();x++) {
+      uint sgstart=exons[x]->start;
+      uint sgend=exons[x]->end;
+      if (seqend<sgstart || seqstart>sgend) continue;
+      if (seqstart>=sgstart && seqstart<=sgend)
+            sgstart=seqstart; //seqstart within this segment
+      if (seqend>=sgstart && seqend<=sgend)
+            sgend=seqend; //seqend within this segment
+      if (seglst!=NULL)
+          seglst->Add(new GSeg(s+1,s+1+sgend-sgstart));
+      for (uint i=sgstart;i<=sgend;i++) {
+          spliced[s]=gsubseq[i-start];
+          s++;
+          }//for each nt
+      if (!CDSonly && CDstart>0) { //whole transcript requested: report local CDS bounds
+         if (cds_start!=NULL && CDstart>=sgstart && CDstart<=sgend) {
+            //CDstart is in this segment: local CDS start (phase-shifted)
+            *cds_start=s-(sgend-CDstart-cdsadj);
+            }
+         if (cds_end!=NULL && CDend>=sgstart && CDend<=sgend) {
+            //CDend is in this segment: local CDS end
+            *cds_end=s-(sgend-CDend);
+            }
+        }//update local CDS coordinates
+      } //for each exon
+    } // + strand
+  spliced[s]=0;
+  if (rlen!=NULL) *rlen=s;
+  return spliced;
+}
+
+char* GffObj::getSplicedTr(GFaSeqGet* faseq, bool CDSonly, int* rlen) {
+  //Returns a newly allocated (GMALLOC), 0-terminated translation of the spliced sequence, one
+  //codon at a time (a trailing partial codon is dropped); *rlen, if given, gets the residue count.
+  if (CDSonly && CDstart==0) return NULL;
+  if (faseq==NULL) { GMessage("Warning: getSplicedTr(NULL,.. ) called!\n"); return NULL; }
+  unxcoord(); //restore normal coordinates
+  if (exons.Count()==0) return NULL;
+  int fspan=end-start+1;
+  const char* gsubseq=faseq->subseq(start, fspan);
+  if (gsubseq==NULL) {
+    GError("Error getting subseq for %s (%d..%d)!\n", gffID, start, end);
+    }
+  char* translation=NULL;
+  GMALLOC(translation, (int)(covlen/3)+1);
+  uint seqstart, seqend;
+  int cdsadj=0; //number of leading CDS bases to skip, taken from CDphase
+  if (CDphase=='1' || CDphase=='2') {
+      cdsadj=CDphase-'0';
+      }
+  if (CDSonly) {
+     seqstart=CDstart;
+     seqend=CDend;
+     if (strand=='-') seqend-=cdsadj;
+           else seqstart+=cdsadj;
+     }
+   else {
+     seqstart=exons.First()->start;
+     seqend=exons.Last()->end;
+     }
+  Codon codon;
+  int nt=0; //codon nucleotide counter (0..2)
+  int aa=0; //aminoacid count
+  if (strand=='-') {
+    for (int x=exons.Count()-1;x>=0;x--) {
+       uint sgstart=exons[x]->start;
+       uint sgend=exons[x]->end;
+       if (seqend<sgstart || seqstart>sgend) continue;
+       if (seqstart>=sgstart && seqstart<=sgend)
+             sgstart=seqstart; //seqstart within this segment
+       if (seqend>=sgstart && seqend<=sgend)
+             sgend=seqend; //seqend within this segment
+       for (uint i=sgend;i>=sgstart;i--) {
+            codon.nuc[nt]=ntComplement(gsubseq[i-start]);
+            nt++;
+            if (nt==3) {
+               nt=0;
+               translation[aa]=codon.translate();
+               aa++;
+               }
+            }//for each nt
+      } //for each exon
+    } // - strand
+   else { // + strand
+    for (int x=0;x<exons.Count();x++) {
+      uint sgstart=exons[x]->start;
+      uint sgend=exons[x]->end;
+      if (seqend<sgstart || seqstart>sgend) continue;
+      if (seqstart>=sgstart && seqstart<=sgend)
+            sgstart=seqstart; //seqstart within this segment
+      if (seqend>=sgstart && seqend<=sgend)
+            sgend=seqend; //seqend within this segment
+      for (uint i=sgstart;i<=sgend;i++) {
+          codon.nuc[nt]=gsubseq[i-start];
+          nt++;
+          if (nt==3) {
+             nt=0;
+             translation[aa]=codon.translate();
+             aa++;
+             }
+          }//for each nt
+        } //for each exon
+    } // + strand
+ translation[aa]=0;
+ if (rlen!=NULL) *rlen=aa;
+ return translation;
+}
+
+void GffObj::printSummary(FILE* fout) {
+ FILE* out=(fout!=NULL) ? fout : stdout; //fall back to stdout when no stream is given
+ const double qcovShown=(float)qcov/10.0; //qcov is printed divided by 10, with one decimal
+ fprintf(out, "%s\t%c\t%d\t%d\t%4.2f\t%4.1f\n", gffID, strand, start, end, gscore, qcovShown);
+}
+
+void decodeHexChars(char* dbuf, const char* s, int maxlen=1023) {
+	//Copies s into dbuf (at most maxlen chars plus the terminating 0), decoding %XX hex escapes
+	//whose decoded char compares > ' ' (';' becomes '.'); any other text is copied verbatim.
+	int dlen=0;
+	dbuf[0]=0;
+	if (s==NULL) return;
+	for (const char* p=s;(*p)!=0 && dlen<maxlen;++p) {
+		if (p[0]=='%' && isxdigit((unsigned char)p[1]) && isxdigit((unsigned char)p[2])) { //cast: isxdigit(negative) is UB
+			int a=p[1];
+			if (a>'Z') a^=0x20; //toupper()
+			a = (a>'9') ? 10+(a-'A') : a-'0';
+			int b=p[2];
+			if (b>'Z') b^=0x20; //toupper()
+			b = (b>'9') ? 10+(b-'A') : b-'0';
+			char c=(char)((a<<4)+b);
+			if (c==';') c='.'; //never emit a literal ';' into an attribute value
+			if (c>' ') {
+				dbuf[dlen]=c;
+				++p;++p; //skip the two hex digits; the loop's ++p skips the '%'
+				++dlen;
+				continue;
+			}
+		}
+		dbuf[dlen]=*p;
+		++dlen;
+	}
+	dbuf[dlen]=0;
+}
+
+void GffObj::printGxfLine(FILE* fout, const char* tlabel, const char* gseqname, bool iscds, //writes one exon/CDS feature line to fout
+                             uint segstart, uint segend, int exidx, char phase, bool gff3, bool cvtChars) { //gff3: GFF3 syntax, else GTF
+  char dbuf[1024]; //holds the score column first, later reused for decoded attribute values
+  strcpy(dbuf,"."); //default score column
+  GffAttrs* xattrs=NULL;
+  if (exidx>=0) { //an exon index was given: use that exon's score and attributes
+     if (exons[exidx]->score) sprintf(dbuf,"%.2f", exons[exidx]->score);
+     xattrs=exons[exidx]->attrs;
+  }
+  if (phase==0 || !iscds) phase='.'; //a phase is only printed on CDS lines
+  const char* ftype=iscds ? "CDS" : getSubfName();
+  const char* attrname=NULL;
+  const char* attrval=NULL;
+  if (gff3) {
+    fprintf(fout,
+      "%s\t%s\t%s\t%d\t%d\t%s\t%c\t%c\tParent=%s",
+      gseqname, tlabel, ftype, segstart, segend, dbuf, strand,
+      phase, gffID);
+    if (xattrs!=NULL) { //append the exon's own attributes as ;name=value
+      for (int i=0;i<xattrs->Count();i++) {
+        attrname=names->attrs.getName(xattrs->Get(i)->attr_id);
+        if (cvtChars) { //decode %XX escapes (via decodeHexChars) before printing
+          decodeHexChars(dbuf, xattrs->Get(i)->attr_val);
+          fprintf(fout,";%s=%s", attrname, dbuf);
+        } else { //NOTE(review): unlike the GTF branch, attr_val is not checked for NULL here - confirm it cannot be
+          fprintf(fout,";%s=%s", attrname, xattrs->Get(i)->attr_val);
+        }
+      }
+    }
+    fprintf(fout, "\n");
+    } //GFF3
+  else {//GTF: every line carries transcript_id (and gene_id/gene_name when set)
+    //(a validity check on the transcript used to be considered here)
+    fprintf(fout, "%s\t%s\t%s\t%d\t%d\t%s\t%c\t%c\ttranscript_id \"%s\";",
+           gseqname, tlabel, ftype, segstart, segend, dbuf, strand, phase, gffID);
+    //gene_id is only printed when geneID is set (no fallback to gffID)
+    if (geneID)
+      fprintf(fout," gene_id \"%s\";",geneID);
+    if (gene_name!=NULL) {
+       //gene_name is always wrapped in double quotes here,
+       //without checking whether the stored value already
+       //starts with a quote (the attribute loops below do check)
+       fprintf(fout," gene_name \"%s\";",gene_name);
+       }
+    if (xattrs!=NULL) { //exon-level attributes, skipping those without a value
+          for (int i=0;i<xattrs->Count();i++) {
+            if (xattrs->Get(i)->attr_val==NULL) continue;
+            attrname=names->attrs.getName(xattrs->Get(i)->attr_id);
+            fprintf(fout, " %s ",attrname);
+            if (cvtChars) {
+              decodeHexChars(dbuf, xattrs->Get(i)->attr_val);
+              attrval=dbuf;
+            } else {
+              attrval=xattrs->Get(i)->attr_val;
+            }
+
+            if (attrval[0]=='"') fprintf(fout, "%s;",attrval); //value already starts with a quote: print as is
+                           else fprintf(fout, "\"%s\";",attrval);
+             }
+          }
+    //for GTF, also append the GffObj attributes to each exon line
+    if ((xattrs=this->attrs)!=NULL) { //xattrs now points at the object-level attributes
+          for (int i=0;i<xattrs->Count();i++) {
+            if (xattrs->Get(i)->attr_val==NULL) continue;
+            attrname=names->attrs.getName(xattrs->Get(i)->attr_id);
+            fprintf(fout, " %s ",attrname);
+            if (cvtChars) {
+              decodeHexChars(dbuf, xattrs->Get(i)->attr_val);
+              attrval=dbuf;
+            } else {
+              attrval=xattrs->Get(i)->attr_val;
+            }
+            if (attrval[0]=='"') fprintf(fout, "%s;",attrval); //value already starts with a quote: print as is
+                           else fprintf(fout, "\"%s\";",attrval);
+             }
+           }
+    fprintf(fout, "\n");
+    }//GTF
+}
+
+void GffObj::printGxf(FILE* fout, GffPrintMode gffp,
+                   const char* tlabel, const char* gfparent, bool cvtChars) {
+ //Prints this object to fout: for GFF3 modes a parent feature line, then exon and/or CDS lines as selected by gffp
+ char dbuf[1024]; //score column text, later reused for decoded attribute values
+ if (tlabel==NULL) { //no track label given: use the object's track name, or "gffobj" without a track
+    tlabel=track_id>=0 ? names->tracks.Get(track_id)->name :
+         (char*)"gffobj" ;
+    }
+ unxcoord();
+ //objects without exons are still printed (an early return used to be considered here)
+ const char* gseqname=names->gseqs.Get(gseq_id)->name;
+ bool gff3 = (gffp>=pgffAny); //print modes from pgffAny upward produce GFF3, the others GTF
+ bool showCDS = (gffp==pgtfAny || gffp==pgtfCDS || gffp==pgffCDS || gffp==pgffAny || gffp==pgffBoth);
+ bool showExon = (gffp<=pgtfExon || gffp==pgffAny || gffp==pgffExon || gffp==pgffBoth);
+ if (gff3) {
+   //print the GFF3 parent feature line (type taken from getFeatureName()):
+   if (gscore>0.0) sprintf(dbuf,"%.2f", gscore);
+          else strcpy(dbuf,".");
+   uint pstart, pend;
+   if (gffp==pgffCDS) { //CDS-only output: the parent line spans the CDS
+      pstart=CDstart;
+      pend=CDend;
+      }
+   else { pstart=start;pend=end; }
+   //the feature type is printed as stored (it is not forced to "mRNA" for transcripts)
+   const char* ftype=getFeatureName();
+   fprintf(fout,
+     "%s\t%s\t%s\t%d\t%d\t%s\t%c\t.\tID=%s",
+     gseqname, tlabel, ftype, pstart, pend, dbuf, strand, gffID);
+   if (CDstart>0 && !showCDS/* && !isCDS*/) fprintf(fout,";CDS=%d-%d",CDstart,CDend); //CDS lines suppressed: keep the CDS span as an attribute
+   if (gfparent!=NULL) {
+      //parent override
+      fprintf(fout, ";Parent=%s",gfparent);
+      }
+     else {
+       if (parent!=NULL && !parent->isDiscarded())
+           fprintf(fout, ";Parent=%s",parent->getID());
+       }
+   if (geneID!=NULL)
+      fprintf(fout, ";geneID=%s",geneID);
+   if (gene_name!=NULL)
+      fprintf(fout, ";gene_name=%s",gene_name);
+   if (attrs!=NULL) { //object-level attributes as ;name=value
+      for (int i=0;i<attrs->Count();i++) {
+        const char* attrname=names->attrs.getName(attrs->Get(i)->attr_id);
+        if (cvtChars) { //decode %XX escapes (via decodeHexChars) before printing
+          decodeHexChars(dbuf, attrs->Get(i)->attr_val);
+          fprintf(fout,";%s=%s", attrname, dbuf);
+        } else {
+          fprintf(fout,";%s=%s", attrname, attrs->Get(i)->attr_val);
+        }
+      }
+    }
+   fprintf(fout,"\n");
+   }// gff3 mRNA line
+ bool is_cds_only = (gffp==pgffBoth) ? false : isCDS; //pgffBoth prints exon and CDS lines separately even when isCDS is set
+ if (showExon) {
+   //print exons; phases are recomputed first when the last exon has no phase set yet
+    if (isCDS && exons.Count()>0 && //NOTE(review): both strand cases below test exons.Last() - confirm '+' should not test exons.First()
+        ((strand=='-' && exons.Last()->phase<'0') || (strand=='+' && exons.Last()->phase<'0')))
+         updateExonPhase();
+    for (int i=0;i<exons.Count();i++) {
+      printGxfLine(fout, tlabel, gseqname, is_cds_only, exons[i]->start, exons[i]->end, i, exons[i]->phase, gff3, cvtChars);
+      }
+    }//printing exons
+ if (showCDS && !is_cds_only && CDstart>0) { //separate CDS lines are wanted and a CDS is defined
+	  if (isCDS) { //exons are the CDS segments: print each exon again as a CDS line
+	    for (int i=0;i<exons.Count();i++) {
+	      printGxfLine(fout, tlabel, gseqname, true, exons[i]->start, exons[i]->end, i, exons[i]->phase, gff3, cvtChars);
+	      }
+	  }
+	  else { //derive the CDS segments from the exons with getCDSegs()
+			GArray<GffCDSeg> cds(true,true);
+			getCDSegs(cds);
+			for (int i=0;i<cds.Count();i++) { //exidx=-1: no per-exon score/attributes on these lines
+				printGxfLine(fout, tlabel, gseqname, true, cds[i].start, cds[i].end, -1, cds[i].phase, gff3, cvtChars);
+				}
+	  }
+  } //showCDS
+}
+
+void GffObj::updateExonPhase() {
+  //Recomputes the phase ('0'..'2') of every exon when isCDS is set; does nothing
+  //otherwise. Exons are walked in transcript order (last to first on '-' strand);
+  //each exon's phase follows from the number of bases accumulated before it,
+  //seeded from CDphase.
+  if (!isCDS) return;
+
+  int acc=0; //bases consumed so far, including the initial phase offset
+  if (CDphase=='1' || CDphase=='2') acc=3-(CDphase-'0');
+  const int n=exons.Count();
+  const bool rev=(strand=='-');
+
+  for (int k=0;k<n;k++) {
+      int i=k;
+      if (rev) i=n-1-k; //reverse strand: start from the last exon
+      exons[i]->phase='0'+ (3-acc%3)%3;
+      acc+=exons[i]->end-exons[i]->start+1;
+      }
+}
+
+
+void GffObj::getCDSegs(GArray<GffCDSeg>& cds) {
+  //Appends to cds one GffCDSeg for every exon that overlaps CDstart..CDend, with the
+  //segment clipped to the CDS bounds.
+  //Exons are visited in transcript order (last to first on '-' strand, first to last
+  //otherwise); each segment records its exon index and a phase ('0'..'2') derived
+  //from the number of CDS bases accumulated before it, seeded from CDphase.
+
+  int acc=0; //CDS bases consumed so far, including the initial phase offset
+  if (CDphase=='1' || CDphase=='2')
+      acc=3-(CDphase-'0');
+
+  const int n=exons.Count();
+  const bool rev=(strand=='-');
+  GffCDSeg seg;
+
+  for (int k=0;k<n;k++) {
+     //exon index for step k of the walk
+     int x=k;
+     if (rev) x=n-1-k;
+
+     uint from=exons[x]->start;
+     uint to=exons[x]->end;
+
+     //skip exons lying entirely outside the CDS
+     if (CDend<from || CDstart>to)
+         continue;
+
+     //clip the exon to the CDS bounds
+     if (CDstart>=from && CDstart<=to)
+         from=CDstart; //CDstart falls within this exon
+     if (CDend>=from && CDend<=to)
+         to=CDend; //CDend falls within this exon
+
+     //record the clipped segment
+     seg.start=from;
+     seg.end=to;
+     seg.exonidx=x;
+     //phase first, then account for this segment's bases
+     seg.phase='0'+ (3-acc%3)%3;
+     acc+=to-from+1;
+     cds.Add(seg);
+     } //for each exon
+}
diff --git a/src/gff_utils.cpp b/src/gff_utils.cpp
new file mode 100644
index 0000000..419b153
--- /dev/null
+++ b/src/gff_utils.cpp
@@ -0,0 +1,664 @@
+#include "gff_utils.h"
+
+extern bool verbose;
+extern bool debugMode;
+
+//bool debugState=false;
+
+void printFasta(FILE* f, GStr& defline, char* seq, int seqlen) {
+ //Writes seq to f in FASTA layout: a ">defline" header when defline is not empty,
+ //then the residues wrapped at 70 per line. seqlen<=0 means "use strlen(seq)".
+ //Nothing is written for a NULL or empty seq.
+ if (seq==NULL) return;
+ int total=seqlen;
+ if (total<=0) total=strlen(seq);
+ if (total<=0) return;
+ if (!defline.is_empty()) fprintf(f, ">%s\n",defline.chars());
+ for (int pos=0; pos<total; pos++) {
+   //line break before the 71st, 141st, ... residue
+   if (pos>0 && pos%70==0) fputc('\n', f);
+   putc(seq[pos], f);
+   }
+ fputc('\n', f);
+}
+
+int qsearch_gloci(uint x, GList<GffLocus>& loci) {
+  //binary search: index of the first locus whose start is > x, or -1 if there is none (loci presumably sorted by start)
+  if (loci.Count()==0) return -1; //empty list: do not dereference loci[0] / loci.Last()
+  if (loci[0]->start>x) return 0;
+  if (loci.Last()->start<x) return -1;
+  uint istart=0;
+  int i=0;
+  int idx=-1;
+  int maxh=loci.Count()-1;
+  int l=0;
+  int h = maxh;
+  while (l <= h) {
+     i = (l+h)>>1;
+     istart=loci[i]->start;
+     if (istart < x)  l = i + 1;
+          else {
+             if (istart == x) { //found matching coordinate here
+                  idx=i;
+                  while (idx<=maxh && loci[idx]->start==x) { //step past every locus starting exactly at x
+                     idx++;
+                     }
+                  return (idx>maxh) ? -1 : idx;
+                  }
+             h = i - 1;
+             }
+     } //while
+ idx = l; //no exact match: l is where the search converged
+ while (idx<=maxh && loci[idx]->start<=x) {
+    idx++;
+    }
+ return (idx>maxh) ? -1 : idx;
+}
+
+int qsearch_rnas(uint x, GList<GffObj>& rnas) {
+  //binary search: index of the first entry whose start is > x, or -1 if there is none (rnas presumably sorted by start)
+  if (rnas.Count()==0) return -1; //empty list: do not dereference rnas[0] / rnas.Last()
+  if (rnas[0]->start>x) return 0;
+  if (rnas.Last()->start<x) return -1;
+  uint istart=0;
+  int i=0;
+  int idx=-1;
+  int maxh=rnas.Count()-1;
+  int l=0;
+  int h = maxh;
+  while (l <= h) {
+     i = (l+h)>>1;
+     istart=rnas[i]->start;
+     if (istart < x)  l = i + 1;
+          else {
+             if (istart == x) { //found matching coordinate here
+                  idx=i;
+                  while (idx<=maxh && rnas[idx]->start==x) { //step past every entry starting exactly at x
+                     idx++;
+                     }
+                  return (idx>maxh) ? -1 : idx;
+                  }
+             h = i - 1;
+             }
+     } //while
+ idx = l; //no exact match: l is where the search converged
+ while (idx<=maxh && rnas[idx]->start<=x) {
+    idx++;
+    }
+ return (idx>maxh) ? -1 : idx;
+}
+
+int cmpRedundant(GffObj& a, GffObj& b) {
+  //Three-way comparison of a and b: by exon count first, then by covlen (1 / -1 for
+  //either of these keys), and finally by strcmp() of their IDs when both are equal.
+  const int na=a.exons.Count();
+  const int nb=b.exons.Count();
+  if (na!=nb) return (na>nb) ? 1 : -1;
+  if (a.covlen!=b.covlen) return (a.covlen>b.covlen) ? 1 : -1;
+  return strcmp(a.getID(), b.getID());
+}
+
+
+bool tMatch(GffObj& a, GffObj& b) {
+  //strict intron chain match, or single-exon fuzzy match.
+  //Returns true when a and b have the same number of exons and either
+  //  - both are single-exon and their exons overlap by at least 80% of the
+  //    larger of the two covlen values, or
+  //  - every intron is identical in both, i.e. exons[i-1]->end and exons[i]->start
+  //    agree for all i.
+  //Transcript start/end are not compared for multi-exon transcripts.
+  //Objects without any exons never match.
+  int imax=a.exons.Count()-1;
+  int jmax=b.exons.Count()-1;
+  if (imax!=jmax) return false; //different number of introns
+  if (imax<0) return false; //no exons at all: nothing to compare, and exons[0] must not be touched
+
+  if (imax==0) { //single-exon mRNAs
+      //fuzz match for single-exon transfrags:
+      // it's a match if they overlap at least 80% of max len
+      int ovlen=a.exons[0]->overlapLen(b.exons[0]);
+      int maxlen=GMAX(a.covlen,b.covlen);
+      return (ovlen>=maxlen*0.8);
+     }
+
+  //multi-exon: walk the introns; intron i lies between exons i-1 and i.
+  //No overlap length is accumulated here: the result depends on the intron
+  //coordinates only.
+  for (int i=1;i<=imax;i++) {
+    const bool donorDiffers=(a.exons[i-1]->end!=b.exons[i-1]->end);
+    const bool acceptorDiffers=(a.exons[i]->start!=b.exons[i]->start);
+    if (donorDiffers || acceptorDiffers) {
+            return false; //intron mismatch
+    }
+  }
+
+  return true;
+}
+
+
+bool unsplContained(GffObj& ti, GffObj&  tj, bool fuzzSpan) {
+ //Tests whether the single-exon transcript ti fits inside one exon of tj without
+ //reaching across any of tj's intron-exon boundaries.
+ //  fuzzSpan==true : overlapping an exon by at least 80% of ti's length is enough
+ //  fuzzSpan==false: ti must lie entirely within the exon
+ //Calls GError() when ti has more than one exon.
+  if (ti.exons.Count()-1>0) GError("Error: bad unsplContained() call, 1st param must be single-exon transcript!\n");
+  const int lastj=tj.exons.Count()-1;
+  const int minovl = (int)(0.8 * ti.len()); //minimum overlap for fuzzSpan
+
+  for (int j=0;j<=lastj;j++) {
+     //ti must NOT reach into the introns flanking exon j:
+     // - before any exon but the first, ti may not start left of the exon
+     // - after any exon but the last, ti may not end right of the exon
+     const bool startsBefore=(j>0 && ti.start<tj.exons[j]->start);
+     const bool endsAfter=(j<lastj && ti.end>tj.exons[j]->end);
+     if (startsBefore || endsAfter) return false;
+
+     if (fuzzSpan) {
+        //fuzzy: enough overlap with this exon
+        if (ti.exons[0]->overlapLen(tj.exons[j])>=minovl) return true;
+        }
+      else if (ti.end<=tj.exons[j]->end && ti.start>=tj.exons[j]->start) {
+        //strict containment
+        return true;
+        }
+     } //for each exon of tj
+ return false;
+}
+
+GffObj* redundantTranscripts(GffObj& ti, GffObj&  tj, bool matchAllIntrons, bool fuzzSpan) { //returns &ti or &tj (the "bigger" one) if they are redundant, else NULL
+  // matchAllIntrons==true:  transcripts are considered "redundant" only if
+  //                   they have the exact same number of introns and same splice sites (or none)
+  //                 (single-exon transcripts can be also fully contained to be considered matching)
+  // matchAllIntrons==false: an intron chain could be a subset of a "container" chain, 
+  //                   as long as no intron-exon boundaries are violated; also, a single-exon 
+  //                   transcript will be collapsed if it's contained in one of the exons of the other
+  // fuzzSpan==false: the genomic span of one transcript must be contained in or equal with the genomic 
+  //                  span of the other 
+  // 
+  // fuzzSpan==true: then genomic spans of transcripts are no longer required to be fully contained 
+  //                 (i.e. they may extend each-other in opposite directions)
+  
+  //if redundancy is detected, the "bigger" transcript is returned (otherwise NULL is returned)
+ if (ti.start>=tj.end || tj.start>=ti.end || tj.strand!=ti.strand) return NULL; //no span overlap at all, or different strands
+ int imax=ti.exons.Count()-1; //index of ti's last exon == number of introns in ti
+ int jmax=tj.exons.Count()-1; //index of tj's last exon == number of introns in tj
+ GffObj* bigger=NULL;
+ GffObj* smaller=NULL;
+ if (matchAllIntrons) {
+   if (imax!=jmax) return NULL;
+   if (ti.covlen>tj.covlen) {
+       bigger=&ti;
+       if (!fuzzSpan && (ti.start>tj.start || ti.end<tj.end)) return NULL; //the bigger one must span the other
+       }
+     else { //ti.covlen<=tj.covlen
+       bigger=&tj;
+       if (!fuzzSpan && (tj.start>ti.start || tj.end<ti.end)) return NULL; //the bigger one must span the other
+       }
+   //check that all introns really match
+   for (int i=0;i<imax;i++) {
+     if (ti.exons[i]->end!=tj.exons[i]->end || 
+         ti.exons[i+1]->start!=tj.exons[i+1]->start) return NULL;
+     }
+   return bigger;
+   }
+ //--- matchAllIntrons==false: intron-chain containment is also considered redundancy
+ //(the larger covlen is not needed below, only the smaller one)
+ int minlen=0;
+ if (ti.covlen>tj.covlen) {
+      if (tj.exons.Count()>ti.exons.Count()) {
+          //exon count override
+          bigger=&tj;
+          smaller=&ti;
+          }
+        else {
+          bigger=&ti;
+          smaller=&tj;
+          }
+      //minlen is always the smaller covlen, regardless of the exon count override
+      minlen=tj.covlen;
+      }
+   else { //tj has at least as many bases as ti
+      if (ti.exons.Count()>tj.exons.Count()) {
+          //exon count override
+          bigger=&ti;
+          smaller=&tj;
+          }
+        else {
+          bigger=&tj;
+          smaller=&ti;
+          }
+      //minlen is always the smaller covlen, regardless of the exon count override
+      minlen=ti.covlen;
+      }
+ if (imax==0 && jmax==0) {
+     //single-exon transcripts: if fuzzSpan, at least 80% of the shortest one must be overlapped by the other
+     if (fuzzSpan) {
+         return (ti.exons[0]->overlapLen(tj.exons[0])>=minlen*0.8) ? bigger : NULL;
+         }
+       else {
+         return (smaller->start>=bigger->start && smaller->end<=bigger->end) ? bigger : NULL;
+         }
+     }
+ //containment is also considered redundancy
+ if (smaller->exons.Count()==1) {
+   //check if this single exon is contained in any of the bigger transcript's exons
+   //without violating any intron-exon boundaries
+   return (unsplContained(*smaller, *bigger, fuzzSpan) ? bigger : NULL);
+   }
+
+ //--from here on: both are multi-exon transcripts, imax>0 && jmax>0
+  if (ti.exons[imax]->start<tj.exons[0]->end ||
+     tj.exons[jmax]->start<ti.exons[0]->end )
+         return NULL; //intron chains do not overlap at all
+ 
+ 
+ //checking full intron chain containment
+ uint eistart=0, eiend=0, ejstart=0, ejend=0; //intron boundaries: end of the left exon, start of the right exon
+ int i=1; //exon idx to the right of the current intron of ti
+ int j=1; //exon idx to the right of the current intron of tj
+ //find the first intron overlap:
+ while (i<=imax && j<=jmax) {
+    eistart=ti.exons[i-1]->end;
+    eiend=ti.exons[i]->start;
+    ejstart=tj.exons[j-1]->end;
+    ejend=tj.exons[j]->start;
+    if (ejend<eistart) { j++; continue; } //tj's intron lies entirely before ti's: advance tj
+    if (eiend<ejstart) { i++; continue; } //ti's intron lies entirely before tj's: advance ti
+    //we found an intron overlap
+    break;
+    }
+ if (!fuzzSpan && (bigger->start>smaller->start || bigger->end < smaller->end)) return NULL; //strict mode: bigger must span smaller
+ if ((i>1 && j>1) || i>imax || j>jmax) {
+     return NULL; //either no intron overlaps found at all
+                  //or it's not the first intron for at least one of the transcripts
+     }
+ if (eistart!=ejstart || eiend!=ejend) return NULL; //not an exact intron match
+ if (j>i) {
+   //i==1, ti's start must not conflict with the previous intron of tj
+   if (ti.start<tj.exons[j-1]->start) return NULL;
+   //so i's first intron starts AFTER j's first intron
+   // then j must contain i, so i's last intron must end with or before j's last intron
+   if (ti.exons[imax]->start>tj.exons[jmax]->start) return NULL;
+      //comment out the line above if you just want "intron compatibility" (i.e. extension of intron chains )
+   }
+  else if (i>j) {
+     //j==1, tj's start must not conflict with the previous intron of ti
+     if (tj.start<ti.exons[i-1]->start) return NULL;
+     //so j's intron chain starts AFTER i's
+     // then i must contain j, so j's last intron must end with or before i's last intron
+     if (tj.exons[jmax]->start>ti.exons[imax]->start) return NULL;
+        //comment out the line above for just "intronCompatible()" check (allowing extension of intron chain)
+     }
+ //now check if the rest of the introns overlap, in the same sequence
+ i++;
+ j++;
+ while (i<=imax && j<=jmax) {
+  if (ti.exons[i-1]->end!=tj.exons[j-1]->end ||
+      ti.exons[i]->start!=tj.exons[j]->start) return NULL;
+  i++;
+  j++;
+  }
+ i--; //step back to the last exon index that was actually compared
+ j--;
+ if (i==imax && j<jmax) {
+   // tj has more introns to the right, check if ti's end doesn't conflict with the current tj exon boundary
+   if (ti.end>tj.exons[j]->end) return NULL;
+   }
+ else if (j==jmax && i<imax) { //ti has more introns to the right: same check with the roles swapped
+   if (tj.end>ti.exons[i]->end) return NULL;
+   }
+ return bigger;
+}
+
+
+int gseqCmpName(const pointer p1, const pointer p2) { //compares two GenomicSeqData records by gseq_name (strcmp result)
+ GenomicSeqData *lhs=(GenomicSeqData*)p1, *rhs=(GenomicSeqData*)p2; return strcmp(lhs->gseq_name, rhs->gseq_name);
+}
+
+
+void printLocus(GffLocus* loc, const char* pre) {
+  //Prints "<pre> [start-end] : id1,id2,..." for the locus: the optional prefix goes to stderr, the rest through GMessage
+  if (pre!=NULL) fprintf(stderr, "%s", pre);
+  GMessage(" [%d-%d] : ", loc->start, loc->end);
+  const int n=loc->rnas.Count();
+  GMessage("%s",loc->rnas[0]->getID()); //the first ID carries no leading comma
+  for (int k=1;k<n;k++) GMessage(",%s",loc->rnas[k]->getID());
+  GMessage("\n");
+}
+
+void preserveContainedCDS(GffObj* t, GffObj* tfrom) {
+ //Carries CDS information from the contained transcript tfrom over to its container t:
+ //an existing CDS on t is only widened (within t's own span), a missing one is copied.
+ if (tfrom->CDstart==0) return; //tfrom has no CDS to offer
+ if (t->CDstart==0) { //no CDS info on container, just copy it from the contained
+   t->addCDS(tfrom->CDstart, tfrom->CDend, tfrom->CDphase);
+   return;
+   }
+ const bool extendLeft = (tfrom->CDstart<t->CDstart && tfrom->CDstart>=t->start);
+ if (extendLeft) t->CDstart=tfrom->CDstart;
+ const bool extendRight = (tfrom->CDend>t->CDend && tfrom->CDend<=t->end);
+ if (extendRight) t->CDend=tfrom->CDend;
+}
+
+bool exonOverlap2Gene(GffObj* t, GffObj& g) {
+	//t has exons: the result of t->exonOverlap() on g's start..end;
+	//t has none: the result of g.overlap(*t)
+	if (t->exons.Count()<=0) return g.overlap(*t);
+	return t->exonOverlap(g.start, g.end);
+}
+void GffLoader::placeGf(GffObj* t, GenomicSeqData* gdata, bool doCluster, bool collapseRedundant, //files t under gdata (rnas or gfs) and, if doCluster, into a locus
+                                               bool matchAllIntrons, bool fuzzSpan) { //the last two flags are passed on to redundantTranscripts()
+  GTData* tdata=new GTData(t); //additional transcript data
+  gdata->tdata.Add(tdata);
+  //int tidx=-1;
+  /*
+  if (debug) {
+     GMessage(">>Placing transcript %s\n", t->getID());
+     debugState=true;
+     }
+    else debugState=false; 
+   */
+  //dumb TRNA case for RefSeq: gene parent link missing
+  //try to restore it here; BUT this only works if gene feature comes first
+  if (t->parent==NULL && t->isTranscript()) {
+  	int gidx=gdata->gfs.Count()-1;
+  	while (gidx>=0 && gdata->gfs[gidx]->end>=t->start) { //walk back through gfs entries that end at or after t's start
+  		GffObj& g = *(gdata->gfs[gidx]);
+  		if (g.isGene() && t->strand==g.strand && exonOverlap2Gene(t, g)) { //same-strand gene overlapping t: adopt it as parent
+  			g.children.Add(t);
+  			t->parent=&g;
+  			//disable printing of gene if transcriptsOnly
+  			if (transcriptsOnly) {
+  				g.udata|=4; //tag it as non-printable
+  			}
+  			const char* geneName=g.getAttr("Name");
+  			if (t->getAttr("Name")==NULL && geneName) { //t has no Name of its own: inherit the gene's
+  				t->addAttr("Name", geneName);
+  				t->addAttr("gene_name", geneName);
+  			}
+  			t->addAttr("geneID", g.getID());
+  			break; //only the first matching gene is used
+  		}
+  		gidx--;
+  	}
+  }
+
+  /*
+	if (t->exons.Count()==0  && t->children.Count()==0 && forceExons) {
+		//a non-mRNA feature with no subfeatures
+		//just so we get some sequence functions working, add a dummy "exon"-like subfeature here
+		//--this could be a single "pseudogene" entry or another genomic region without exons
+		//
+		t->addExon(t->start,t->end);
+	}
+  */
+  if (t->exons.Count()>0) {
+              //tidx=
+              gdata->rnas.Add(t); //added it in sorted order
+              }
+            else {
+              if (t->isGene() || !this->transcriptsOnly) //exonless: kept in gfs if it is a gene or non-transcripts are wanted
+              	  gdata->gfs.Add(t);
+              return; //nothing to do with these non-transcript objects
+              }
+  if (!doCluster) return;
+  if (gdata->loci.Count()==0) { //first locus on this genomic sequence
+       gdata->loci.Add(new GffLocus(t));
+       //GMessage("  <<make it first locus %d-%d \n",t->start, t->end);
+       return;
+       }
+   /*    
+  //DEBUG: show available loci:
+   if (debug) {
+    GMessage("  [%d loci already:\n", gdata->loci.Count());
+    for (int l=0;l<gdata->loci.Count();l++) {
+       printLocus(gdata->loci[l]);
+       }
+    }
+  */
+  int nidx=qsearch_gloci(t->end, gdata->loci); //get index of nearest locus starting just ABOVE t->end
+  //GMessage("\tlooking up end coord %d in gdata->loci.. (qsearch got nidx=%d)\n", t->end, nidx);
+  if (nidx==0) {
+     //cannot have any overlapping loci
+     //if (debug) GMessage("  <<no ovls possible, create locus %d-%d \n",t->start, t->end);
+     gdata->loci.Add(new GffLocus(t));
+     return;
+     }
+  if (nidx==-1) nidx=gdata->loci.Count();//all loci start below t->end
+  int lfound=0; //count of parent loci
+  GArray<int> mrgloci(false); //indices of the loci that accepted t, in decreasing order (filled by the loop below)
+  GList<GffLocus> tloci(true); //candidate parent loci to adopt this (not used below)
+  //if (debug) GMessage("\tchecking all loci from %d to 0\n",nidx-1);
+  for (int l=nidx-1;l>=0;l--) {
+      GffLocus& loc=*(gdata->loci[l]);
+      if (loc.strand!='.' && t->strand!='.'&& loc.strand!=t->strand) continue; //both stranded and on opposite strands
+      if (t->start>loc.end) { //locus ends before t starts
+           if (t->start-loc.start>GFF_MAX_LOCUS) break; //give up already
+           continue;
+           }
+      if (loc.start>t->end) {
+               //this should never be the case if nidx was found correctly
+               GMessage("Warning: qsearch_gloci found loc.start>t.end!(t=%s)\n", t->getID());
+               continue;
+               }
+      /*
+      if (debug) {
+          GMessage(" !range overlap found with locus ");
+          printLocus(&loc);
+          }
+      */
+      if (loc.add_RNA(t)) {
+         //add_RNA() returned true: t is counted as a member of loc
+         lfound++;
+         mrgloci.Add(l);
+         if (collapseRedundant) {
+           //compare to every single transcript in this locus
+           for (int ti=0;ti<loc.rnas.Count();ti++) {
+                 if (loc.rnas[ti]==t) continue;
+                 GTData* odata=(GTData*)(loc.rnas[ti]->uptr); //presumably the GTData attached when that transcript was placed - confirm
+                 //GMessage("  ..redundant check vs overlapping transcript %s\n",loc.rnas[ti]->getID());
+                 GffObj* container=NULL;
+                 if (odata->replaced_by==NULL && 
+                      (container=redundantTranscripts(*t, *(loc.rnas[ti]), matchAllIntrons, fuzzSpan))!=NULL) {
+                     if (container==t) { //t is the bigger one: the other transcript is replaced by t
+                        odata->replaced_by=t;
+                        preserveContainedCDS(t, loc.rnas[ti]);
+                        }
+                     else { //the other transcript is the bigger one: t is replaced by it
+                        tdata->replaced_by=loc.rnas[ti];
+                        preserveContainedCDS(loc.rnas[ti], t);
+                        }
+                     }
+              }//for each transcript in the exon-overlapping locus
+          } //if doCollapseRedundant
+         } //overlapping locus
+      } //for each existing locus
+  if (lfound==0) {
+      //overlapping loci not found, create a locus with only this mRNA
+      /* if (debug) {
+        GMessage("  overlapping locus not found, create locus %d-%d \n",t->start, t->end);
+        }
+      */
+      int addidx=gdata->loci.Add(new GffLocus(t));
+      if (addidx<0) {
+         //should never be the case!
+         GMessage("  WARNING: new GffLocus(%s:%d-%d) not added!\n",t->getID(), t->start, t->end);
+         }
+      }
+   else { //found at least one overlapping locus
+     lfound--; //now the index of the last entry in mrgloci
+     int locidx=mrgloci[lfound];
+     GffLocus& loc=*(gdata->loci[locidx]);
+     //last locus index found is also the smallest index
+     if (lfound>0) {
+       //more than one loci found parenting this mRNA, merge loci
+       /* if (debug)
+          GMessage(" merging %d loci \n",lfound);
+       */
+       for (int l=0;l<lfound;l++) {
+          int mlidx=mrgloci[l]; 
+          loc.addMerge(*(gdata->loci[mlidx]), t);
+          gdata->loci.Delete(mlidx); //highest indices first, so it's safe to remove
+          }
+       }
+     int i=locidx;  
+     while (i>0 && loc<*(gdata->loci[i-1])) {
+       //bubble down until it's in the proper order
+       i--;
+       gdata->loci.Swap(i,i+1);
+       }
+     }//found at least one overlapping locus
+}
+
+// Numbers every locus in ref_data (1-based, counting on across genomic sequences) and
+// fills loc.gene_names / loc.gene_ids with the upper-cased gene names and "GeneID:" xref
+// values found on the locus' transcripts, each paired with its number of occurrences.
+void collectLocusData(GList<GenomicSeqData>& ref_data) {
+  int locus_num=0;
+  for (int g=0;g<ref_data.Count();g++) {
+    GenomicSeqData* gdata=ref_data[g];
+    for (int l=0;l<gdata->loci.Count();l++) {
+      GffLocus& loc=*(gdata->loci[l]);
+      GHash<int> gnames(true); //gene name => occurrences in this locus
+      GHash<int> geneids(true); //Entrez GeneID => occurrences in this locus
+      for (int i=0;i<loc.rnas.Count();i++) {
+        GffObj& t=*(loc.rnas[i]);
+        GStr gname(t.getGeneName());
+        if (!gname.is_empty()) {
+          gname.upper();
+          int* prevg=gnames.Find(gname.chars());
+          if (prevg!=NULL) (*prevg)++;
+          else gnames.Add(gname, new int(1));
+        }
+        //parse GeneID xrefs, if any ("xrefs" is tokenized on commas)
+        GStr xrefs(t.getAttr("xrefs"));
+        if (xrefs.is_empty()) continue;
+        xrefs.startTokenize(",");
+        GStr token;
+        while (xrefs.nextToken(token)) {
+          token.upper();
+          if (!token.startsWith("GENEID:")) continue;
+          token.cut(0,token.index(':')+1); //keep only what follows the ':'
+          int* prevg=geneids.Find(token.chars());
+          if (prevg!=NULL) (*prevg)++;
+          else geneids.Add(token, new int(1));
+        } //for each xref
+      } //for each transcript
+      loc.locus_num=++locus_num;
+      if (gnames.Count()>0) { //collect all gene names associated to this locus
+        gnames.startIterate();
+        int* gfreq=NULL;
+        char* key=NULL;
+        while ((gfreq=gnames.NextData(key))!=NULL) {
+          loc.gene_names.AddIfNew(new CGeneSym(key,*gfreq));
+        }
+      }
+      if (geneids.Count()>0) { //test the local tally; loc.gene_ids is the list being filled
+        geneids.startIterate();
+        int* gfreq=NULL;
+        char* key=NULL;
+        while ((gfreq=geneids.NextData(key))!=NULL) {
+          loc.gene_ids.AddIfNew(new CGeneSym(key,*gfreq));
+        }
+      }
+    } //for each locus
+  } //for each genomic sequence
+}
+
+
+// Reads all features from the loader's input stream and places them into seqdata.
+//   seqdata:      per-genomic-sequence containers; a new GenomicSeqData is appended the
+//                 first time a feature's gseq_id is not found in the list
+//   gf_validate:  optional callback; a feature it rejects is skipped, and any objects it
+//                 appended to the list it was handed are placed before the feature itself
+//   doCluster, doCollapseRedundant, matchAllIntrons, fuzzSpan: forwarded to placeGf()
+//   forceExons:   when set, every feature's exon_ftype_id is set to gff_fid_exon
+// When this->noPseudo is set, a feature is dropped if its feature type name starts with
+// "pseudo", or if an attribute whose name begins with "pseudo" (or "is" + "pseudo") has a
+// value starting with t, y or 1 (case-insensitive). "locus" features carrying a
+// "transcripts" attribute are always dropped. The stream is closed unless it is stdin.
+void GffLoader::load(GList<GenomicSeqData>& seqdata, GFValidateFunc* gf_validate,
+                          bool doCluster, bool doCollapseRedundant,
+                          bool matchAllIntrons, bool fuzzSpan, bool forceExons) {
+   GffReader* gffr=new GffReader(f, this->transcriptsOnly, false); //not only mRNA features, not sorted
+   gffr->showWarnings(this->showWarnings);
+   //           keepAttrs   mergeCloseExons  noExonAttr
+   gffr->readAll(this->fullAttributes,    this->mergeCloseExons,  this->noExonAttrs);
+   GVec<int> pseudoAttrIds;
+   GVec<int> pseudoFeatureIds;
+   //collect up front the ids of the feature types and attribute names that flag a
+   //pseudogene, so each feature below is checked against ids rather than names
+   if (this->noPseudo) {
+     GffNameList& fnames = gffr->names->feats;
+     for (int i=0;i<fnames.Count();i++) {
+       if (startsWith(fnames[i]->name, "pseudo")) pseudoFeatureIds.Add(fnames[i]->idx);
+     }
+     GffNameList& attrnames = gffr->names->attrs;
+     for (int i=0;i<attrnames.Count();i++) {
+       char* n=attrnames[i]->name;
+       char* p=strifind(n, "pseudo");
+       //tolower() needs an unsigned char value; a negative plain char is undefined behavior
+       bool isPrefixed=(p==n+2 && tolower(static_cast<unsigned char>(n[0]))=='i'
+                               && tolower(static_cast<unsigned char>(n[1]))=='s');
+       if (p==n || isPrefixed) pseudoAttrIds.Add(attrnames[i]->idx);
+     }
+   }
+
+  if (verbose) GMessage("   .. loaded %d genomic features from %s\n", gffr->gflst.Count(), fname.chars());
+  //add to GenomicSeqData, adding to existing loci and identifying intron-chain duplicates
+  for (int k=0;k<gffr->gflst.Count();k++) {
+     GffObj* m=gffr->gflst[k];
+     if (strcmp(m->getFeatureName(), "locus")==0 &&
+          m->getAttr("transcripts")!=NULL) {
+        continue; //discard locus meta-features
+        }
+     //pseudogene filter: match by feature type first, then by attribute value
+     if (this->noPseudo) {
+       bool is_pseudo=false;
+       for (int i=0;i<pseudoFeatureIds.Count() && !is_pseudo;++i) {
+         is_pseudo=(pseudoFeatureIds[i]==m->ftype_id);
+       }
+       for (int i=0;i<pseudoAttrIds.Count() && !is_pseudo;++i) {
+         char* attrv=NULL;
+         if (m->attrs!=NULL) attrv=m->attrs->getAttr(pseudoAttrIds[i]);
+         if (attrv==NULL) continue;
+         char fc=tolower(static_cast<unsigned char>(attrv[0]));
+         is_pseudo=(fc=='t' || fc=='y' || fc=='1');
+       }
+       if (is_pseudo) continue;
+     }
+     //drop any "locus" attribute whose value starts with RLOC_
+     char* rloc=m->getAttr("locus");
+     if (rloc!=NULL && startsWith(rloc, "RLOC_")) {
+        m->removeAttr("locus", rloc);
+        }
+     if (forceExons) {
+       m->exon_ftype_id=gff_fid_exon;
+       }
+     //gfadd receives any extra objects the validation callback wants placed together
+     //with m; a false return from the callback rejects m altogether
+     GList<GffObj> gfadd(false,false);
+     if (gf_validate!=NULL && !(*gf_validate)(m, &gfadd)) {
+       continue;
+       }
+     m->isUsed(true); //so the gffreader won't destroy it
+     //find (or create) the container for this feature's genomic sequence
+     int sidx=-1;
+     GenomicSeqData seqkey(m->gseq_id); //lookup key; named so it does not hide the input stream f
+     GenomicSeqData* gdata=NULL;
+     if (seqdata.Found(&seqkey,sidx)) gdata=seqdata[sidx];
+         else { //entry not created yet for this genomic seq
+           gdata=new GenomicSeqData(m->gseq_id);
+           seqdata.Add(gdata);
+           }
+     //companion objects go in first, then the feature itself
+     for (int a=0;a<gfadd.Count();a++) {
+       placeGf(gfadd[a], gdata, doCluster, doCollapseRedundant, matchAllIntrons, fuzzSpan);
+       }
+     placeGf(m, gdata, doCluster, doCollapseRedundant, matchAllIntrons, fuzzSpan);
+     } //for each read gffObj
+   //close the input stream unless it is stdin, then release the reader
+   if (f!=stdin) { fclose(f); f=NULL; }
+   delete gffr;
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/libgff.git



More information about the debian-med-commit mailing list