[med-svn] [Git][med-team/libgclib][upstream] New upstream version 0.12.1+ds

Andreas Tille gitlab at salsa.debian.org
Mon Oct 26 09:20:43 GMT 2020



Andreas Tille pushed to branch upstream at Debian Med / libgclib


Commits:
fe459c03 by Andreas Tille at 2020-09-18T11:04:57+02:00
New upstream version 0.12.1+ds
- - - - -


16 changed files:

- GBam.cpp
- GBam.h
- GBase.cpp
- GBase.h
- GFastaIndex.cpp
- GFastaIndex.h
- + GHashMap.hh
- GIntHash.hh
- GThreads.cpp
- GThreads.h
- gff.cpp
- gff.h
- htest.cpp
- + khashl.hh
- tag_git.sh
- + xxhash.h


Changes:

=====================================
GBam.cpp
=====================================
@@ -29,7 +29,7 @@ uint8_t* dupalloc_bdata(bam1_t *b, int size) {
 GBamRecord::GBamRecord(const char* qname, int32_t gseq_tid,
                  int pos, bool reverse, const char* qseq,
                  const char* cigar, const char* quals):iflags(0), exons(1),
-                		 clipL(0), clipR(0), mapped_len(0) {
+                		 clipL(0), clipR(0), mapped_len(0), uval(0) {
    novel=true;
    bam_header=NULL;
    b=bam_init1();
@@ -57,7 +57,7 @@ GBamRecord::GBamRecord(const char* qname, int32_t gseq_tid,
 GBamRecord::GBamRecord(const char* qname, int32_t samflags, int32_t g_tid,
              int pos, int map_qual, const char* cigar, int32_t mg_tid, int mate_pos,
              int insert_size, const char* qseq, const char* quals,
-             GVec<char*>* aux_strings):iflags(0), exons(1)  {
+             GVec<char*>* aux_strings):iflags(0), exons(1), uval(0)  {
   novel=true;
   bam_header=NULL;
   b=bam_init1();


=====================================
GBam.h
=====================================
@@ -36,13 +36,14 @@ class GBamRecord: public GSeg {
    int clipL; //soft clipping data, as seen in the CIGAR string
    int clipR;
    int mapped_len; //sum of exon lengths
+   int uval; //user value (e.g. file index)
    bool isHardClipped() { return hard_Clipped; }
    bool isSoftClipped() { return soft_Clipped; }
    bool hasIntrons() { return has_Introns; }
    //created from a reader:
    void bfree_on_delete(bool b_free=true) { novel=b_free; }
    GBamRecord(bam1_t* from_b=NULL, bam_header_t* b_header=NULL, bool b_free=true):iflags(0), exons(1),
-		   clipL(0), clipR(0), mapped_len(0) {
+		   clipL(0), clipR(0), mapped_len(0), uval(0) {
       bam_header=NULL;
       if (from_b==NULL) {
            b=bam_init1();
@@ -58,7 +59,7 @@ class GBamRecord: public GSeg {
    }
 
    GBamRecord(GBamRecord& r):GSeg(r.start, r.end), iflags(0), exons(r.exons),
-		   clipL(r.clipL), clipR(r.clipR), mapped_len(r.mapped_len) { //copy constructor
+		   clipL(r.clipL), clipR(r.clipR), mapped_len(r.mapped_len), uval(0) { //copy constructor
 	      //makes a new copy of the bam1_t record etc.
 	      clear();
 	      b=bam_dup1(r.b);
@@ -77,6 +78,7 @@ class GBamRecord: public GSeg {
       exons = r.exons;
       clipL = r.clipL;
       clipR = r.clipR;
+      uval = r.uval;
       mapped_len=r.mapped_len;
       return *this;
       }
@@ -317,6 +319,7 @@ class GBamReader {
 class GBamWriter {
    samfile_t* bam_file;
    bam_header_t* bam_header;
+   bool sharedHeader;
  public:
    void create(const char* fname, bool uncompressed=false) {
       if (bam_header==NULL)
@@ -330,16 +333,21 @@ class GBamWriter {
       if (bam_file==NULL)
          GError("Error: could not create BAM file %s!\n",fname);
       }
+
    void create(const char* fname, bam_header_t* bh, bool uncompressed=false) {
      bam_header=bh;
      create(fname,uncompressed);
      }
 
-   GBamWriter(const char* fname, bam_header_t* bh, bool uncompressed=false) {
+   GBamWriter(const char* fname, bam_header_t* bh, bool uncompressed=false):sharedHeader(false) {
       create(fname, bh, uncompressed);
-      }
+   }
 
-   GBamWriter(const char* fname, const char* samfname, bool uncompressed=false) {
+   GBamWriter(bam_header_t* bh, const char* fname, bool uncompressed=false):sharedHeader(true) {
+	   create(fname, bh, uncompressed);
+   }
+
+   GBamWriter(const char* fname, const char* samfname, bool uncompressed=false):sharedHeader(false) {
       tamFile samf_in=sam_open(samfname);
       if (samf_in==NULL)
          GError("Error: could not open SAM file %s\n", samfname);
@@ -352,8 +360,8 @@ class GBamWriter {
 
     ~GBamWriter() {
       samclose(bam_file);
-      bam_header_destroy(bam_header);
-      }
+      if (!sharedHeader) bam_header_destroy(bam_header);
+    }
    bam_header_t* get_header() { return bam_header; }
    int32_t get_tid(const char *seq_name) {
       if (bam_header==NULL)


=====================================
GBase.cpp
=====================================
@@ -157,7 +157,7 @@ int G_mkdir(const char* path, int perms = (S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH
    //int perms=(S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH) ) {
  #ifdef _WIN32
      //return _mkdir(path);
-	 return CreateDirectoryA(path, NULL);
+     return !CreateDirectoryA(path, NULL);
  #else
      return mkdir(path, perms);
  #endif
@@ -167,8 +167,16 @@ int G_mkdir(const char* path, int perms = (S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH
 void Gmktempdir(char* templ) {
 #ifdef _WIN32
   int blen=strlen(templ);
-  if (_mktemp_s(templ, blen)!=0)
-	  GError("Error creating temp dir %s!\n", templ);
+  char* pt=templ+blen-1;
+  //on Windows this needs a plain file name template, without directory prefix
+  blen=1;
+  while (pt!=templ) {
+    if (*pt=='/' || *pt=='\\') { pt++; break;}
+    --pt; blen++;
+  }
+  if (_mktemp_s(pt, blen)!=0)
+     GError("Error creating template file name %s!\n", pt);
+  Gmkdir(templ, true);
 #else
   char* cdir=mkdtemp(templ);
   if (cdir==NULL)
@@ -239,12 +247,40 @@ FILE* Gfopen(const char *path, char *mode) {
 
 bool GstrEq(const char* a, const char* b) {
 	 if (a==NULL || b==NULL) return false;
-	 return (strcmp(a,b)==0);
+	 return (strcmp(a, b)==0);
+}
+
+
+#ifdef __CYGWIN__
+int strcasecmp (const char *s1, const char *s2) { //case-insensitive strcmp() fallback: returns <0, 0 or >0
+  int d = 0; //difference of the first pair of lowercased characters that differ
+  for ( ; ; ) {
+     const int c1 = tolower((unsigned char)*s1++); //cast: tolower() is undefined for negative char values
+     const int c2 = tolower((unsigned char)*s2++);
+     if (((d = c1 - c2) != 0) || (c2 == '\0')) //stop at the first mismatch, or when both strings ended
+       break;
+  }
+  return d;
 }
 
+int strncasecmp (const char *s1, const char *s2, //case-insensitive strncmp() fallback: returns <0, 0 or >0
+                  size_t n) { //n: maximum number of characters to compare (0 always yields 0)
+  int d = 0;
+  for ( ; n != 0; n--) {
+     const int c1 = tolower((unsigned char)*s1++); //cast: tolower() is undefined for negative char values
+     const int c2 = tolower((unsigned char)*s2++);
+     if (((d = c1 - c2) != 0) || (c2 == '\0')) //stop at the first mismatch, or when both strings ended
+       break;
+  }
+  return d;
+}
+
+#endif
+
+
 bool GstriEq(const char* a, const char* b) {
 	 if (a==NULL || b==NULL) return false;
-	 return (strcasecmp(a,b)==0);
+	 return (strcasecmp(a, b)==0);
 }
 
 int Gstricmp(const char* a, const char* b, int n) {
@@ -920,6 +956,8 @@ void writeFasta(FILE *fw, const char* seqid, const char* descr,
   fflush(fw);
  }
 
+
+
 char* commaprintnum(uint64 n) {
   char retbuf[48];
   int comma = ',';


=====================================
GBase.h
=====================================
@@ -1,6 +1,6 @@
 #ifndef G_BASE_DEFINED
 #define G_BASE_DEFINED
-#define GCLIB_VERSION "0.11.10"
+#define GCLIB_VERSION "0.12.1"
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
@@ -17,15 +17,8 @@
   //#define __ISO_C_VISIBLE 1999
 #endif
 
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#include <limits.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <stdint.h>
-#include <stdarg.h>
+#define XSTR(x) STR(x)
+#define STR(x) #x
 
 #ifdef _WIN32
   #include <windows.h>
@@ -36,6 +29,7 @@
   #ifndef popen
    #define popen _popen
   #endif
+  /*
   #ifndef fseeko
 		#ifdef _fseeki64
 			#define fseeko(stream, offset, origin) _fseeki64(stream, offset, origin)
@@ -43,38 +37,53 @@
 			#define fseeko fseek
 		#endif
   #endif
- #ifndef ftello
-  #ifdef _ftelli64
-    #define ftello(stream) _ftelli64(stream)
-  #else
-    #define ftello ftell
-  #endif
+  #ifndef ftello
+    #ifdef _ftelli64
+      #define ftello(stream) _ftelli64(stream)
+    #else
+      #define ftello ftell
+    #endif
  #endif
+ */
  #else
   #define CHPATHSEP '/'
+  #ifdef __CYGWIN__
+    #define _BSD_SOURCE
+  #endif 
   #include <unistd.h>
 #endif
 
-#ifndef fseeko
- #define fseeko fseek
-#endif
-#ifndef ftello
- #define ftello ftell
-#endif
-
 #ifdef DEBUG
 #undef NDEBUG
 #define _DEBUG 1
 #define _DEBUG_ 1
 #endif
 
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <math.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <type_traits>
+
+typedef int64_t int64;
+typedef uint64_t uint64;
 typedef int32_t int32;
 typedef uint32_t uint32;
 typedef int16_t int16;
 typedef uint16_t uint16;
 
 typedef unsigned char uchar;
-typedef unsigned char byte;
+typedef uint8_t byte;
+typedef unsigned int uint;
+
+typedef void* pointer;
+
 
 #ifndef MAXUINT
 #define MAXUINT ((unsigned int)-1)
@@ -92,9 +101,6 @@ typedef unsigned char byte;
 #define MAX_INT INT_MAX
 #endif
 
-typedef int64_t int64;
-typedef uint64_t uint64;
-
 /****************************************************************************/
 
 #ifndef EXIT_FAILURE
@@ -155,9 +161,6 @@ GEXIT(#condition);}
 // Clamp value x to range [lo..hi]
 #define GCLAMP(lo,x,hi) ((x)<(lo)?(lo):((x)>(hi)?(hi):(x)))
 
-typedef void* pointer;
-typedef unsigned int uint;
-
 typedef int GCompareProc(const pointer item1, const pointer item2);
 typedef long GFStoreProc(const pointer item1, FILE* fstorage); //for serialization
 typedef pointer GFLoadProc(FILE* fstorage); //for deserialization
@@ -167,6 +170,7 @@ typedef void GFreeProc(pointer item); //usually just delete,
 
 #define GMALLOC(ptr,size)  if (!GMalloc((pointer*)(&ptr),size)) \
                                      GError(ERR_ALLOC)
+
 #define GCALLOC(ptr,size)  if (!GCalloc((pointer*)(&ptr),size)) \
                                      GError(ERR_ALLOC)
 #define GREALLOC(ptr,size) if (!GRealloc((pointer*)(&ptr),size)) \
@@ -209,12 +213,28 @@ template<class T> void Gswap(T& lhs, T& rhs) {
  rhs=tmp;
 }
 
+// use std::is_pointer from <type_traits> in C++11 instead
+/*
+template<typename T>
+  struct isPointer { static const bool value = false; };
+
+template<typename T>
+  struct isPointer<T*> { static const bool value = true; };
+*/
+//check if type T is resolved as a pointer to char
+template<class T>
+  struct is_char_ptr : std::integral_constant < //::value is true when T decays to char* or const char*
+      bool,
+      std::is_same<char const *, typename std::decay<T>::type>::value || //std::decay drops references/cv and turns arrays into pointers
+        std::is_same<char *, typename std::decay<T>::type>::value
+  > {}; //used by GHashSet/GHashMap to select the "Cached" khashl base class for string keys
 
 /**************** Memory management ***************************/
 
 bool GMalloc(pointer* ptr, unsigned long size); // Allocate memory
 bool GCalloc(pointer* ptr, unsigned long size); // Allocate and initialize memory
 bool GRealloc(pointer* ptr,unsigned long size); // Resize memory
+
 void GFree(pointer* ptr); // Free memory, resets ptr to NULL
 
 //int saprintf(char **retp, const char *fmt, ...);
@@ -224,6 +244,15 @@ void GMessage(const char* format,...);// Log message to stderr
 // Assert failed routine:- usually not called directly but through GASSERT
 void GAssert(const char* expression, const char* filename, unsigned int lineno);
 
+
+template<class T> T* GDupAlloc(T& data) { //returns a newly GMalloc'ed bitwise copy of data (no constructor is run)
+	T* tmp=NULL;
+	if (!GMalloc((pointer*) &tmp, sizeof(T))) //pass the address of tmp so GMalloc can store the new block in it (as GMALLOC does)
+			GError(ERR_ALLOC);
+	memcpy((void*)tmp, (void*)&data, sizeof(T)); //raw byte copy: presumably only meant for trivially copyable T
+	return tmp; //NOTE(review): caller owns the block; presumably released with GFree/GFREE - confirm
+}
+
 // ****************** basic string manipulation *************************
 char *Gstrdup(const char* str, int xtracap=0); //string duplication with extra capacity added
 //duplicate a string by allocating a copy for it (+xtracap heap room) and returning the new pointer


=====================================
GFastaIndex.cpp
=====================================
@@ -20,7 +20,8 @@ void GFastaIndex::addRecord(const char* seqname, uint seqlen, off_t foffs, int l
      else {
          farec=new GFastaRec(seqlen,foffs,llen,llen_full);
          records.Add(seqname,farec);
-         farec->seqname=records.getLastKey();
+         //farec->seqname=records.getLastKey();
+         farec->seqname=seqname;
          }
 }
 
@@ -52,7 +53,7 @@ int GFastaIndex::loadIndex(const char* finame) { //load record info from existin
          sscanf(p, "%d%ld%d%d", &len, &offset, &line_len, &line_blen);
 #else
          long long offset=-1;
-         sscanf(p, "%d%lld%d%d", &len, &offset, &line_len, &line_blen);
+         sscanf(p, "%u%lld%d%d", &len, &offset, &line_len, &line_blen);
 #endif
       if (len==0 || line_len==0 || line_blen==0 || line_blen<line_len)
           GError(ERR_FAIDXLINE,p);
@@ -161,7 +162,7 @@ int GFastaIndex::storeIndex(FILE* fai) {
   GFastaRec* rec=NULL;
   while ((rec=records.NextData())!=NULL) {
     reclist.Add(rec);
-    }
+  }
   //reclist has records sorted by file offset
   for (int i=0;i<reclist.Count();i++) {
 #ifdef _WIN32


=====================================
GFastaIndex.h
=====================================
@@ -8,12 +8,12 @@
 #ifndef GFAIDX_H_
 #define GFAIDX_H_
 
-#include "GHash.hh"
+#include "GHashMap.hh"
 #include "GList.hh"
 
 class GFastaRec {
  public:
-  char* seqname;
+  const char* seqname;
   uint seqlen;
   off_t fpos;
   int line_len; //effective line length (without EoL)
@@ -42,7 +42,7 @@ class GFastaIndex {
   char* fai_name;
   bool haveFai;
  public:
-  GHash<GFastaRec> records;
+  GHash<GFastaRec*> records;
   void addRecord(const char* seqname, uint seqlen,
                     off_t foffs, int llen, int llen_full);
 


=====================================
GHashMap.hh
=====================================
@@ -0,0 +1,453 @@
+/********************************************************************************
+ *                  Hash map class templates
+ *********************************************************************************/
+
+#ifndef GHashMap_HH
+#define GHashMap_HH
+#include "GBase.h"
+#include "khashl.hh"
+#include <type_traits>
+#include <typeinfo>
+
+#define XXH_INLINE_ALL 1
+#include "xxhash.h"
+
+template <typename K> struct GHashKey_xxHash32 { //32-bit hasher for a generic key K (class, primitive, pointer except const char* )
+  //template <typename T=K> inline typename std::enable_if< std::is_trivial<T>::value, uint32_t>::type
+ uint32_t operator()(const K& s) const { //only works for trivial types! (enforced by the static_assert)
+      static_assert(std::is_trivial<K>::value, "Error: cannot use this for non-trivial types!\n");
+      return XXH32((const void *) &s, sizeof(K), 0); //hashes the sizeof(K) bytes of the object, seed 0; NOTE(review): padding bytes, if any, are hashed too
+    }
+};
+
+template <> struct GHashKey_xxHash32<const char*> { //C-string specialization: hashes the characters, not the pointer value
+   inline uint32_t operator()(const char* s) const { //s must be a non-NULL, NUL-terminated string (strlen is called on it)
+      return XXH32(s, strlen(s), 0); //terminating NUL is not hashed; seed 0
+   }
+};
+
+template <typename K> struct GHashKey_xxHash { //64-bit hasher for a generic key K (class, primitive, pointer except const char* )
+  //template <typename T=K> inline typename std::enable_if< std::is_trivial<T>::value, uint32_t>::type
+ uint64_t operator()(const K& s) const { //only works for trivial types! (enforced by the static_assert)
+      static_assert(std::is_trivial<K>::value, "Error: cannot use this for non-trivial types!\n");
+      return XXH64((const void *) &s, sizeof(K), 0); //hashes the sizeof(K) bytes of the object, seed 0; NOTE(review): padding bytes, if any, are hashed too
+    }
+};
+
+template <> struct GHashKey_xxHash<const char*> { //C-string specialization: hashes the characters, not the pointer value
+   inline uint64_t operator()(const char* s) const { //returns the full 64-bit hash, like the generic template (was uint32_t, truncating XXH64)
+      return XXH64(s, strlen(s), 0); //s must be non-NULL and NUL-terminated; the NUL is not hashed; seed 0
+   }
+};
+
+template <typename K> struct GHashKey_Eq { //key equality functor; K is a type having the == operator defined
+    inline bool operator()(const K& x, const K& y) const { //true when the two keys compare equal
+      return (x == y); //requires == operator to be defined for K
+    }
+};
+
+template <> struct GHashKey_Eq<const char*> { //C-string specialization: compares contents, not pointer values
+    inline bool operator()(const char* x, const char* y) const { //both must be non-NULL, NUL-terminated strings (strcmp)
+      return (strcmp(x, y) == 0); //case-sensitive comparison
+    }
+};
+
+//GHashSet is never making a deep copy of the char* key, it only stores the pointer
+template <typename K=const char*, class Hash=GHashKey_xxHash<K>, class Eq=GHashKey_Eq<K>, typename khInt_t=uint64_t >
+  class GHashSet: public std::conditional< is_char_ptr<K>::value, //char pointer keys use the "Cached" khashl set, all other keys the plain one
+    klib::KHashSetCached< K, Hash,  Eq, khInt_t >,
+	klib::KHashSet< K, Hash,  Eq, khInt_t > >::type  {
+protected:
+	khInt_t i_iter=0; //slot index where the next Next() call resumes scanning
+public:
+	inline khInt_t Add(const K ky) { // returns the slot of the new key, or (khInt_t)-1 if the key already exists
+		int absent=-1;
+		khInt_t i=this->put(ky, &absent);
+		if (absent==1) //key was actually added
+		   return i;
+		return -1; //converted to khInt_t, i.e. the all-ones value
+	}
+
+	inline khInt_t Remove(K ky) { //return index being removed, or (khInt_t)-1 if no such key exists
+		khInt_t i=this->get(ky);
+		  if (i!=this->end()) { //get() yields end() when the key is not found
+			  this->del(i);
+	    	  return i;
+		  }
+		  return -1;
+	}
+
+	inline void Clear() { //removes all keys but keeps the allocated table
+		this->clear(); //does not shrink !
+	}
+
+	inline void Reset() { //removes all keys and also releases the used/keys arrays
+		this->clear();
+		GFREE(this->used); GFREE(this->keys);
+		this->bits=0; this->count=0;
+	}
+
+    ~GHashSet() { //releases the table storage through Reset()
+    	this->Reset();
+    }
+
+	inline bool operator[](K ky) { //RH only (read-only), cannot assign (use Add instead); true if ky is in the set
+		return (this->get(ky)!=this->end());
+	}
+
+	inline bool hasKey(K ky) { //true if ky is in the set (same test as operator[])
+		return (this->get(ky)!=this->end());
+	}
+
+	int Find(K ky) {//return internal slot location if found,
+	                // or -1 if not found (the slot index is narrowed to int)
+	  khInt_t r=this->get(ky);
+  	  if (r==this->end()) return -1;
+  	  return (int)r;
+	}
+
+	void startIterate() { //iterator-like initialization: the next Next() call starts from slot 0
+	  i_iter=0;
+	}
+
+	K* Next() {
+		//returns a pointer to next valid key in the table (NULL if no more)
+		if (this->count==0) return NULL;
+		khInt_t nb=this->n_buckets(); //same width as i_iter, so the bucket count is not truncated to 32 bits
+		while (i_iter<nb && !this->occupied(i_iter)) i_iter++; //skip unoccupied slots
+		if (i_iter==nb) return NULL;
+		K* k=&(this->key(i_iter)); //i_iter is the occupied slot just found (was i_iter-1: wrong slot, and an underflow at slot 0)
+		++i_iter; //resume after this slot on the next call
+		return k;
+	}
+
+	inline uint32_t Count() { return this->count; } //number of keys currently stored
+
+};
+
+//GStrSet always allocates a copy of each added string;
+// if you don't want that (keys are shared), just use GHashSet<const char*> instead
+template <class Hash=GHashKey_xxHash<const char*>, class Eq=GHashKey_Eq<const char*>, typename khInt_t=uint64_t>
+  class GStrSet: public GHashSet<const char*, Hash, Eq, khInt_t> { //string set that stores its own Gstrdup'ed copy of every key
+  public:
+	inline int Add(const char* ky) { // returns the slot of the new key (narrowed to int), or -1 if the key already exists
+		int absent=-1;
+		khInt_t i=this->put(ky, &absent);
+		if (absent==1) {//key was actually added
+		   const char* s=Gstrdup(ky);
+		   this->key(i)=s; //store a copy of the key string (replaces the caller's pointer in the slot)
+		   return i;
+		}
+		//key was already there
+		return -1;
+	}
+
+	int Remove(const char* ky) { //return index being removed, or -1 if no such key exists
+		  khInt_t i=this->get(ky);
+		  if (i!=this->end()) { //get() yields end() when the key is not found
+			  GFREE(this->key(i)); //free string copy
+			  this->del(i);
+	    	  return i;
+		  }
+		  return -1;
+	}
+
+	inline void Clear() { //frees every stored string copy, then empties the table
+		khInt_t nb=this->n_buckets();
+		for (khInt_t i = 0; i != nb; ++i) {
+			if (!this->__kh_used(this->used, i)) continue; //skip unused slots
+			//deallocate string copy
+			GFREE(this->key(i));
+		}
+		this->clear(); //does not shrink !
+	}
+
+	inline void Reset() { //Clear() plus release of the used/keys arrays
+		this->Clear();
+		GFREE(this->used); GFREE(this->keys);
+		this->bits=0; this->count=0;
+	}
+
+    ~GStrSet() { //frees the key copies and the table storage through Reset()
+    	this->Reset();
+    }
+
+};
+
+//generic hash map where keys and values can be of any type
+template <class K, class V, class Hash=GHashKey_xxHash<K>, class Eq=GHashKey_Eq<K>, typename khInt_t=uint64_t>
+  class GHashMap:public std::conditional< is_char_ptr<K>::value, //char pointer keys use the "Cached" khashl map, all other keys the plain one
+    klib::KHashMapCached< K, V, Hash,  Eq, khInt_t>,
+    klib::KHashMap< K, V, Hash,  Eq, khInt_t> >::type  {
+protected:
+	khInt_t i_iter=0; //slot index where the next Next()/NextData() call resumes scanning
+	bool freeItems=false; //when true (pointer values only), stored pointers are deleted on Remove/Clear
+public:
+	//---- these should be reimplemented for GHash
+	inline int Add(const K ky, const V val) { // adds ky -> val if ky is not present (the key is stored as given, no key copy is made here)
+		// returns the slot of the new entry (narrowed to int), or -1 if the key already exists
+		int absent=-1;
+		khInt_t i=this->put(ky, &absent);
+		if (absent==1) { //key was actually added
+			this->value(i)=val; //value is always copied
+			return i;
+		}
+		return -1; //existing entry is left untouched
+	}
+	template <typename T=V> inline
+		typename std::enable_if< std::is_pointer<T>::value, int>::type
+			Remove(K ky) { //pointer values: return index being removed, or -1 if no such key exists
+		khInt_t i=this->get(ky);
+		  if (i!=this->end()) { //get() yields end() when the key is not found
+			  if (freeItems) delete this->value(i);
+			  this->del(i);
+	    	  return i;
+		  }
+		  return -1;
+	}
+
+	template <typename T=V> inline
+		typename std::enable_if< !std::is_pointer<T>::value, int>::type
+			Remove(K ky) { //non-pointer values: return index being removed, or -1 if no such key exists
+		  khInt_t i=this->get(ky);
+		  if (i!=this->end()) {
+			  this->del(i);
+	    	  return i;
+		  }
+		  return -1;
+	}
+
+
+	template <typename T=V> inline
+		typename std::enable_if< std::is_pointer<T>::value, void>::type
+		Clear() { //pointer values: empties the map, deleting the stored pointers first when freeItems is set
+		if (!freeItems) {
+			this->clear(); //does not shrink !
+			return;
+		}
+		khInt_t nb=this->n_buckets();
+		for (khInt_t i = 0; i != nb; ++i) {
+			if (!this->__kh_used(this->used, i)) continue; //skip unused slots
+			if (freeItems) delete this->value(i);
+		}
+		this->clear();
+	}
+
+	template <typename T=V> inline
+		typename std::enable_if< !std::is_pointer<T>::value, void>::type
+		Clear() { //non-pointer values: empties the map; nothing is ever deleted here
+		if (!freeItems) {
+			this->clear(); //does not shrink !
+			return;
+		}
+		khInt_t nb=this->n_buckets();
+		for (khInt_t i = 0; i != nb; ++i) { //this loop has no effect: there is nothing to free for non-pointer values
+			if (!this->__kh_used(this->used, i)) continue;
+		}
+		this->clear();
+	}
+
+	inline void Reset() { //Clear() plus release of the used/keys arrays
+		this->Clear();
+		GFREE(this->used); GFREE(this->keys);
+		this->bits=0; this->count=0;
+	}
+
+    ~GHashMap() { //releases values (if owned) and the table storage through Reset()
+    	this->Reset();
+    }
+
+  // -- these can be shared with GHash:
+
+	GHashMap(bool doFree=std::is_pointer<V>::value):freeItems(doFree) { //doFree: delete stored pointer values on Remove/Clear
+		static_assert(std::is_trivial<K>::value,
+				"Error: cannot use this for non-trivial types!\n");
+		if (!std::is_pointer<V>::value) freeItems=false; //non-pointer values are never deleted (was a dead store to the doFree parameter)
+	};
+	//Find: look up the value stored for a key, NULL if the key is not found
+	// for pointer V the stored pointer itself is returned; otherwise a pointer to the stored value
+	template <typename T=V> inline
+			typename std::enable_if< std::is_pointer<T>::value, T>::type
+	        Find(const K ky) {
+	  khInt_t r=this->get(ky);
+	  if (r==this->end()) return NULL;
+	  return this->value(r);
+	}
+
+	template <typename T=V> inline
+			typename std::enable_if< !std::is_pointer<T>::value, T*>::type
+	        Find(const K ky) {
+	  khInt_t r=this->get(ky);
+	  if (r==this->end()) return NULL;
+	  return &(this->value(r));
+	}
+
+	//-- operator[] behaves exactly like Find (read-only lookup, never inserts)
+	template <typename T=V> inline
+			typename std::enable_if< std::is_pointer<T>::value, T>::type
+	        operator[](const K ky) {
+	  khInt_t r=this->get(ky);
+	  if (r==this->end()) return NULL;
+	  return this->value(r);
+	}
+
+	template <typename T=V> inline
+			typename std::enable_if< !std::is_pointer<T>::value, T*>::type
+	        operator[](const K ky) {
+	  khInt_t r=this->get(ky);
+	  if (r==this->end()) return NULL;
+	  return &(this->value(r));
+	}
+
+	inline bool hasKey(K ky) { //true if ky is present in the map
+		return (this->get(ky)!=this->end());
+	}
+
+	inline void startIterate() { //iterator-like initialization: the next Next()/NextData() call starts from slot 0
+	  i_iter=0;
+	}
+
+	template <typename T=K> inline
+				typename std::enable_if< !std::is_pointer<T>::value, T*>::type
+	  Next (V& val) {
+		//non-pointer keys: returns a pointer to next key entry in the table (NULL if no more); val receives a copy of its value
+		if (this->count==0) return NULL;
+		khInt_t nb=this->n_buckets();
+		while (i_iter<nb && !this->occupied(i_iter)) i_iter++; //skip unoccupied slots
+		if (i_iter==nb) return NULL;
+		val=this->value(i_iter);
+		K* k=&(this->key(i_iter));
+		++i_iter;
+		return k;
+	}
+
+	template <typename T=K> inline
+				typename std::enable_if< std::is_pointer<T>::value, T>::type
+	  Next (V& val) {
+		//pointer keys: returns the next key itself (NULL if no more); val receives a copy of its value
+		if (this->count==0) return NULL;
+		khInt_t nb=this->n_buckets();
+		while (i_iter<nb && !this->occupied(i_iter)) i_iter++; //skip unoccupied slots
+		if (i_iter==nb) return NULL;
+		val=this->value(i_iter);
+		K k = this->key(i_iter);
+		++i_iter;
+		return k;
+	}
+
+	template <typename T=V> inline
+				typename std::enable_if< !std::is_pointer<T>::value, T*>::type
+	  NextData () {
+		//non-pointer values: returns a pointer to the next stored value in the table (NULL if no more)
+		if (this->count==0) return NULL;
+		khInt_t nb=this->n_buckets();
+		while (i_iter<nb && !this->occupied(i_iter)) i_iter++; //skip unoccupied slots
+		if (i_iter==nb) return NULL;
+		T* val=&(this->value(i_iter));
+		++i_iter;
+		return val;
+	}
+
+	template <typename T=V> inline
+				typename std::enable_if< std::is_pointer<T>::value, T>::type
+	  NextData () {
+		//pointer values: returns the next stored pointer in the table (NULL if no more)
+		if (this->count==0) return NULL;
+		khInt_t nb=this->n_buckets();
+		while (i_iter<nb && !this->occupied(i_iter)) i_iter++; //skip unoccupied slots
+		if (i_iter==nb) return NULL;
+		T val=this->value(i_iter);
+		++i_iter;
+		return val;
+	}
+
+
+
+	inline uint32_t Count() { return this->count; } //number of entries currently stored
+
+};
+
+template <class V, class Hash=GHashKey_xxHash<const char*>, class Eq=GHashKey_Eq<const char*>, typename khInt_t=uint64_t >
+  class GHash:public GHashMap<const char*, V, Hash, Eq, khInt_t>  { //string-keyed map that stores its own Gstrdup'ed copy of every key
+protected:
+
+public:
+	GHash(bool doFree=true) { //doFree: delete stored pointer values on Remove/Clear (only the pointer-value overloads delete)
+		this->freeItems=doFree;
+	};
+	//---- these should be now reimplemented
+	inline int Add(const char* ky, const V val) { // if a key does not exist allocate a copy of the key
+		// return -1 if the key already exists, otherwise the slot of the new entry (narrowed to int)
+		int absent=-1;
+		khInt_t i=this->put(ky, &absent);
+		if (absent==1) { //key was actually added
+			const char* s=Gstrdup(ky);
+			this->key(i)=s; //store a copy of the key string
+			this->value(i)=val; //value is always copied
+			return i;
+		}
+		return -1;
+	}
+	template <typename T=V> inline
+		typename std::enable_if< std::is_pointer<T>::value, int>::type
+	Remove(const char* ky) { //pointer values: return index being removed, or -1 if no such key exists
+		  khInt_t i=this->get(ky);
+		  if (i!=this->end()) { //get() yields end() when the key is not found
+			  GFREE(this->key(i)); //free string copy
+			  if (this->freeItems) delete this->value(i);
+	    	  this->del(i);
+	    	  return i;
+		  }
+		  return -1;
+	}
+
+	template <typename T=V> inline
+		typename std::enable_if< !std::is_pointer<T>::value, int>::type
+	Remove(const char* ky) { //non-pointer values: return index being removed, or -1 if no such key exists
+		  khInt_t i=this->get(ky);
+		  if (i!=this->end()) {
+			  GFREE(this->key(i)); //free string copy
+	    	  this->del(i);
+	    	  return i;
+		  }
+		  return -1;
+	}
+
+	template <typename T=V> inline
+		typename std::enable_if< std::is_pointer<T>::value, void>::type
+		Clear() { //pointer values: frees key copies (and values when freeItems is set), then empties the table
+		khInt_t nb=this->n_buckets();
+		for (khInt_t i = 0; i != nb; ++i) {
+			if (!this->__kh_used(this->used, i)) continue; //skip unused slots
+			if (this->freeItems) delete this->value(i);
+			GFREE(this->key(i));
+		}
+		this->clear();
+	}
+
+	template <typename T=V> inline
+		typename std::enable_if< !std::is_pointer<T>::value, void>::type
+		Clear() { //non-pointer values: frees the key copies, then empties the table
+		khInt_t nb=this->n_buckets();
+		for (khInt_t i = 0; i != nb; ++i) {
+			if (!this->__kh_used(this->used, i)) continue; //skip unused slots
+			GFREE(this->key(i));
+		}
+		this->clear();
+	}
+
+	inline void Reset() { //Clear() plus release of the used/keys arrays
+		this->Clear();
+		GFREE(this->used); GFREE(this->keys);
+		this->bits=0; this->count=0;
+	}
+
+    ~GHash() { //frees key copies, owned values and the table storage through Reset()
+    	this->Reset();
+    }
+};
+
+template<typename T> //GIntHash<T>: GHashMap with int keys and T values
+ using GIntHash = GHashMap<int, T, GHashKey_xxHash32<int>, GHashKey_Eq<int>, uint32_t>; //32-bit xxHash of the key, 32-bit table index type
+
+#endif


=====================================
GIntHash.hh
=====================================
@@ -2,15 +2,15 @@
 #define _GHASHT_HH
 #include "GBase.h"
 //----------------------------------------------
-//  Hash table templates based on Jeff Preshing's code
+//  Int Hash table templates
 // ---------------------------------------------
 //  Maps 32-bit integers to user data
 //  Uses open addressing with linear probing.
 //  In the m_cells array, key = 0 is reserved to indicate an unused cell.
 //  Actual value for key 0 (if any) is stored in m_zeroCell.
 //  The hash table automatically doubles in size when it becomes 75% full.
-//  The hash table never shrinks in size, even after Clear(),
-//  unless you explicitly call Compact().
+//  The hash table never shrinks in size
+//  unless you explicitly call Clear() or Compact().
 //----------------------------------------------
 inline uint32_t upper_power_of_two(uint32_t v) {
     v--;
@@ -228,7 +228,7 @@ public:
 };
 
 
-// -- from code.google.com/p/smhasher/wiki/MurmurHash3
+// from code.google.com/p/smhasher/wiki/MurmurHash3
 inline uint32_t integerHash(uint32_t h)
 {
 	h ^= h >> 16;
@@ -239,19 +239,15 @@ inline uint32_t integerHash(uint32_t h)
 	return h;
 }
 
-inline int32_t int_hashfunc_Wang(int32_t key) {
- key += ~(key << 15);
- key ^=  (key >> 10);
- key +=  (key << 3);
- key ^=  (key >> 6);
- key += ~(key << 11);
- key ^=  (key >> 16);
- return key;
-}
-
-// -- from Heng Li's khash.h:
-inline uint32_t int64_hashfunc(uint64_t k) {
-	return (uint32_t)(k>>33^k^k<<11);
+// from code.google.com/p/smhasher/wiki/MurmurHash3
+inline uint64_t integerHash(uint64_t k) //64-bit overload: mixes the bits of k and returns the mixed value
+{
+	k ^= k >> 33; //fold the high bits into the low bits
+	k *= 0xff51afd7ed558ccd; //multiply by a 64-bit odd constant (unsigned arithmetic wraps modulo 2^64)
+	k ^= k >> 33;
+	k *= 0xc4ceb9fe1a85ec53;
+	k ^= k >> 33;
+	return k;
 }
 
 #define GIHASH_FIRST_CELL(hash) (m_cells + ((hash) & (m_arraySize - 1)))


=====================================
GThreads.cpp
=====================================
@@ -373,7 +373,7 @@ void GThread::detach()
 
 void GThread::wait_all() {
   while (GThread::num_running()>0)
-	current_thread::sleep_for(2);
+	current_thread::sleep_for(1);
 }
 
 
@@ -445,7 +445,7 @@ void current_thread::yield() {
 // Example usage:
 // // Sleep for 100 milliseconds:
 // current_thread::sleep_for(100);
-void current_thread::sleep_for(const int32_t mstime) {
+void current_thread::sleep_for(const int mstime) {
 #if defined(_GTHREADS_WIN32_)
   Sleep(mstime);
 #else


=====================================
GThreads.h
=====================================
@@ -95,6 +95,9 @@ freely, subject to the following restrictions:
     #undef __UNDEF_LEAN_AND_MEAN
   #endif
 #else
+  #ifdef __CYGWIN__
+    #define _BSD_SOURCE
+  #endif 
   #include <pthread.h>
   #include <signal.h>
   #include <sched.h>
@@ -772,6 +775,7 @@ public:
     	int r=tcounter;
     	return r;
     }
+#ifdef _GTHREADS_POSIX_
     static size_t defaultStackSize() {
     	pthread_attr_t attr;
     	size_t stacksize;
@@ -780,6 +784,7 @@ public:
     	pthread_attr_destroy(&attr);
     	return stacksize;
     }
+#endif
     static int liveCount() {
       //return number of running (live) threads
       return num_running();
@@ -821,7 +826,7 @@ namespace current_thread {
   // Example usage:
   // // Sleep for 100 milliseconds:
   // current_thread::sleep_for(100);
-  void sleep_for(const int32_t mstime);
+  void sleep_for(const int mstime);
 }
 
 // Define/macro cleanup


=====================================
gff.cpp
=====================================
@@ -1436,7 +1436,7 @@ GffObj* GffReader::newGffRec(GffLine* gffline, GffObj* parent, GffExon* pexon, G
 		  gffline->exontype==exgffNone && !gffline->is_gene && !gffline->is_transcript) {
 	  //unrecognized non-exon entity, should be discarded
 	  newgfo->isDiscarded(true);
-	  this->discarded_ids.Add(gffline->ID, new int(1));
+	  this->discarded_ids.Add(gffline->ID, 1);
   }
   if (replace_parent && glst) {
 	r=gfoReplace(*glst, newgfo, parent);
@@ -1479,7 +1479,7 @@ GffObj* GffReader::updateGffRec(GffObj* prevgfo, GffLine* gffline) {
 }
 
 
-bool GffReader::readExonFeature(GffObj* prevgfo, GffLine* gffline, GHash<CNonExon>* pex) {
+bool GffReader::readExonFeature(GffObj* prevgfo, GffLine* gffline, GHash<CNonExon*>* pex) {
 	//this should only be called before prevgfo->finalize()!
 	bool r=true;
 	if (gffline->strand!=prevgfo->strand) {
@@ -1511,7 +1511,7 @@ bool GffReader::readExonFeature(GffObj* prevgfo, GffLine* gffline, GHash<CNonExo
 	return r;
 }
 
-CNonExon* GffReader::subfPoolCheck(GffLine* gffline, GHash<CNonExon>& pex, char*& subp_name) {
+CNonExon* GffReader::subfPoolCheck(GffLine* gffline, GHash<CNonExon*>& pex, char*& subp_name) {
   CNonExon* subp=NULL;
   subp_name=NULL;
   for (int i=0;i<gffline->num_parents;i++) {
@@ -1526,7 +1526,7 @@ CNonExon* GffReader::subfPoolCheck(GffLine* gffline, GHash<CNonExon>& pex, char*
   return NULL;
 }
 
-void GffReader::subfPoolAdd(GHash<CNonExon>& pex, GffObj* newgfo) {
+void GffReader::subfPoolAdd(GHash<CNonExon*>& pex, GffObj* newgfo) {
 //this might become a parent feature later
 if (newgfo->exons.Count()>0) {
    char* xbuf=gfoBuildId(gffline->ID, gffline->gseqname);
@@ -1535,7 +1535,7 @@ if (newgfo->exons.Count()>0) {
    }
 }
 
-GffObj* GffReader::promoteFeature(CNonExon* subp, char*& subp_name, GHash<CNonExon>& pex) {
+GffObj* GffReader::promoteFeature(CNonExon* subp, char*& subp_name, GHash<CNonExon*>& pex) {
   GffObj* prevp=subp->parent; //grandparent of gffline (e.g. gene)
   //if (prevp!=gflst[subp->idx])
   //  GError("Error promoting subfeature %s, gflst index mismatch?!\n", subp->gffline->ID);
@@ -1657,7 +1657,7 @@ void GffReader::readAll() {
 	}
 	else { //regular GFF/GTF or perhaps TLF?
 		//loc_debug=false;
-		GHash<CNonExon> pex; //keep track of any parented (i.e. exon-like) features that have an ID
+		GHash<CNonExon*> pex; //keep track of any parented (i.e. exon-like) features that have an ID
 		//and thus could become promoted to parent features
 		while (nextGffLine()!=NULL) {
 			GffObj* prevseen=NULL;
@@ -1969,7 +1969,7 @@ bool GffObj::processGeneSegments(GffReader* gfr) {
      4)for each GeneCDSChain, pick best _gene_segment match (if any) and transfer CDSs to it
 	*/
     GVec<int> geneSegs; //X_gene_segment features (children transcripts of this gene)
-    GHash<GeneCDSChain> cdsChainById(false); // hash of CDS chains: CDS feature grouped by ID
+    GHashMap<const char*, GeneCDSChain*> cdsChainById(false); // hash of CDS chains: CDS feature grouped by ID
     GPVec<GeneCDSChain> cdsChains; // CDS chains storage
 	if (cdss==NULL || cdss->Count()==0 || children.Count()==0)
 		return false; //we shouldn't be here
@@ -1998,7 +1998,7 @@ bool GffObj::processGeneSegments(GffReader* gfr) {
     	else { //new CDS chain:
     	     gcc=new GeneCDSChain(i, cdss->Get(i)->start, cdss->Get(i)->end);
     	     cdsChains.Add(gcc);
-    	     cdsChainById.shkAdd(id, gcc);
+    	     cdsChainById.Add(id, gcc);
     	}
     }
     for (int i=0;i<cdss->Count();i++) {


=====================================
gff.h
=====================================
@@ -8,7 +8,8 @@
 #include "codons.h"
 #include "GFaSeqGet.h"
 #include "GList.hh"
-#include "GHash.hh"
+//#include "GHash.hh"
+#include "GHashMap.hh"
 
 #ifdef CUFFLINKS
 #include <boost/crc.hpp>  // for boost::crc_32_type
@@ -18,6 +19,7 @@
 extern int gff_fid_mRNA; // "mRNA" feature name
 extern int gff_fid_transcript; // *RNA, *transcript feature name
 extern int gff_fid_exon;
+extern int gff_fid_CDS;
 
 extern const uint GFF_MAX_LOCUS;
 extern const uint GFF_MAX_EXON;
@@ -403,13 +405,13 @@ class GffNameList:public GPVec<GffNameInfo> {
   friend class GffNameInfo;
   friend class GffNames;
 protected:
-  GHash<GffNameInfo> byName;//hash with shared keys
+  GHashMap<const char*, GffNameInfo*> byName;//hash with shared keys
   int idlast; //fList index of last added/reused name
   int addStatic(const char* tname) {// fast add
      GffNameInfo* f=new GffNameInfo(tname);
      idlast=this->Add(f);
      f->idx=idlast;
-     byName.shkAdd(f->name,f);
+     byName.Add(f->name,f);
      return idlast;
      }
 public:
@@ -437,7 +439,7 @@ public:
       f=new GffNameInfo(tname);
       fidx=this->Add(f);
       f->idx=fidx;
-      byName.shkAdd(f->name,f);
+      byName.Add(f->name,f);
       }
    idlast=fidx;
    return fidx;
@@ -447,7 +449,7 @@ public:
     GffNameInfo* f=new GffNameInfo(tname);
     int fidx=this->Add(f);
     f->idx=fidx;
-    byName.shkAdd(f->name,f);
+    byName.Add(f->name,f);
     return fidx;
     }
 
@@ -472,11 +474,11 @@ class GffNames {
    GffNames():tracks(),gseqs(),attrs(), feats() {
     numrefs=0;
     //the order below is critical!
-    //has to match: gff_fid_mRNA, gff_fid_exon
+    //has to match: gff_fid_mRNA, gff_fid_exon, gff_fid_CDS
     gff_fid_mRNA = feats.addStatic("mRNA");//index 0=gff_fid_mRNA
     gff_fid_transcript=feats.addStatic("transcript");//index 1=gff_fid_transcript
-    gff_fid_exon=feats.addStatic("exon");//index 1=gff_fid_exon
-    //feats.addStatic("CDS"); //index 2=gff_fid_CDS
+    gff_fid_exon=feats.addStatic("exon");//index 2=gff_fid_exon
+    gff_fid_CDS=feats.addStatic("CDS"); //index 3=gff_fid_CDS
     }
 };
 
@@ -842,8 +844,6 @@ public:
                //complete parsing: must be called in order to merge adjacent/close proximity subfeatures
    void parseAttrs(GffAttrs*& atrlist, char* info, bool isExon=false, bool CDSsrc=false);
    const char* getSubfName() { //returns the generic feature type of the entries in exons array
-     //int sid=exon_ftype_id;
-     //if (sid==gff_fid_exon && isCDS) sid=gff_fid_CDS;
      return names->feats.getName(subftype_id);
      }
    void setCDS(uint cd_start, uint cd_end, char phase=0);
@@ -1179,8 +1179,7 @@ class GffReader {
   //bool gene2exon;  // for childless genes: add an exon as the entire gene span
   GHash<int> discarded_ids; //for transcriptsOnly mode, keep track
                             // of discarded parent IDs
-  GHash< GPVec<GffObj> > phash; //transcript_id => GPVec<GffObj>(false)
-  //GHash<int> tids; //just for transcript_id uniqueness
+  GHash< GPVec<GffObj>* > phash; //transcript_id => GPVec<GffObj>(false)
   char* gfoBuildId(const char* id, const char* ctg);
   //void gfoRemove(const char* id, const char* ctg);
   GffObj* gfoAdd(GffObj* gfo);
@@ -1190,9 +1189,9 @@ class GffReader {
   bool pFind(const char* id, GPVec<GffObj>*& glst);
   GffObj* gfoFind(const char* id, GPVec<GffObj>* & glst, const char* ctg=NULL,
 	                                         char strand=0, uint start=0, uint end=0);
-  CNonExon* subfPoolCheck(GffLine* gffline, GHash<CNonExon>& pex, char*& subp_name);
-  void subfPoolAdd(GHash<CNonExon>& pex, GffObj* newgfo);
-  GffObj* promoteFeature(CNonExon* subp, char*& subp_name, GHash<CNonExon>& pex);
+  CNonExon* subfPoolCheck(GffLine* gffline, GHash<CNonExon*>& pex, char*& subp_name);
+  void subfPoolAdd(GHash<CNonExon*>& pex, GffObj* newgfo);
+  GffObj* promoteFeature(CNonExon* subp, char*& subp_name, GHash<CNonExon*>& pex);
 
 #ifdef CUFFLINKS
      boost::crc_32_type  _crc_result;
@@ -1207,7 +1206,7 @@ class GffReader {
   //GffObj* replaceGffRec(GffLine* gffline, bool keepAttr, bool noExonAttr, int replaceidx);
   GffObj* updateGffRec(GffObj* prevgfo, GffLine* gffline);
   GffObj* updateParent(GffObj* newgfh, GffObj* parent);
-  bool readExonFeature(GffObj* prevgfo, GffLine* gffline, GHash<CNonExon>* pex=NULL);
+  bool readExonFeature(GffObj* prevgfo, GffLine* gffline, GHash<CNonExon*>* pex=NULL);
   GPVec<GSeqStat> gseqStats; //populated after finalize() with only the ref seqs in this file
   GffReader(FILE* f=NULL, bool t_only=false, bool sort=false):linebuf(NULL), fpos(0),
 		  buflen(0), flags(0), fh(f), fname(NULL), commentParser(NULL), gffline(NULL),


=====================================
htest.cpp
=====================================
@@ -2,21 +2,31 @@
 #include "GArgs.h"
 #include "GStr.h"
 #include "GVec.hh"
-#include "GHash.hh"
+namespace old {
+ #include "GHash.hh"
+}
 #include "GResUsage.h"
 #include <cstdint>
 #include <iostream>
-#include <string>
-//#include <tsl/hopscotch_map.h>
-//#include <tsl/hopscotch_set.h>
+//#include "tsl/hopscotch_map.h"
+//#include "tsl/robin_map.h"
+#include <unordered_map>
+//#include "ska/bytell_hash_map.hpp"
+
+//#include "khashl.hh"
+//#include "city.h"
+#include "GHashMap.hh"
 
 #define USAGE "Usage:\n\
-  htest textfile.. \n\
+  htest [-Q] [-C] [-n num_clusters] textfile.. \n\
+  E.g. quick query test: ./htest -Q qtest_str.dta\n\
   \n\
  "
-static void strFreeProc(pointer item) {
-      GFREE(item);
-}
+//quick query test: ./htest -Q qtest_str.dta
+
+bool qryMode=false;
+bool checkRM=false;
+int numClusters=500;
 
 struct HStrData {
 	int cmd; // 0=add, 1=remove, 2=clear
@@ -24,13 +34,21 @@ struct HStrData {
 	HStrData(char* s=NULL, int c=0):cmd(c), str(s) { }
 };
 
-int loadStrings(FILE* f, GPVec<HStrData>& strgsuf, GPVec<HStrData>& strgs, int toLoad=0) {
+int loadStrings(FILE* f, GPVec<HStrData>& strgsuf, GPVec<HStrData>& strgs, int toLoad) {
   int num=0;
   GLineReader lr(f);
   char* line=NULL;
+  int numcl=0;
   while ((line=lr.nextLine())!=NULL) {
 	  int len=strlen(line);
-	  if (len<4) continue;
+	  if (len<3) continue;
+	  if (line[0]=='>') {
+		  numcl++;
+		  if (toLoad && numcl>toLoad) {
+			  break;
+		  }
+		  continue;
+	  }
 	  if (strcmp(line, "HCLR")==0) {
 		  strgs.Add(new HStrData(NULL, 2));
 		  strgsuf.Add(new HStrData(NULL, 2));
@@ -46,7 +64,6 @@ int loadStrings(FILE* f, GPVec<HStrData>& strgsuf, GPVec<HStrData>& strgs, int t
       line[len-3]=0;
       strgs.Add(new HStrData(line));
 	  num++;
-	  if (toLoad && num>=toLoad) break;
   } //while line
   return num;
 }
@@ -55,93 +72,520 @@ void showTimings(GResUsage swatch) {
  char *wtime=commaprintnum((uint64_t)swatch.elapsed());
  char *utime=commaprintnum((uint64_t)swatch.u_elapsed());
  char *stime=commaprintnum((uint64_t)swatch.s_elapsed());
+ char *smem=commaprintnum((uint64_t)swatch.memoryUsed());
  GMessage("Elapsed time (microseconds): %12s us\n", wtime);
  GMessage("                  user time: %12s us\n", utime);
  GMessage("                system time: %12s us\n", stime);
- GFREE(wtime);GFREE(utime);GFREE(stime);
+ GMessage("                  mem usage: %12s KB\n", smem);
+
+ GFREE(wtime);GFREE(utime);GFREE(stime); GFREE(smem);
 }
 
+// FNV-1a 32-bit parameters; default values recommended by http://isthe.com/chongo/tech/comp/fnv/
+const uint32_t Prime = 0x01000193; // 16777619, multiplier applied after every XOR step
+const uint32_t Seed  = 0x811C9DC5; // 2166136261, default starting hash for both overloads below
+/// hash a single byte: XOR b into the running hash h, then multiply by Prime
+inline uint32_t fnv1a(unsigned char b, uint32_t h = Seed) {   return (b ^ h) * Prime; }
+
+/// hash a C-style string: folds each byte before the terminating NUL into hash and returns the result
+uint32_t fnv1a(const char* text, uint32_t hash = Seed) {
+	while (*text) // stop at the NUL terminator (it is not hashed)
+		hash = fnv1a((unsigned char)*text++, hash); // byte is cast to unsigned char before being mixed in
+	return hash; // for an empty string this is the incoming hash, unchanged
+}
+
+struct cstr_eq { // equality functor for C-string keys: compares contents, not pointer values
+    inline bool operator()(const char* x, const char* y) const {
+        return (strcmp(x, y) == 0); // true when both strings have identical characters; neither pointer is checked for NULL
+    }
+};
+
+struct cstr_hash { // hash functor for C-string keys, returns a 32-bit hash
+	inline uint32_t operator()(const char* s) const {
+		return XXH32(s, std::strlen(s),0); // hashes strlen(s) bytes with last argument 0; XXH32 is not defined in this file (presumably from xxhash.h - confirm)
+		//return fnv1a(s);
+	}
+};
 
 void run_GHash(GResUsage& swatch, GPVec<HStrData> & hstrs, const char* label) {
-	GHash<int> ghash;
+	old::GHash<int> ghash;
 	int num_add=0, num_rm=0, num_clr=0;
 	GMessage("----------------- %s ----------------\n", label);
 	ghash.Clear();
 	swatch.start();
+	int cl_i=0;
+	int prevcmd=2;
 	for (int i=0;i<hstrs.Count();i++) {
+			  if (hstrs[i]->cmd==prevcmd) {
+				  if (prevcmd==2) continue;
+			  } else prevcmd=hstrs[i]->cmd;
 			  switch (hstrs[i]->cmd) {
-				case 0:ghash.fAdd(hstrs[i]->str.chars(), new int(1)); num_add++; break;
-				case 1:ghash.Remove(hstrs[i]->str.chars()); num_rm++; break;
-				case 2:ghash.Clear(); num_clr++; break;
+				case 0:
+					if (cl_i==0) cl_i=i;
+					ghash.fAdd(hstrs[i]->str.chars(), new int(i));
+					num_add++;
+					break;
+				case 1:
+					if (qryMode) break;
+					ghash.Remove(hstrs[i]->str.chars());
+					num_rm++;
+					break;
+				case 2:
+					//run tests here
+					if (qryMode) {
+						//run some query tests here
+						for(int j=cl_i;j<i;j+=3) {
+							if (hstrs[j]->cmd) continue;
+							int* v=ghash[hstrs[j]->str.chars()];
+							if (v==NULL)
+								GError("Error at <%s>, key %s not found (count:%d, cl_i=%d, i=%d)!\n",label, hstrs[j]->str.chars(),
+										ghash.Count(), cl_i, i );
+							if (*v!=j)
+								GError("Error at <%s>, invalid value for key %s!\n",label, hstrs[j]->str.chars() );
+						}
+					}
+					cl_i=0;
+					ghash.Clear();
+					num_clr++;
+					break;
 			  }
 	}
-	ghash.Clear();
 	swatch.stop();
+	ghash.Clear();
 	GMessage("  (%d inserts, %d deletions, %d clears)\n", num_add, num_rm, num_clr);
 }
 /*
 void run_Hopscotch(GResUsage& swatch, GPVec<HStrData> & hstrs, const char* label) {
   int num_add=0, num_rm=0, num_clr=0;
-  tsl::hopscotch_map<std::string, int> hsmap;
+  //tsl::hopscotch_map<const char*, int, cstr_hash, cstr_eq> hsmap;
+  tsl::hopscotch_map<const char*, int, cstr_hash, cstr_eq,
+      std::allocator<std::pair<const char*, int>>,  30, true> hsmap;
   GMessage("----------------- %s ----------------\n", label);
   swatch.start();
+  int cl_i=0;
+  int prevcmd=2;
   for (int i=0;i<hstrs.Count();i++) {
+	  if (hstrs[i]->cmd==prevcmd) {
+		  if (prevcmd==2) continue;
+	  } else prevcmd=hstrs[i]->cmd;
 	  switch (hstrs[i]->cmd) {
-		case 0:hsmap.insert({hstrs[i]->str.chars(), 1}); num_add++; break;
-		case 1:hsmap.erase(hstrs[i]->str.chars()); num_rm++; break;
-		case 2:hsmap.clear(); num_clr++; break;
+		case 0:
+			if (cl_i==0) cl_i=i;
+			hsmap.insert({hstrs[i]->str.chars(), i});
+			num_add++;
+			break;
+		case 1:
+			if (qryMode) break;
+			hsmap.erase(hstrs[i]->str.chars());
+			num_rm++; break;
+		case 2:
+			if (qryMode) {
+				//run some query tests here
+				//with strings from hstrs[cl_i .. i-1] range
+				for(int j=cl_i;j<i;j+=3) {
+					if (hstrs[j]->cmd) continue;
+					int v=hsmap[hstrs[j]->str.chars()];
+					if (v!=j)
+						GError("Error at <%s>, invalid value for key %s! (got %d, expected %d)\n",label,
+								hstrs[j]->str.chars(), v, j );
+				}
+			}
+			cl_i=0;
+			hsmap.clear();
+			num_clr++;
+			break;
 	  }
   }
+  swatch.stop();
   hsmap.clear();
+  GMessage("  (%d inserts, %d deletions, %d clears)\n", num_add, num_rm, num_clr);
+}
+
+void run_Robin(GResUsage& swatch, GPVec<HStrData> & hstrs, const char* label) {
+  int num_add=0, num_rm=0, num_clr=0;
+  //tsl::hopscotch_map<const char*, int, cstr_hash, cstr_eq> hsmap;
+  tsl::robin_map<const char*, int, cstr_hash, cstr_eq,
+      std::allocator<std::pair<const char*, int>>,  true> rmap;
+  GMessage("----------------- %s ----------------\n", label);
+  swatch.start();
+  int cl_i=0;
+  int prevcmd=2;
+  for (int i=0;i<hstrs.Count();i++) {
+	  if (hstrs[i]->cmd==prevcmd) {
+		  if (prevcmd==2) continue;
+	  } else prevcmd=hstrs[i]->cmd;
+	  switch (hstrs[i]->cmd) {
+		case 0:
+			if (cl_i==0) cl_i=i;
+			rmap.insert({hstrs[i]->str.chars(), i});
+			num_add++;
+			break;
+		case 1: if (qryMode) break;
+			rmap.erase(hstrs[i]->str.chars()); num_rm++; break;
+		case 2:
+			if (qryMode) {
+				//run some query tests here
+				//with strings from hstrs[cl_i .. i-1] range
+				for(int j=cl_i;j<i;j+=3) {
+					if (hstrs[j]->cmd) continue;
+					int v=rmap[hstrs[j]->str.chars()];
+					if (v!=j)
+						GError("Error at <%s>, invalid value for key %s!\n",label, hstrs[j]->str.chars() );
+				}
+			}
+			cl_i=0;
+			rmap.clear(); num_clr++; break;
+	  }
+  }
   swatch.stop();
+  rmap.clear();
+  GMessage("  (%d inserts, %d deletions, %d clears)\n", num_add, num_rm, num_clr);
+}
+
+void run_Bytell(GResUsage& swatch, GPVec<HStrData> & hstrs, const char* label) {
+  int num_add=0, num_rm=0, num_clr=0;
+  ska::bytell_hash_map<const char*, int, cstr_hash, cstr_eq> bmap;
+  GMessage("----------------- %s ----------------\n", label);
+  swatch.start();
+  for (int i=0;i<hstrs.Count();i++) {
+	  switch (hstrs[i]->cmd) {
+		case 0:bmap.insert({hstrs[i]->str.chars(), 1}); num_add++; break;
+		case 1:bmap.erase(hstrs[i]->str.chars()); num_rm++; break;
+		case 2:bmap.clear(); num_clr++; break;
+	  }
+  }
+  swatch.stop();
+  bmap.clear();
   GMessage("  (%d inserts, %d deletions, %d clears)\n", num_add, num_rm, num_clr);
 }
 */
+void run_Khashl(GResUsage& swatch, GPVec<HStrData> & hstrs, const char* label) { // replays the add/remove/clear commands in hstrs against a klib::KHashMapCached between swatch.start() and swatch.stop()
+  int num_add=0, num_rm=0, num_clr=0; // counters reported in the summary message at the end
+  klib::KHashMapCached<const char*, int, cstr_hash, cstr_eq > khmap; // C-string key => index of the command that inserted it
+  GMessage("----------------- %s ----------------\n", label);
+  swatch.start();
+  int cl_i=0; // index of the first add since the last clear; 0 doubles as "not set yet"
+  int prevcmd=2; // starts as "clear" so that leading clear commands are skipped
+  for (int i=0;i<hstrs.Count();i++) {
+	  if (hstrs[i]->cmd==prevcmd) {
+		  if (prevcmd==2) continue; // collapse runs of consecutive clear commands into one
+	  } else prevcmd=hstrs[i]->cmd;
+	  switch (hstrs[i]->cmd) { // HStrData::cmd: 0=add, 1=remove, 2=clear
+		case 0:if (cl_i==0) cl_i=i;
+			khmap[hstrs[i]->str.chars()]=i; num_add++; break; // operator[] inserts if missing, then the value is set to i
+		case 1:if (qryMode) break; // removals are skipped entirely in query mode (not counted either)
+			khmap.del(khmap.get(hstrs[i]->str.chars())); num_rm++; break; // num_rm counts remove commands, whether or not the key was present
+		case 2:
+			if (qryMode) {
+				//run some query tests here: look up every third entry of hstrs[cl_i .. i-1]
+				for(int j=cl_i;j<i;j+=3) {
+					if (hstrs[j]->cmd) continue; // only add entries are queried
+					int v=khmap[hstrs[j]->str.chars()]; // note: operator[] inserts the key if it is missing
+					if (v!=j) // the stored value must be the index at which the key was added
+						GError("Error at <%s>, invalid value for key %s!\n",label, hstrs[j]->str.chars() );
+				}
+			}
+			cl_i=0; // the next add starts a new range
+			khmap.clear(); num_clr++; break;
+	  }
+  }
+  swatch.stop(); // stop() comes before the final clear() below
+  khmap.clear();
+  GMessage("  (%d inserts, %d deletions, %d clears)\n", num_add, num_rm, num_clr);
+}
+
+void run_GHashMap(GResUsage& swatch, GPVec<HStrData> & hstrs, const char* label) { // replays the add/remove/clear commands in hstrs against a GHash<int> using cstr_hash, between swatch.start() and swatch.stop()
+  int num_add=0, num_rm=0, num_clr=0; // counters reported in the summary message at the end
+  //GKHashSet<const char*> khset;
+  //GHashSet<> khset;
+  GHash<int, cstr_hash, GHashKey_Eq<const char*>, uint32_t> khset; // GHash and GHashKey_Eq are not defined in this file (presumably GHashMap.hh - confirm)
+  GMessage("----------------- %s ----------------\n", label);
+  int cl_i=0; // index of the first add since the last clear; 0 doubles as "not set yet"
+  swatch.start();
+  int prevcmd=2; // starts as "clear" so that leading clear commands are skipped
+  for (int i=0;i<hstrs.Count();i++) {
+	  if (hstrs[i]->cmd==prevcmd) {
+		  if (prevcmd==2) continue; // collapse runs of consecutive clear commands into one
+	  } else prevcmd=hstrs[i]->cmd;
+	  switch (hstrs[i]->cmd) { // HStrData::cmd: 0=add, 1=remove, 2=clear
+		case 0: if (cl_i==0) cl_i=i;
+			khset.Add(hstrs[i]->str.chars(), i); num_add++; break; // value stored is the index of this command
+		case 1:if (qryMode) break; // removals are skipped entirely in query mode (not counted either)
+			if (khset.Remove(hstrs[i]->str.chars())<0) // a negative return is treated as "nothing removed"
+				if (checkRM) GMessage("Warning: key %s could not be removed!\n", hstrs[i]->str.chars());
+			num_rm++; // counts remove commands, whether or not the removal succeeded
+			break;
+		case 2:
+			if (qryMode) {
+				//run some query tests here, on every third entry
+				//with strings from hstrs[cl_i .. i-1] range
+				for(int j=cl_i;j<i;j+=3) {
+					if (hstrs[j]->cmd) continue; // only add entries are queried
+					int* v=khset[hstrs[j]->str.chars()];
+					if (*v!=j) // NOTE(review): v is dereferenced without a NULL check (run_GHash checks v==NULL first) - confirm operator[] cannot return NULL here
+						GError("Error at <%s>, invalid value for key %s!\n",label, hstrs[j]->str.chars() );
+				}
+			}
+			cl_i=0; // the next add starts a new range
+			khset.Clear(); num_clr++; break;
+	  }
+  }
+  swatch.stop(); // stop() comes before the final Clear() below
+  khset.Clear();
+  GMessage("  (%d inserts, %d deletions, %d clears)\n", num_add, num_rm, num_clr);
+}
+
+void run_GxxHashMap(GResUsage& swatch, GPVec<HStrData> & hstrs, const char* label) { // same replay as run_GHashMap, but with GHash<int> left on its default template arguments
+  int num_add=0, num_rm=0, num_clr=0; // counters reported in the summary message at the end
+  GHash<int> khset; // default hash/equality; main() labels this run "xxHash64" (the defaults are not visible here - confirm)
+  GMessage("----------------- %s ----------------\n", label);
+  int cl_i=0; // index of the first add since the last clear; 0 doubles as "not set yet"
+  swatch.start();
+  int prevcmd=2; // starts as "clear" so that leading clear commands are skipped
+  for (int i=0;i<hstrs.Count();i++) {
+	  if (hstrs[i]->cmd==prevcmd) {
+		  if (prevcmd==2) continue; // collapse runs of consecutive clear commands into one
+	  } else prevcmd=hstrs[i]->cmd;
+	  switch (hstrs[i]->cmd) { // HStrData::cmd: 0=add, 1=remove, 2=clear
+		case 0: if (cl_i==0) cl_i=i;
+			khset.Add(hstrs[i]->str.chars(), i); num_add++; break; // value stored is the index of this command
+		case 1:if (qryMode) break; // removals are skipped entirely in query mode (not counted either)
+			if (khset.Remove(hstrs[i]->str.chars())<0) // a negative return is treated as "nothing removed"
+				if (checkRM) GMessage("Warning: key %s could not be removed!\n", hstrs[i]->str.chars());
+			num_rm++; // counts remove commands, whether or not the removal succeeded
+			break;
+		case 2:
+			if (qryMode) {
+				//run some query tests here, on every third entry
+				//with strings from hstrs[cl_i .. i-1] range
+				for(int j=cl_i;j<i;j+=3) {
+					if (hstrs[j]->cmd) continue; // only add entries are queried
+					int* v=khset[hstrs[j]->str.chars()];
+					if (*v!=j) // NOTE(review): v is dereferenced without a NULL check (run_GHash checks v==NULL first) - confirm operator[] cannot return NULL here
+						GError("Error at <%s>, invalid value for key %s!\n",label, hstrs[j]->str.chars() );
+				}
+			}
+			cl_i=0; // the next add starts a new range
+			khset.Clear(); num_clr++; break;
+	  }
+  }
+  swatch.stop(); // stop() comes before the final Clear() below
+  khset.Clear();
+  GMessage("  (%d inserts, %d deletions, %d clears)\n", num_add, num_rm, num_clr);
+}
+
+void run_GHashMapShk(GResUsage& swatch, GPVec<HStrData> & hstrs, const char* label) { // same replay as run_GHashMap, but against GHashMap<const char*, int>
+  int num_add=0, num_rm=0, num_clr=0; // counters reported in the summary message at the end
+  GHashMap<const char*, int> khset; // keyed directly by the const char* handed to Add() (key ownership/copy semantics not visible here - confirm)
+  GMessage("----------------- %s ----------------\n", label);
+  int cl_i=0; // index of the first add since the last clear; 0 doubles as "not set yet"
+  swatch.start();
+  int prevcmd=2; // starts as "clear" so that leading clear commands are skipped
+  for (int i=0;i<hstrs.Count();i++) {
+	  if (hstrs[i]->cmd==prevcmd) {
+		  if (prevcmd==2) continue; // collapse runs of consecutive clear commands into one
+	  } else prevcmd=hstrs[i]->cmd;
+	  switch (hstrs[i]->cmd) { // HStrData::cmd: 0=add, 1=remove, 2=clear
+		case 0: if (cl_i==0) cl_i=i;
+			khset.Add(hstrs[i]->str.chars(),i); num_add++; break; // value stored is the index of this command
+		case 1:if (qryMode) break; // removals are skipped entirely in query mode (not counted either)
+			if (khset.Remove(hstrs[i]->str.chars())<0) // a negative return is treated as "nothing removed"
+				if (checkRM) GMessage("Warning: key %s could not be removed!\n", hstrs[i]->str.chars());
+			num_rm++; // counts remove commands, whether or not the removal succeeded
+			break;
+		case 2:
+			if (qryMode) {
+				//run some query tests here, on every third entry
+				//with strings from hstrs[cl_i .. i-1] range
+				for(int j=cl_i;j<i;j+=3) {
+					if (hstrs[j]->cmd) continue; // only add entries are queried
+					int* v=khset[hstrs[j]->str.chars()];
+					if (*v!=j) // NOTE(review): v is dereferenced without a NULL check (run_GHash checks v==NULL first) - confirm operator[] cannot return NULL here
+						GError("Error at <%s>, invalid value for key %s!\n",label, hstrs[j]->str.chars() );
+				}
+			}
+			cl_i=0; // the next add starts a new range
+			khset.Clear(); num_clr++; break;
+	  }
+  }
+  swatch.stop(); // stop() comes before the final Clear() below
+  khset.Clear();
+  GMessage("  (%d inserts, %d deletions, %d clears)\n", num_add, num_rm, num_clr);
+}
+
+struct SObj { // small (string, int) record; in this view it is referenced only from the commented-out "quick test area" in main()
+  GStr atr; // string attribute
+  int val; // integer value; the only field used for ordering
+  SObj(const char* a=NULL, const int v=0):atr(a),val(v) { } // both arguments optional: defaults are NULL and 0
+  bool operator<(const SObj& o) const { return val<o.val; } // ordering looks at val only, atr is ignored
+  bool operator==(const SObj& o) const { // equality requires both fields to match
+	  return (atr==o.atr && val==o.val);
+  }
+};
+
 int main(int argc, char* argv[]) {
  GPVec<HStrData> strs;
  GPVec<HStrData> sufstrs;
- strs.setFreeItem(strFreeProc);
- sufstrs.setFreeItem(strFreeProc);
  //GArgs args(argc, argv, "hg:c:s:t:o:p:help;genomic-fasta=COV=PID=seq=out=disable-flag;test=");
- GArgs args(argc, argv, "h");
+ GArgs args(argc, argv, "hQCn:");
  //fprintf(stderr, "Command line was:\n");
  //args.printCmdLine(stderr);
  args.printError(USAGE, true);
  if (args.getOpt('h') || args.getOpt("help")) GMessage(USAGE);
+ GStr s=args.getOpt('n');
+ if (!s.is_empty()) {
+	 numClusters=s.asInt();
+	 if (numClusters<=0)
+		 GError("%s\nError: invalid value for -n !\n", USAGE);
+ }
+ qryMode=(args.getOpt('Q'));
+ checkRM=(args.getOpt('C'));
  int numargs=args.startNonOpt();
  const char* a=NULL;
  FILE* f=NULL;
  int total=0;
+//==== quick test area
+ /*
+ std::unordered_map<SObj*, int > umap;
+ GHash<int> gh;
+ GPVec<SObj> ptrs(false);
+ GQHash<int, SObj*> ihash(false);
+ GQHash<SObj*, int> phash;
+ GQStrHash<SObj*> shash;
+ const char* tstrs[6] = {"twelve", "five", "nine", "eleven", "three", "nope"};
+ int  vals[6]     = { 12,  5, 9, 11, 3, 777 };
+ char buf[20];
+ for (int i=0;i<5;i++) {
+   SObj* o=new SObj(tstrs[i], vals[i]*10);
+   ptrs.Add(o);
+   sprintf(buf, "%lx", o);
+   GMessage("SObj (%s, %d) pointer added: %s\n",tstrs[i], o->val, buf);
+   gh.Add(buf, new int(vals[i]));
+   shash.Add(tstrs[i], o);
+   ihash.Add(vals[i], o);
+   phash.Add(o, vals[i]);
+   umap[o]=vals[i];
+ }
+ ptrs.Sort();
+ GMessage("shash has now %d entries.\n", shash.Count());
+ //enumerate shash entries:
+ {
+   shash.startIterate();
+   SObj* iv=NULL;
+   while (const char* k=shash.Next(iv)) {
+	   GMessage("Enumerating shash entry: (%s => %lx)\n",
+			    k, iv);
+   }
+ }
+ //qry:
+ for (int i=0;i<ptrs.Count();i++) {
+   SObj* o=ptrs[i];
+   //test tset
+   SObj* v=shash.Find(o->atr.chars());
+   if (v==NULL)
+	   GMessage("key <%s> not found in shash!\n", o->atr.chars());
+   int* iv=phash.Find(o);
+   if (iv==NULL)
+	   GMessage("key <%lx> not found in phash!\n", o);
+   //if (!oset[*o])
+//   GMessage("struct {%s, %d} not found in oset!\n", o->atr.chars(), o->val);
 
+   //sprintf(buf, "%lx", o);
+   //int* hv=gh[buf];
+   //GMessage("Item {%s, %d} : GHash retrieved flag = %d, umap retrieved flag = %d\n",
+   //  o->atr.chars(), o->val, *hv, umap[o]);
+ }
+//SObj* n=new SObj("test", 10);
+//if (!pset[n])
+//	   GMessage("key <%lx> not found in pset!\n", n);
+
+ for (int i=0;i<6;i++) {
+	SObj* o=shash[tstrs[i]];
+	if (o==NULL) GMessage("key <%s> not found in shash!\n", tstrs[i]);
+	if (o && i<5) {
+		if (o->atr!=tstrs[i])
+			GMessage("shash value does not match key <%s!\n", tstrs[i]);
+	}
+ }
+
+ //delete n;
+ //int v=umap[n];
+ //GMessage("Non-existing test entry returned value %d\n", v);
+  */
+ /*
+ auto found=umap.find(n);
+ if (found!=umap.end()) {
+   GMessage("Found flags %d for entry {\"%s\", %d}\n", found->second,
+         n->atr.chars(), found->first->val );
+ } else  GMessage("New test obj not found !\n");
+
+ return(0);
+*/
+//==== quick test area end
  if (numargs==0) {
-	 a="htest_data.lst";
+	 //a="htest_data.lst";
+	 a="htest_over500.lst";
 	 f=fopen(a, "r");
 	 if (f==NULL) GError("Error: could not open file %s !\n", a);
-	 int num=loadStrings(f, sufstrs, strs, 600000);
+	 GMessage("loading %d clusters from file..\n", numClusters);
+	 int num=loadStrings(f, sufstrs, strs, numClusters);
 	 total+=num;
-	 GMessage("..loaded %d strings from file %s\n", total, a);
+	 fclose(f);
  }
  else {
 	   while ((a=args.nextNonOpt())) {
 		   f=fopen(a, "r");
 		   if (f==NULL) GError("Error: could not open file %s !\n", a);
-		   int num=loadStrings(f, sufstrs, strs, 600000);
+		   int num=loadStrings(f, sufstrs, strs, numClusters);
 		   total+=num;
+		   fclose(f);
 	   }
   }
    GResUsage swatch;
 
 
-   run_GHash(swatch, strs, "GHash no suffix");
+   run_GHash(swatch, sufstrs, "GHash w/ suffix");
    showTimings(swatch);
+   //run_GHash(swatch, strs, "GHash no suffix");
+   //showTimings(swatch);
 
-   run_GHash(swatch, sufstrs, "GHash w/ suffix");
+/*
+   run_Hopscotch(swatch, sufstrs, "hopscotch w/ suffix");
    showTimings(swatch);
-   /*
    run_Hopscotch(swatch, strs, "hopscotch no suffix");
    showTimings(swatch);
+*/
+/*
+   run_Robin(swatch, sufstrs, "robin w/ suffix");
+   showTimings(swatch);
+   run_Robin(swatch, strs, "robin no suffix");
+   showTimings(swatch);
+*/
+/*
+   run_Khashl(swatch, sufstrs, "khashl w/ suffix");
+   showTimings(swatch);
+   run_Khashl(swatch, strs, "khashl no suffix");
+   showTimings(swatch);
+*/
+   run_GHashMap(swatch, sufstrs, "GHashMap xxHash32 w/ suffix");
+   showTimings(swatch);
 
-   run_Hopscotch(swatch, sufstrs, "hopscotch w/ suffix");
+   run_GxxHashMap(swatch, sufstrs, "GHashMap xxHash64 w/ suffix");
+   showTimings(swatch);
+
+   //run_GHashMap(swatch, strs, "GHashMap no suffix");
+   //showTimings(swatch);
+
+   //run_GHashMapShk(swatch, sufstrs, "GHashSetShk w/ suffix");
+   //showTimings(swatch);
+   //run_GHashMapShk(swatch, strs, "GHashSetShk no suffix");
+   //showTimings(swatch);
+
+/*
+   run_Bytell(swatch, sufstrs, "bytell w/ suffix");
    showTimings(swatch);
-   */
+   run_Bytell(swatch, strs, "bytell no suffix");
+   showTimings(swatch);
+*/
+
 }


=====================================
khashl.hh
=====================================
@@ -0,0 +1,261 @@
+#ifndef __AC_KHASHL_HPP
+#define __AC_KHASHL_HPP
+
+#include <functional> // for std::equal_to
+#include <cstdlib>    // for malloc() etc
+#include <cstring>    // for memset()
+#include <stdint.h>   // for uint32_t
+
+/* // ==> Code example <==
+#include <cstdio>
+#include "khashl.hpp"
+
+int main(void)
+{
+	klib::KHashMap<uint32_t, int, std::hash<uint32_t> > h; // NB: C++98 doesn't have std::hash
+	uint32_t k;
+	int absent;
+	h[43] = 1, h[53] = 2, h[63] = 3, h[73] = 4;       // one way to insert
+	k = h.put(53, &absent), h.value(k) = -2;          // another way to insert
+	if (!absent) printf("already in the table\n");    //   which allows to test presence
+	if (h.get(33) == h.end()) printf("not found!\n"); // test presence without insertion
+	h.del(h.get(43));               // deletion
+	for (k = 0; k != h.end(); ++k)  // traversal
+		if (h.occupied(k))          // some buckets are not occupied; skip them
+			printf("%u => %d\n", h.key(k), h.value(k));
+	return 0;
+}
+*/
+
+namespace klib {
+
+/***********
+ * HashSet *
+ ***********/
+
+template<class T, class Hash, class Eq = std::equal_to<T>, typename khint_t = uint32_t> // open-addressing hash set with linear probing; Hash and Eq are default-constructed at each use
+class KHashSet {
+protected:
+	khint_t bits, count; // table holds 1<<bits buckets once allocated; count = number of occupied buckets
+	uint32_t *used; // occupancy bitmap, one bit per bucket (32 buckets per word); stays 0 until the first resize()
+	T *keys; // bucket array; keys[i] is meaningful only while bit i of `used` is set
+	static inline uint32_t __kh_used(const uint32_t *flag, khint_t i) { return flag[i>>5] >> (i&0x1fU) & 1U; }; // 1 if bucket i is occupied, else 0
+	static inline void __kh_set_used(uint32_t *flag, khint_t i) { flag[i>>5] |= 1U<<(i&0x1fU); }; // mark bucket i occupied
+	static inline void __kh_set_unused(uint32_t *flag, khint_t i) { flag[i>>5] &= ~(1U<<(i&0x1fU)); }; // mark bucket i free
+	static inline khint_t __kh_fsize(khint_t m) { return m<32? 1 : m>>5; } // number of 32-bit bitmap words needed for m buckets (at least one)
+	static inline khint_t __kh_h2b(uint32_t hash, khint_t bits) { return hash * 2654435769U >> (32 - bits); } // 32-bit hash -> bucket: multiply, keep the top `bits` bits
+	static inline khint_t __kh_h2b(uint64_t hash, khint_t bits) { return hash * 11400714819323198485ULL >> (64 - bits); } // 64-bit variant, selected by the return type of Hash
+public:
+	KHashSet() : bits(0), count(0), used(0), keys(0) {}; // starts with no storage; the first put() allocates
+	~KHashSet() { std::free(used); std::free(keys); }; // storage is released with free(); element destructors are not invoked
+	inline khint_t n_buckets() const { return used? khint_t(1) << bits : 0; } // 0 while nothing is allocated
+	inline khint_t end() const { return n_buckets(); } // "not found" index returned by get()
+	inline khint_t size() const { return count; } // number of stored elements
+	inline T &key(khint_t x) { return keys[x]; }; // no bounds or occupancy check; see occupied()
+	inline bool occupied(khint_t x) const { return (__kh_used(used, x) != 0); } // whether bucket x currently holds an element
+	void clear(void) { // drops all elements but keeps the allocated buckets
+		if (!used) return;
+		memset(used, 0, __kh_fsize(n_buckets()) * sizeof(uint32_t));
+		count = 0;
+	}
+	khint_t get(const T &key) const { // returns the bucket index holding key, or end() if it is absent
+		khint_t i, last, mask, nb;
+		if (keys == 0) return 0; // nothing allocated yet; end() is 0 in that state as well
+		nb = n_buckets();
+		mask = nb - khint_t(1);
+		i = last = __kh_h2b(Hash()(key), bits);
+		while (__kh_used(used, i) && !Eq()(keys[i], key)) { // linear probing with wrap-around
+			i = (i + khint_t(1)) & mask;
+			if (i == last) return nb; // probed every bucket without a match
+		}
+		return !__kh_used(used, i)? nb : i;
+	}
+	int resize(khint_t new_nb) { // rehash into at least new_nb buckets (rounded up to a power of two, minimum 4); returns 0 on success or no-op, -1 on allocation failure
+		uint32_t *new_used = 0;
+		khint_t j = 0, x = new_nb, nb, new_bits, new_mask;
+		while ((x >>= khint_t(1)) != 0) ++j; // j = floor(log2(new_nb))
+		if (new_nb & (new_nb - 1)) ++j; // round up when new_nb is not a power of two
+		new_bits = j > 2? j : 2;
+		new_nb = khint_t(1) << new_bits;
+		if (count > (new_nb>>1) + (new_nb>>2)) return 0; // requested size is too small
+		new_used = (uint32_t*)std::malloc(__kh_fsize(new_nb) * sizeof(uint32_t));
+		if (!new_used) return -1; /* not enough memory; checked BEFORE the buffer is written (fix: the memset used to run first) */
+		memset(new_used, 0, __kh_fsize(new_nb) * sizeof(uint32_t));
+		nb = n_buckets();
+		if (nb < new_nb) { /* expand */
+			T *new_keys = (T*)std::realloc(keys, new_nb * sizeof(T));
+			if (!new_keys) { std::free(new_used); return -1; } // table is left untouched on failure
+			keys = new_keys;
+		} /* otherwise shrink */
+		new_mask = new_nb - 1;
+		for (j = 0; j != nb; ++j) { // rehash in place: old and new layouts share the keys array
+			if (!__kh_used(used, j)) continue;
+			T key = keys[j];
+			__kh_set_unused(used, j);
+			while (1) { /* kick-out process; sort of like in Cuckoo hashing */
+				khint_t i;
+				i = __kh_h2b(Hash()(key), new_bits);
+				while (__kh_used(new_used, i)) i = (i + khint_t(1)) & new_mask; // first bucket that is free in the new layout
+				__kh_set_used(new_used, i);
+				if (i < nb && __kh_used(used, i)) { /* kick out the existing element */
+					{ T tmp = keys[i]; keys[i] = key; key = tmp; }
+					__kh_set_unused(used, i); /* mark it as deleted in the old hash table */
+				} else { /* write the element and jump out of the loop */
+					keys[i] = key;
+					break;
+				}
+			}
+		}
+		if (nb > new_nb) /* shrink the hash table */
+			keys = (T*)std::realloc(keys, new_nb * sizeof(T)); // NOTE(review): the realloc result is not checked on this path
+		std::free(used); /* free the working space */
+		used = new_used, bits = new_bits;
+		return 0;
+	}
+	khint_t put(const T &key, int *absent_ = 0) { // inserts key if missing and returns its bucket; *absent_ = 1 inserted, 0 already present, -1 resize failed
+		khint_t nb, i, last, mask;
+		int absent = -1;
+		nb = n_buckets();
+		if (count >= (nb>>1) + (nb>>2)) { // rehashing once the table is 75% full (also true for the empty, unallocated table)
+			if (resize(nb + khint_t(1)) < 0) {
+				if (absent_) *absent_ = -1;
+				return nb; // the old bucket count, i.e. end()
+			}
+			nb = n_buckets();
+		} // TODO: to implement automatically shrinking; resize() already supports shrinking
+		mask = nb - 1;
+		i = last = __kh_h2b(Hash()(key), bits);
+		while (__kh_used(used, i) && !Eq()(keys[i], key)) { // linear probing with wrap-around
+			i = (i + 1U) & mask;
+			if (i == last) break;
+		}
+		if (!__kh_used(used, i)) { // not present at all
+			keys[i] = key;
+			__kh_set_used(used, i);
+			++count, absent = 1;
+		} else absent = 0; /* Don't touch keys[i] if present */
+		if (absent_) *absent_ = absent;
+		return i;
+	}
+	int del(khint_t i) { // removes the element in bucket i; returns 1 if done, 0 if i is out of range. Occupancy of i is not checked, so pass an index obtained from get()
+		khint_t j = i, k, mask, nb = n_buckets();
+		if (keys == 0 || i >= nb) return 0; // also covers i == end(), i.e. a failed get()
+		mask = nb - khint_t(1);
+		while (1) { // shift later entries of the probe run back so lookups never hit a gap
+			j = (j + khint_t(1)) & mask;
+			if (j == i || !__kh_used(used, j)) break; // j==i only when the table is completely full
+			k = __kh_h2b(Hash()(keys[j]), bits); // home bucket of the element stored at j
+			if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j)))
+				keys[i] = keys[j], i = j; // move it into the hole; the hole is now at j
+		}
+		__kh_set_unused(used, i);
+		--count;
+		return 1;
+	}
+};
+
+/***********
+ * HashMap *
+ ***********/
+
+template<class KType, class VType>
+struct KHashMapBucket { KType key; VType val; }; // element type stored by KHashMap: a key together with its mapped value
+
+template<class T, class Hash, typename khint_t>
+struct KHashMapHash { khint_t operator() (const T &a) const { return Hash()(a.key); } }; // adapts a key hasher to buckets: hashes only a.key, result converted to khint_t
+
+template<class T, class Eq>
+struct KHashMapEq { bool operator() (const T &a, const T &b) const { return Eq()(a.key, b.key); } }; // adapts a key comparator to buckets: compares only the key members
+
+template<class KType, class VType, class Hash, class Eq=std::equal_to<KType>, typename khint_t=uint32_t> // key => value map layered on KHashSet of {key, val} buckets
+class KHashMap : public KHashSet<KHashMapBucket<KType, VType>,
+		KHashMapHash<KHashMapBucket<KType, VType>, Hash, khint_t>,
+		KHashMapEq<KHashMapBucket<KType, VType>, Eq>, khint_t>
+{
+protected:
+	typedef KHashMapBucket<KType, VType> bucket_t; // stored element type
+	typedef KHashSet<bucket_t, KHashMapHash<bucket_t, Hash, khint_t>, KHashMapEq<bucket_t, Eq>, khint_t> hashset_t; // shorthand for the base class
+public:
+	khint_t get(const KType &key) const { // bucket index of key, or end() if absent
+		bucket_t t = { key, VType() }; // probe bucket; only its key takes part in hashing and comparison
+		return hashset_t::get(t);
+	}
+	khint_t put(const KType &key, int *absent) { // inserts key (with a value-initialized val) if missing; absent is reported as in KHashSet::put and has no default here
+		bucket_t t = { key, VType() };
+		return hashset_t::put(t, absent);
+	}
+	inline KType &key(khint_t i) { return hashset_t::key(i).key; } // key stored in bucket i (no occupancy check)
+	inline VType &value(khint_t i) { return hashset_t::key(i).val; } // value stored in bucket i (no occupancy check)
+	inline VType &operator[] (const KType &key) { // inserts key with VType() if missing; an existing value is left as is
+		bucket_t t = { key, VType() };
+		return value(hashset_t::put(t)); // NOTE(review): put() returns its pre-resize bucket count when resize fails, and that index is used unchecked here
+	}
+};
+
+/****************************
+ * HashSet with cached hash *
+ ****************************/
+
+template<class KType, typename khint_t>
+struct KHashSetCachedBucket { KType key; khint_t hash; }; // element type of KHashSetCached: the key plus its hash, computed once by the caller
+
+template<class T, typename khint_t>
+struct KHashCachedHash { khint_t operator() (const T &a) const { return a.hash; } }; // "hashing" a cached bucket just returns the stored hash, so the key is never re-hashed
+
+template<class T, class Eq>
+struct KHashCachedEq { bool operator() (const T &a, const T &b) const { return a.hash == b.hash && Eq()(a.key, b.key); } }; // stored hashes are compared first; Eq runs on the keys only when they match
+
+template<class KType, class Hash, class Eq = std::equal_to<KType>, typename khint_t = uint32_t> // hash set whose buckets also store each key's hash value
+class KHashSetCached : public KHashSet<KHashSetCachedBucket<KType, khint_t>,
+		KHashCachedHash<KHashSetCachedBucket<KType, khint_t>, khint_t>,
+		KHashCachedEq<KHashSetCachedBucket<KType, khint_t>, Eq>, khint_t>
+{
+	typedef KHashSetCachedBucket<KType, khint_t> bucket_t; // stored element type (typedefs here are private: no access specifier precedes them)
+	typedef KHashSet<bucket_t, KHashCachedHash<bucket_t, khint_t>, KHashCachedEq<bucket_t, Eq>, khint_t> hashset_t; // shorthand for the base class
+public:
+	khint_t get(const KType &key) const { // bucket index of key, or end() if absent
+		bucket_t t = { key, Hash()(key) }; // Hash is evaluated once here and carried in the probe bucket
+		return hashset_t::get(t);
+	}
+	khint_t put(const KType &key, int *absent) { // inserts key if missing; absent is reported as in KHashSet::put and has no default here
+		bucket_t t = { key, Hash()(key) };
+		return hashset_t::put(t, absent);
+	}
+	inline KType &key(khint_t i) { return hashset_t::key(i).key; } // key stored in bucket i (no occupancy check)
+};
+
+/****************************
+ * HashMap with cached hash *
+ ****************************/
+
+template<class KType, class VType, typename khint_t>
+struct KHashMapCachedBucket { KType key; VType val; khint_t hash; }; // element type of KHashMapCached: key, mapped value and the key's stored hash
+
+template<class KType, class VType, class Hash, class Eq = std::equal_to<KType>, typename khint_t = uint32_t> // key => value map whose buckets also store each key's hash value
+class KHashMapCached : public KHashSet<KHashMapCachedBucket<KType, VType, khint_t>,
+		KHashCachedHash<KHashMapCachedBucket<KType, VType, khint_t>, khint_t>,
+		KHashCachedEq<KHashMapCachedBucket<KType, VType, khint_t>, Eq>, khint_t>
+{
+protected:
+	typedef KHashMapCachedBucket<KType, VType, khint_t> bucket_t; // stored element type
+	typedef KHashSet<bucket_t, KHashCachedHash<bucket_t, khint_t>, KHashCachedEq<bucket_t, Eq>, khint_t> hashset_t; // shorthand for the base class
+public:
+	khint_t get(const KType &key) const { // bucket index of key, or end() if absent
+		bucket_t t = { key, VType(), Hash()(key) }; // probe bucket: the value is a placeholder, the hash is computed once here
+		return hashset_t::get(t);
+	}
+	khint_t put(const KType &key, int *absent) { // inserts key (with a value-initialized val) if missing; absent is reported as in KHashSet::put and has no default here
+		bucket_t t = { key, VType(), Hash()(key) };
+		return hashset_t::put(t, absent);
+	}
+	inline KType &key(khint_t i) { return hashset_t::key(i).key; } // key stored in bucket i (no occupancy check)
+	inline VType &value(khint_t i) { return hashset_t::key(i).val; } // value stored in bucket i (no occupancy check)
+	inline VType &operator[] (const KType &key) { // inserts key with VType() if missing; an existing value is left as is
+		bucket_t t = { key, VType(), Hash()(key) };
+		return value(hashset_t::put(t)); // NOTE(review): put() returns its pre-resize bucket count when resize fails, and that index is used unchecked here
+	}
+};
+
+}
+
+#endif /* __AC_KHASHL_HPP */


=====================================
tag_git.sh
=====================================
@@ -3,7 +3,7 @@ git checkout master
 ver=$(fgrep '#define GCLIB_VERSION ' GBase.h)
 ver=${ver#*\"}
 ver=${ver%%\"*}
-git fetch --tags
+#git fetch --tags
 if [[ "$1" == "delete" || "$1" == "del" ]]; then
   echo "Deleting tag v$ver .."
   git tag -d v$ver


=====================================
xxhash.h
=====================================
The diff for this file was not included because it is too large.


View it on GitLab: https://salsa.debian.org/med-team/libgclib/-/commit/fe459c031c0e0ad141be34d228d2e0af9594903e

-- 
View it on GitLab: https://salsa.debian.org/med-team/libgclib/-/commit/fe459c031c0e0ad141be34d228d2e0af9594903e
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201026/102bec19/attachment-0001.html>


More information about the debian-med-commit mailing list