[med-svn] [Git][med-team/libgclib][upstream] New upstream version 0.12.1+ds
Andreas Tille
gitlab at salsa.debian.org
Mon Oct 26 09:20:43 GMT 2020
Andreas Tille pushed to branch upstream at Debian Med / libgclib
Commits:
fe459c03 by Andreas Tille at 2020-09-18T11:04:57+02:00
New upstream version 0.12.1+ds
- - - - -
16 changed files:
- GBam.cpp
- GBam.h
- GBase.cpp
- GBase.h
- GFastaIndex.cpp
- GFastaIndex.h
- + GHashMap.hh
- GIntHash.hh
- GThreads.cpp
- GThreads.h
- gff.cpp
- gff.h
- htest.cpp
- + khashl.hh
- tag_git.sh
- + xxhash.h
Changes:
=====================================
GBam.cpp
=====================================
@@ -29,7 +29,7 @@ uint8_t* dupalloc_bdata(bam1_t *b, int size) {
GBamRecord::GBamRecord(const char* qname, int32_t gseq_tid,
int pos, bool reverse, const char* qseq,
const char* cigar, const char* quals):iflags(0), exons(1),
- clipL(0), clipR(0), mapped_len(0) {
+ clipL(0), clipR(0), mapped_len(0), uval(0) {
novel=true;
bam_header=NULL;
b=bam_init1();
@@ -57,7 +57,7 @@ GBamRecord::GBamRecord(const char* qname, int32_t gseq_tid,
GBamRecord::GBamRecord(const char* qname, int32_t samflags, int32_t g_tid,
int pos, int map_qual, const char* cigar, int32_t mg_tid, int mate_pos,
int insert_size, const char* qseq, const char* quals,
- GVec<char*>* aux_strings):iflags(0), exons(1) {
+ GVec<char*>* aux_strings):iflags(0), exons(1), uval(0) {
novel=true;
bam_header=NULL;
b=bam_init1();
=====================================
GBam.h
=====================================
@@ -36,13 +36,14 @@ class GBamRecord: public GSeg {
int clipL; //soft clipping data, as seen in the CIGAR string
int clipR;
int mapped_len; //sum of exon lengths
+ int uval; //user value (e.g. file index)
bool isHardClipped() { return hard_Clipped; }
bool isSoftClipped() { return soft_Clipped; }
bool hasIntrons() { return has_Introns; }
//created from a reader:
void bfree_on_delete(bool b_free=true) { novel=b_free; }
GBamRecord(bam1_t* from_b=NULL, bam_header_t* b_header=NULL, bool b_free=true):iflags(0), exons(1),
- clipL(0), clipR(0), mapped_len(0) {
+ clipL(0), clipR(0), mapped_len(0), uval(0) {
bam_header=NULL;
if (from_b==NULL) {
b=bam_init1();
@@ -58,7 +59,7 @@ class GBamRecord: public GSeg {
}
GBamRecord(GBamRecord& r):GSeg(r.start, r.end), iflags(0), exons(r.exons),
- clipL(r.clipL), clipR(r.clipR), mapped_len(r.mapped_len) { //copy constructor
+ clipL(r.clipL), clipR(r.clipR), mapped_len(r.mapped_len), uval(0) { //copy constructor
//makes a new copy of the bam1_t record etc.
clear();
b=bam_dup1(r.b);
@@ -77,6 +78,7 @@ class GBamRecord: public GSeg {
exons = r.exons;
clipL = r.clipL;
clipR = r.clipR;
+ uval = r.uval;
mapped_len=r.mapped_len;
return *this;
}
@@ -317,6 +319,7 @@ class GBamReader {
class GBamWriter {
samfile_t* bam_file;
bam_header_t* bam_header;
+ bool sharedHeader;
public:
void create(const char* fname, bool uncompressed=false) {
if (bam_header==NULL)
@@ -330,16 +333,21 @@ class GBamWriter {
if (bam_file==NULL)
GError("Error: could not create BAM file %s!\n",fname);
}
+
void create(const char* fname, bam_header_t* bh, bool uncompressed=false) {
bam_header=bh;
create(fname,uncompressed);
}
- GBamWriter(const char* fname, bam_header_t* bh, bool uncompressed=false) {
+ GBamWriter(const char* fname, bam_header_t* bh, bool uncompressed=false):sharedHeader(false) {
create(fname, bh, uncompressed);
- }
+ }
- GBamWriter(const char* fname, const char* samfname, bool uncompressed=false) {
+ GBamWriter(bam_header_t* bh, const char* fname, bool uncompressed=false):sharedHeader(true) {
+ create(fname, bh, uncompressed);
+ }
+
+ GBamWriter(const char* fname, const char* samfname, bool uncompressed=false):sharedHeader(false) {
tamFile samf_in=sam_open(samfname);
if (samf_in==NULL)
GError("Error: could not open SAM file %s\n", samfname);
@@ -352,8 +360,8 @@ class GBamWriter {
~GBamWriter() {
samclose(bam_file);
- bam_header_destroy(bam_header);
- }
+ if (!sharedHeader) bam_header_destroy(bam_header);
+ }
bam_header_t* get_header() { return bam_header; }
int32_t get_tid(const char *seq_name) {
if (bam_header==NULL)
=====================================
GBase.cpp
=====================================
@@ -157,7 +157,7 @@ int G_mkdir(const char* path, int perms = (S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH
//int perms=(S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH) ) {
#ifdef _WIN32
//return _mkdir(path);
- return CreateDirectoryA(path, NULL);
+ return !CreateDirectoryA(path, NULL);
#else
return mkdir(path, perms);
#endif
@@ -167,8 +167,16 @@ int G_mkdir(const char* path, int perms = (S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH
void Gmktempdir(char* templ) {
#ifdef _WIN32
int blen=strlen(templ);
- if (_mktemp_s(templ, blen)!=0)
- GError("Error creating temp dir %s!\n", templ);
+ char* pt=templ+blen-1;
+ //on Windows this needs a plain file name template, without directory prefix
+ blen=1;
+ while (pt!=templ) {
+ if (*pt=='/' || *pt=='\\') { pt++; break;}
+ --pt; blen++;
+ }
+ if (_mktemp_s(pt, blen)!=0)
+ GError("Error creating template file name %s!\n", pt);
+ Gmkdir(templ, true);
#else
char* cdir=mkdtemp(templ);
if (cdir==NULL)
@@ -239,12 +247,40 @@ FILE* Gfopen(const char *path, char *mode) {
bool GstrEq(const char* a, const char* b) {
if (a==NULL || b==NULL) return false;
- return (strcmp(a,b)==0);
+ return (strcmp(a, b)==0);
+}
+
+
+#ifdef __CYGWIN__
+// Fallback case-insensitive string comparisons, compiled only when
+// __CYGWIN__ is defined.
+
+// Compare two NUL-terminated strings ignoring letter case.
+// Returns 0 when they are equal, otherwise the difference between the first
+// pair of lower-cased characters that differ (s1's minus s2's).
+int strcasecmp (const char *s1, const char *s2) {
+  int d = 0;
+  for ( ; ; ) {
+    // tolower() is only defined for values representable as unsigned char
+    // (or EOF), so a plain char that may be negative must be converted first
+    const int c1 = tolower((unsigned char)*s1++);
+    const int c2 = tolower((unsigned char)*s2++);
+    if (((d = c1 - c2) != 0) || (c2 == '\0'))
+      break;
+  }
+  return d;
}
+// Same as strcasecmp() but examines at most n characters;
+// returns 0 when n is 0.
+int strncasecmp (const char *s1, const char *s2,
+    size_t n) {
+  int d = 0;
+  for ( ; n != 0; n--) {
+    // see strcasecmp(): convert through unsigned char before tolower()
+    const int c1 = tolower((unsigned char)*s1++);
+    const int c2 = tolower((unsigned char)*s2++);
+    if (((d = c1 - c2) != 0) || (c2 == '\0'))
+      break;
+  }
+  return d;
+}
+
+#endif
+
+
bool GstriEq(const char* a, const char* b) {
if (a==NULL || b==NULL) return false;
- return (strcasecmp(a,b)==0);
+ return (strcasecmp(a, b)==0);
}
int Gstricmp(const char* a, const char* b, int n) {
@@ -920,6 +956,8 @@ void writeFasta(FILE *fw, const char* seqid, const char* descr,
fflush(fw);
}
+
+
char* commaprintnum(uint64 n) {
char retbuf[48];
int comma = ',';
=====================================
GBase.h
=====================================
@@ -1,6 +1,6 @@
#ifndef G_BASE_DEFINED
#define G_BASE_DEFINED
-#define GCLIB_VERSION "0.11.10"
+#define GCLIB_VERSION "0.12.1"
#ifdef HAVE_CONFIG_H
#include "config.h"
@@ -17,15 +17,8 @@
//#define __ISO_C_VISIBLE 1999
#endif
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#include <limits.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <stdint.h>
-#include <stdarg.h>
+#define XSTR(x) STR(x)
+#define STR(x) #x
#ifdef _WIN32
#include <windows.h>
@@ -36,6 +29,7 @@
#ifndef popen
#define popen _popen
#endif
+ /*
#ifndef fseeko
#ifdef _fseeki64
#define fseeko(stream, offset, origin) _fseeki64(stream, offset, origin)
@@ -43,38 +37,53 @@
#define fseeko fseek
#endif
#endif
- #ifndef ftello
- #ifdef _ftelli64
- #define ftello(stream) _ftelli64(stream)
- #else
- #define ftello ftell
- #endif
+ #ifndef ftello
+ #ifdef _ftelli64
+ #define ftello(stream) _ftelli64(stream)
+ #else
+ #define ftello ftell
+ #endif
#endif
+ */
#else
#define CHPATHSEP '/'
+ #ifdef __CYGWIN__
+ #define _BSD_SOURCE
+ #endif
#include <unistd.h>
#endif
-#ifndef fseeko
- #define fseeko fseek
-#endif
-#ifndef ftello
- #define ftello ftell
-#endif
-
#ifdef DEBUG
#undef NDEBUG
#define _DEBUG 1
#define _DEBUG_ 1
#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <math.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <type_traits>
+
+typedef int64_t int64;
+typedef uint64_t uint64;
typedef int32_t int32;
typedef uint32_t uint32;
typedef int16_t int16;
typedef uint16_t uint16;
typedef unsigned char uchar;
-typedef unsigned char byte;
+typedef uint8_t byte;
+typedef unsigned int uint;
+
+typedef void* pointer;
+
#ifndef MAXUINT
#define MAXUINT ((unsigned int)-1)
@@ -92,9 +101,6 @@ typedef unsigned char byte;
#define MAX_INT INT_MAX
#endif
-typedef int64_t int64;
-typedef uint64_t uint64;
-
/****************************************************************************/
#ifndef EXIT_FAILURE
@@ -155,9 +161,6 @@ GEXIT(#condition);}
// Clamp value x to range [lo..hi]
#define GCLAMP(lo,x,hi) ((x)<(lo)?(lo):((x)>(hi)?(hi):(x)))
-typedef void* pointer;
-typedef unsigned int uint;
-
typedef int GCompareProc(const pointer item1, const pointer item2);
typedef long GFStoreProc(const pointer item1, FILE* fstorage); //for serialization
typedef pointer GFLoadProc(FILE* fstorage); //for deserialization
@@ -167,6 +170,7 @@ typedef void GFreeProc(pointer item); //usually just delete,
#define GMALLOC(ptr,size) if (!GMalloc((pointer*)(&ptr),size)) \
GError(ERR_ALLOC)
+
#define GCALLOC(ptr,size) if (!GCalloc((pointer*)(&ptr),size)) \
GError(ERR_ALLOC)
#define GREALLOC(ptr,size) if (!GRealloc((pointer*)(&ptr),size)) \
@@ -209,12 +213,28 @@ template<class T> void Gswap(T& lhs, T& rhs) {
rhs=tmp;
}
+// use std::is_pointer from <type_traits> in C++11 instead
+/*
+template<typename T>
+ struct isPointer { static const bool value = false; };
+
+template<typename T>
+ struct isPointer<T*> { static const bool value = true; };
+*/
+// Type trait: derives from std::true_type when T, after std::decay
+// (reference and top-level cv-qualifiers removed, arrays turned into
+// pointers), is exactly `char*` or `const char*`; from std::false_type
+// otherwise.
+template<class T>
+  struct is_char_ptr : std::conditional<
+      std::is_same<typename std::decay<T>::type, char *>::value ||
+      std::is_same<typename std::decay<T>::type, char const *>::value,
+      std::true_type,
+      std::false_type
+    >::type {};
/**************** Memory management ***************************/
bool GMalloc(pointer* ptr, unsigned long size); // Allocate memory
bool GCalloc(pointer* ptr, unsigned long size); // Allocate and initialize memory
bool GRealloc(pointer* ptr,unsigned long size); // Resize memory
+
void GFree(pointer* ptr); // Free memory, resets ptr to NULL
//int saprintf(char **retp, const char *fmt, ...);
@@ -224,6 +244,15 @@ void GMessage(const char* format,...);// Log message to stderr
// Assert failed routine:- usually not called directly but through GASSERT
void GAssert(const char* expression, const char* filename, unsigned int lineno);
+
+// Allocate a sizeof(T) buffer with GMalloc() and memcpy() the bytes of
+// `data` into it; no constructor or copy constructor of T is run.
+// Calls GError(ERR_ALLOC) when the allocation fails.
+// Returns the pointer to the newly allocated copy.
+// NOTE(review): the copy is presumably meant to be released with
+// GFree()/GFREE -- confirm against callers.
+template<class T> T* GDupAlloc(T& data) {
+  T* tmp=NULL;
+  // GMalloc() expects the address of the pointer it has to fill in
+  // (pointer* ptr); casting the NULL value of tmp itself would hand it a
+  // null address and leave tmp unset
+  if (!GMalloc((pointer*) &tmp, sizeof(T)))
+    GError(ERR_ALLOC);
+  memcpy((void*)tmp, (void*)&data, sizeof(T));
+  return tmp;
+}
+
// ****************** basic string manipulation *************************
char *Gstrdup(const char* str, int xtracap=0); //string duplication with extra capacity added
//duplicate a string by allocating a copy for it (+xtracap heap room) and returning the new pointer
=====================================
GFastaIndex.cpp
=====================================
@@ -20,7 +20,8 @@ void GFastaIndex::addRecord(const char* seqname, uint seqlen, off_t foffs, int l
else {
farec=new GFastaRec(seqlen,foffs,llen,llen_full);
records.Add(seqname,farec);
- farec->seqname=records.getLastKey();
+ //farec->seqname=records.getLastKey();
+ farec->seqname=seqname;
}
}
@@ -52,7 +53,7 @@ int GFastaIndex::loadIndex(const char* finame) { //load record info from existin
sscanf(p, "%d%ld%d%d", &len, &offset, &line_len, &line_blen);
#else
long long offset=-1;
- sscanf(p, "%d%lld%d%d", &len, &offset, &line_len, &line_blen);
+ sscanf(p, "%u%lld%d%d", &len, &offset, &line_len, &line_blen);
#endif
if (len==0 || line_len==0 || line_blen==0 || line_blen<line_len)
GError(ERR_FAIDXLINE,p);
@@ -161,7 +162,7 @@ int GFastaIndex::storeIndex(FILE* fai) {
GFastaRec* rec=NULL;
while ((rec=records.NextData())!=NULL) {
reclist.Add(rec);
- }
+ }
//reclist has records sorted by file offset
for (int i=0;i<reclist.Count();i++) {
#ifdef _WIN32
=====================================
GFastaIndex.h
=====================================
@@ -8,12 +8,12 @@
#ifndef GFAIDX_H_
#define GFAIDX_H_
-#include "GHash.hh"
+#include "GHashMap.hh"
#include "GList.hh"
class GFastaRec {
public:
- char* seqname;
+ const char* seqname;
uint seqlen;
off_t fpos;
int line_len; //effective line length (without EoL)
@@ -42,7 +42,7 @@ class GFastaIndex {
char* fai_name;
bool haveFai;
public:
- GHash<GFastaRec> records;
+ GHash<GFastaRec*> records;
void addRecord(const char* seqname, uint seqlen,
off_t foffs, int llen, int llen_full);
=====================================
GHashMap.hh
=====================================
@@ -0,0 +1,453 @@
+/********************************************************************************
+ * Hash map class templates
+ *********************************************************************************/
+
+#ifndef GHashMap_HH
+#define GHashMap_HH
+#include "GBase.h"
+#include "khashl.hh"
+#include <type_traits>
+#include <typeinfo>
+
+#define XXH_INLINE_ALL 1
+#include "xxhash.h"
+
+// Hash functor producing a 32-bit xxHash (XXH32, seed 0) of a key.
+// Generic version: hashes the sizeof(K) bytes of the key object itself, so it
+// is restricted to trivial types (enforced by the static_assert below).
+template <typename K> struct GHashKey_xxHash32 { //K generic (class, primitive, pointer except const char* )
+  uint32_t operator()(const K& s) const {
+    static_assert(std::is_trivial<K>::value, "Error: cannot use this for non-trivial types!\n");
+    const void* raw = (const void *) &s;
+    const size_t nbytes = sizeof(K);
+    return XXH32(raw, nbytes, 0);
+  }
+};
+
+// const char* keys: hash the characters of the NUL-terminated string
+// (terminator excluded) instead of the pointer value.
+template <> struct GHashKey_xxHash32<const char*> {
+  inline uint32_t operator()(const char* s) const {
+    const size_t len = strlen(s);
+    return XXH32(s, len, 0);
+  }
+};
+
+// Hash functor producing a 64-bit xxHash (XXH64, seed 0) of a key.
+// Generic version: hashes the sizeof(K) bytes of the key object itself, so it
+// is restricted to trivial types (enforced by the static_assert below).
+// NOTE(review): for struct keys any padding bytes are part of the hashed
+// range -- confirm such keys are zero-initialized by callers.
+template <typename K> struct GHashKey_xxHash { //K generic (class, primitive, pointer except const char* )
+  uint64_t operator()(const K& s) const { //only works for trivial types!
+    static_assert(std::is_trivial<K>::value, "Error: cannot use this for non-trivial types!\n");
+    return XXH64((const void *) &s, sizeof(K), 0);
+  }
+};
+
+// const char* keys: hash the characters of the NUL-terminated string
+// (terminator excluded) instead of the pointer value.
+template <> struct GHashKey_xxHash<const char*> {
+  // returns the full 64-bit XXH64 result, like the generic version
+  // (a uint32_t return type would silently drop the upper 32 bits)
+  inline uint64_t operator()(const char* s) const {
+    return XXH64(s, strlen(s), 0);
+  }
+};
+
+// Key equality functor used by the hash containers.
+// Generic version: delegates to K's own == operator.
+template <typename K> struct GHashKey_Eq { //K is a type having the == operator defined
+  inline bool operator()(const K& x, const K& y) const {
+    const bool same = (x == y); //requires == operator to be defined for K
+    return same;
+  }
+};
+
+// const char* keys are compared by string content (strcmp), not by address.
+template <> struct GHashKey_Eq<const char*> {
+  inline bool operator()(const char* x, const char* y) const {
+    return !strcmp(x, y);
+  }
+};
+
+//GHashSet is never making a deep copy of the char* key, it only stores the pointer
+// Set of keys of type K layered on the klib hash set templates:
+// klib::KHashSetCached is the base when K is a char pointer type
+// (is_char_ptr<K>), klib::KHashSet otherwise.
+// khInt_t is the integer type used for slot indexes; the methods that return
+// khInt_t report "not found / not added" as (khInt_t)-1.
+template <typename K=const char*, class Hash=GHashKey_xxHash<K>, class Eq=GHashKey_Eq<K>, typename khInt_t=uint64_t >
+  class GHashSet: public std::conditional< is_char_ptr<K>::value,
+      klib::KHashSetCached< K, Hash, Eq, khInt_t >,
+      klib::KHashSet< K, Hash, Eq, khInt_t > >::type {
+protected:
+  khInt_t i_iter=0; //next slot to be examined by Next()
+public:
+  // Insert ky; returns the slot index of the new key,
+  // or -1 if the key already exists
+  inline khInt_t Add(const K ky) {
+    int absent=-1;
+    khInt_t i=this->put(ky, &absent);
+    if (absent==1) //key was actually added
+      return i;
+    return -1;
+  }
+
+  // Delete ky; returns the index being removed, or -1 if no such key exists
+  inline khInt_t Remove(K ky) {
+    khInt_t i=this->get(ky);
+    if (i!=this->end()) {
+      this->del(i);
+      return i;
+    }
+    return -1;
+  }
+
+  // Remove all keys, keeping the table storage
+  inline void Clear() {
+    this->clear(); //does not shrink !
+  }
+
+  // Remove all keys and also release the `used` and `keys` arrays,
+  // zeroing the `bits` and `count` fields
+  inline void Reset() {
+    this->clear();
+    GFREE(this->used); GFREE(this->keys);
+    this->bits=0; this->count=0;
+  }
+
+  ~GHashSet() {
+    this->Reset();
+  }
+
+  // Membership test
+  inline bool operator[](K ky) { //RH only (read-only), cannot assign (use Add instead)
+    return (this->get(ky)!=this->end());
+  }
+
+  // Membership test (same as operator[])
+  inline bool hasKey(K ky) {
+    return (this->get(ky)!=this->end());
+  }
+
+  // Return the internal slot location of ky (converted to int) if found,
+  // or -1 if not found
+  int Find(K ky) {
+    khInt_t r=this->get(ky);
+    if (r==this->end()) return -1;
+    return (int)r;
+  }
+
+  void startIterate() { //iterator-like initialization
+    i_iter=0;
+  }
+
+  // Returns a pointer to the next valid key in the table (NULL if no more);
+  // call startIterate() first to begin from slot 0
+  K* Next() {
+    if (this->count==0) return NULL;
+    // keep the slot count in khInt_t: a narrower type would truncate it
+    khInt_t nb=this->n_buckets();
+    while (i_iter<nb && !this->occupied(i_iter)) i_iter++;
+    if (i_iter==nb) return NULL;
+    // i_iter is the occupied slot just found; its key is the one to return
+    // (i_iter-1 would be the preceding slot, or out of range for slot 0)
+    K* k=&(this->key(i_iter));
+    ++i_iter;
+    return k;
+  }
+
+  // Number of keys, as stored in `count` (converted to uint32_t)
+  inline uint32_t Count() { return this->count; }
+
+};
+
+//GStrSet always allocates a copy of each added string;
+// if you don't want that (keys are shared), just use GHashSet<const char*> instead
+// The copies are made with Gstrdup() in Add() and released with GFREE in
+// Remove()/Clear(); the methods below have the same names as those of the
+// GHashSet<const char*> base class.
+template <class Hash=GHashKey_xxHash<const char*>, class Eq=GHashKey_Eq<const char*>, typename khInt_t=uint64_t>
+ class GStrSet: public GHashSet<const char*, Hash, Eq, khInt_t> {
+ public:
+ // Insert a private copy of ky; returns the slot index (converted to int)
+ inline int Add(const char* ky) { // return -1 if the key already exists
+ int absent=-1;
+ khInt_t i=this->put(ky, &absent);
+ if (absent==1) {//key was actually added
+ // the slot was filled with the caller's pointer by put(); overwrite it
+ // with our own duplicate of the string
+ const char* s=Gstrdup(ky);
+ this->key(i)=s; //store a copy of the key string
+ return i;
+ }
+ //key was already there
+ return -1;
+ }
+
+ int Remove(const char* ky) { //return index being removed, or -1 if no such key exists
+ khInt_t i=this->get(ky);
+ if (i!=this->end()) {
+ GFREE(this->key(i)); //free string copy
+ this->del(i);
+ return i;
+ }
+ return -1;
+ }
+
+ // Free every stored string copy, then empty the table
+ inline void Clear() {
+ khInt_t nb=this->n_buckets();
+ for (khInt_t i = 0; i != nb; ++i) {
+ // skip the slots that __kh_used() does not report as in use
+ if (!this->__kh_used(this->used, i)) continue;
+ //deallocate string copy
+ GFREE(this->key(i));
+ }
+ this->clear(); //does not shrink !
+ }
+
+ // Clear(), then also release the `used` and `keys` arrays and zero the
+ // `bits` and `count` fields
+ inline void Reset() {
+ this->Clear();
+ GFREE(this->used); GFREE(this->keys);
+ this->bits=0; this->count=0;
+ }
+
+ ~GStrSet() {
+ this->Reset();
+ }
+
+};
+
+//generic hash map where keys and values can be of any type
+// Layered on the klib hash map templates: klib::KHashMapCached is the base
+// when K is a char pointer type (is_char_ptr<K>), klib::KHashMap otherwise.
+// Keys are stored as given (no copy of a pointed-to key is made here); K must
+// be a trivial type (checked in the constructor).
+// When V is a pointer type and freeItems is set, stored values are released
+// with `delete` by Remove() and Clear().
+// Iteration: call startIterate(), then Next()/NextData() until NULL.
+template <class K, class V, class Hash=GHashKey_xxHash<K>, class Eq=GHashKey_Eq<K>, typename khInt_t=uint64_t>
+  class GHashMap:public std::conditional< is_char_ptr<K>::value,
+      klib::KHashMapCached< K, V, Hash, Eq, khInt_t>,
+      klib::KHashMap< K, V, Hash, Eq, khInt_t> >::type {
+protected:
+  khInt_t i_iter=0;     //next slot to be examined by Next()/NextData()
+  bool freeItems=false; //delete pointer values on Remove()/Clear()
+public:
+  //---- these should be reimplemented for GHash
+  // Insert the (ky, val) pair; the key is stored as passed in.
+  // Returns the slot index (converted to int) of the new entry,
+  // or -1 if the key already exists (the stored value is left untouched)
+  inline int Add(const K ky, const V val) {
+    int absent=-1;
+    khInt_t i=this->put(ky, &absent);
+    if (absent==1) { //key was actually added
+      this->value(i)=val; //value is always copied
+      return i;
+    }
+    return -1;
+  }
+
+  // Remove(), pointer values: deletes the stored value when freeItems is set.
+  // Returns the index being removed, or -1 if no such key exists
+  template <typename T=V> inline
+    typename std::enable_if< std::is_pointer<T>::value, int>::type
+  Remove(K ky) {
+    khInt_t i=this->get(ky);
+    if (i!=this->end()) {
+      if (freeItems) delete this->value(i);
+      this->del(i);
+      return i;
+    }
+    return -1;
+  }
+
+  // Remove(), non-pointer values.
+  // Returns the index being removed, or -1 if no such key exists
+  template <typename T=V> inline
+    typename std::enable_if< !std::is_pointer<T>::value, int>::type
+  Remove(K ky) {
+    khInt_t i=this->get(ky);
+    if (i!=this->end()) {
+      this->del(i);
+      return i;
+    }
+    return -1;
+  }
+
+  // Clear(), pointer values: deletes every stored value when freeItems is
+  // set, then empties the table (storage is kept)
+  template <typename T=V> inline
+    typename std::enable_if< std::is_pointer<T>::value, void>::type
+  Clear() {
+    if (freeItems) {
+      khInt_t nb=this->n_buckets();
+      for (khInt_t i = 0; i != nb; ++i) {
+        if (!this->__kh_used(this->used, i)) continue;
+        delete this->value(i);
+      }
+    }
+    this->clear(); //does not shrink !
+  }
+
+  // Clear(), non-pointer values: there is nothing to release per entry,
+  // so this only empties the table (storage is kept)
+  template <typename T=V> inline
+    typename std::enable_if< !std::is_pointer<T>::value, void>::type
+  Clear() {
+    this->clear(); //does not shrink !
+  }
+
+  // Clear(), then also release the `used` and `keys` arrays and zero the
+  // `bits` and `count` fields
+  inline void Reset() {
+    this->Clear();
+    GFREE(this->used); GFREE(this->keys);
+    this->bits=0; this->count=0;
+  }
+
+  ~GHashMap() {
+    this->Reset();
+  }
+
+  // -- these can be shared with GHash:
+
+  // doFree: whether stored pointer values are deleted by Remove()/Clear();
+  // defaults to true exactly when V is a pointer type and is forced to false
+  // when V is not a pointer (there is nothing to delete in that case)
+  GHashMap(bool doFree=std::is_pointer<V>::value):
+      freeItems(doFree && std::is_pointer<V>::value) {
+    static_assert(std::is_trivial<K>::value,
+        "Error: cannot use this for non-trivial types!\n");
+  }
+
+  //return pointer to stored value if found, NULL otherwise
+  // if the stored value is a pointer, it's going to be a pointer to that
+  // Find(), pointer values: returns the stored pointer itself
+  template <typename T=V> inline
+    typename std::enable_if< std::is_pointer<T>::value, T>::type
+  Find(const K ky) {
+    khInt_t r=this->get(ky);
+    if (r==this->end()) return NULL;
+    return this->value(r);
+  }
+
+  // Find(), non-pointer values: returns the address of the stored value
+  template <typename T=V> inline
+    typename std::enable_if< !std::is_pointer<T>::value, T*>::type
+  Find(const K ky) {
+    khInt_t r=this->get(ky);
+    if (r==this->end()) return NULL;
+    return &(this->value(r));
+  }
+
+  //-- operator[] should be defined just like Find?
+  // operator[], pointer values: read-only lookup, same result as Find()
+  template <typename T=V> inline
+    typename std::enable_if< std::is_pointer<T>::value, T>::type
+  operator[](const K ky) {
+    khInt_t r=this->get(ky);
+    if (r==this->end()) return NULL;
+    return this->value(r);
+  }
+
+  // operator[], non-pointer values: same result as Find(); a missing key
+  // yields NULL and is NOT inserted
+  template <typename T=V> inline
+    typename std::enable_if< !std::is_pointer<T>::value, T*>::type
+  operator[](const K ky) {
+    khInt_t r=this->get(ky);
+    if (r==this->end()) return NULL;
+    return &(this->value(r));
+  }
+
+  inline bool hasKey(K ky) {
+    return (this->get(ky)!=this->end());
+  }
+
+  inline void startIterate() { //iterator-like initialization
+    i_iter=0;
+  }
+
+  // Next(), non-pointer keys: copies the next entry's value into val and
+  // returns a pointer to its key (NULL if no more entries)
+  template <typename T=K> inline
+    typename std::enable_if< !std::is_pointer<T>::value, T*>::type
+  Next (V& val) {
+    if (this->count==0) return NULL;
+    khInt_t nb=this->n_buckets();
+    while (i_iter<nb && !this->occupied(i_iter)) i_iter++;
+    if (i_iter==nb) return NULL;
+    val=this->value(i_iter);
+    K* k=&(this->key(i_iter));
+    ++i_iter;
+    return k;
+  }
+
+  // Next(), pointer keys: copies the next entry's value into val and
+  // returns the key pointer itself (NULL if no more entries)
+  template <typename T=K> inline
+    typename std::enable_if< std::is_pointer<T>::value, T>::type
+  Next (V& val) {
+    if (this->count==0) return NULL;
+    khInt_t nb=this->n_buckets();
+    while (i_iter<nb && !this->occupied(i_iter)) i_iter++;
+    if (i_iter==nb) return NULL;
+    val=this->value(i_iter);
+    K k = this->key(i_iter);
+    ++i_iter;
+    return k;
+  }
+
+  // NextData(), non-pointer values: returns the address of the next stored
+  // value (NULL if no more entries)
+  template <typename T=V> inline
+    typename std::enable_if< !std::is_pointer<T>::value, T*>::type
+  NextData () {
+    if (this->count==0) return NULL;
+    khInt_t nb=this->n_buckets();
+    while (i_iter<nb && !this->occupied(i_iter)) i_iter++;
+    if (i_iter==nb) return NULL;
+    T* val=&(this->value(i_iter));
+    ++i_iter;
+    return val;
+  }
+
+  // NextData(), pointer values: returns the next stored pointer
+  // (NULL if no more entries)
+  template <typename T=V> inline
+    typename std::enable_if< std::is_pointer<T>::value, T>::type
+  NextData () {
+    if (this->count==0) return NULL;
+    khInt_t nb=this->n_buckets();
+    while (i_iter<nb && !this->occupied(i_iter)) i_iter++;
+    if (i_iter==nb) return NULL;
+    T val=this->value(i_iter);
+    ++i_iter;
+    return val;
+  }
+
+  // Number of entries, as stored in `count` (converted to uint32_t)
+  inline uint32_t Count() { return this->count; }
+
+};
+
+// String-keyed hash map: a GHashMap<const char*, V> that stores its own copy
+// of every key (made with Gstrdup() in Add(), released with GFREE in
+// Remove()/Clear()). When V is a pointer type and freeItems is set, stored
+// values are also deleted on Remove()/Clear().
+template <class V, class Hash=GHashKey_xxHash<const char*>, class Eq=GHashKey_Eq<const char*>, typename khInt_t=uint64_t >
+ class GHash:public GHashMap<const char*, V, Hash, Eq, khInt_t> {
+protected:
+
+public:
+ // doFree (default true) is copied into the inherited freeItems flag
+ GHash(bool doFree=true) {
+ this->freeItems=doFree;
+ };
+ //---- these should be now reimplemented
+ inline int Add(const char* ky, const V val) { // if a key does not exist allocate a copy of the key
+ // return -1 if the key already exists
+ int absent=-1;
+ khInt_t i=this->put(ky, &absent);
+ if (absent==1) { //key was actually added
+ // the slot was filled with the caller's pointer by put(); overwrite it
+ // with our own duplicate of the string
+ const char* s=Gstrdup(ky);
+ this->key(i)=s; //store a copy of the key string
+ this->value(i)=val; //value is always copied
+ return i;
+ }
+ return -1;
+ }
+ // Remove(), pointer values: frees the key copy, deletes the value when
+ // freeItems is set; returns -1 if no such key exists
+ template <typename T=V> inline
+ typename std::enable_if< std::is_pointer<T>::value, int>::type
+ Remove(const char* ky) { //return index being removed
+ khInt_t i=this->get(ky);
+ if (i!=this->end()) {
+ GFREE(this->key(i)); //free string copy
+ if (this->freeItems) delete this->value(i);
+ this->del(i);
+ return i;
+ }
+ return -1;
+ }
+
+ // Remove(), non-pointer values: frees the key copy only;
+ // returns -1 if no such key exists
+ template <typename T=V> inline
+ typename std::enable_if< !std::is_pointer<T>::value, int>::type
+ Remove(const char* ky) { //return index being removed
+ khInt_t i=this->get(ky);
+ if (i!=this->end()) {
+ GFREE(this->key(i)); //free string copy
+ this->del(i);
+ return i;
+ }
+ return -1;
+ }
+
+ // Clear(), pointer values: for every slot in use, delete the value (when
+ // freeItems is set) and free the key copy, then empty the table
+ template <typename T=V> inline
+ typename std::enable_if< std::is_pointer<T>::value, void>::type
+ Clear() {
+ khInt_t nb=this->n_buckets();
+ for (khInt_t i = 0; i != nb; ++i) {
+ if (!this->__kh_used(this->used, i)) continue;
+ if (this->freeItems) delete this->value(i);
+ GFREE(this->key(i));
+ }
+ this->clear();
+ }
+
+ // Clear(), non-pointer values: free the key copy of every slot in use,
+ // then empty the table
+ template <typename T=V> inline
+ typename std::enable_if< !std::is_pointer<T>::value, void>::type
+ Clear() {
+ khInt_t nb=this->n_buckets();
+ for (khInt_t i = 0; i != nb; ++i) {
+ if (!this->__kh_used(this->used, i)) continue;
+ GFREE(this->key(i));
+ }
+ this->clear();
+ }
+
+ // Clear(), then also release the `used` and `keys` arrays and zero the
+ // `bits` and `count` fields
+ inline void Reset() {
+ this->Clear();
+ GFREE(this->used); GFREE(this->keys);
+ this->bits=0; this->count=0;
+ }
+
+ ~GHash() {
+ this->Reset();
+ }
+};
+
+// GIntHash<T>: a GHashMap with int keys and T values, hashed with
+// GHashKey_xxHash32<int>, compared with GHashKey_Eq<int>, and using
+// uint32_t as the khInt_t slot index type.
+template<typename T>
+ using GIntHash = GHashMap<int, T, GHashKey_xxHash32<int>, GHashKey_Eq<int>, uint32_t>;
+
+#endif
=====================================
GIntHash.hh
=====================================
@@ -2,15 +2,15 @@
#define _GHASHT_HH
#include "GBase.h"
//----------------------------------------------
-// Hash table templates based on Jeff Preshing's code
+// Int Hash table templates
// ---------------------------------------------
// Maps 32-bit integers to user data
// Uses open addressing with linear probing.
// In the m_cells array, key = 0 is reserved to indicate an unused cell.
// Actual value for key 0 (if any) is stored in m_zeroCell.
// The hash table automatically doubles in size when it becomes 75% full.
-// The hash table never shrinks in size, even after Clear(),
-// unless you explicitly call Compact().
+// The hash table never shrinks in size
+// unless you explicitly call Clear() or Compact().
//----------------------------------------------
inline uint32_t upper_power_of_two(uint32_t v) {
v--;
@@ -228,7 +228,7 @@ public:
};
-// -- from code.google.com/p/smhasher/wiki/MurmurHash3
+// from code.google.com/p/smhasher/wiki/MurmurHash3
inline uint32_t integerHash(uint32_t h)
{
h ^= h >> 16;
@@ -239,19 +239,15 @@ inline uint32_t integerHash(uint32_t h)
return h;
}
-inline int32_t int_hashfunc_Wang(int32_t key) {
- key += ~(key << 15);
- key ^= (key >> 10);
- key += (key << 3);
- key ^= (key >> 6);
- key += ~(key << 11);
- key ^= (key >> 16);
- return key;
-}
-
-// -- from Heng Li's khash.h:
-inline uint32_t int64_hashfunc(uint64_t k) {
- return (uint32_t)(k>>33^k^k<<11);
+// 64-bit integer mixer, from code.google.com/p/smhasher/wiki/MurmurHash3:
+// two rounds of "xor with the value shifted right by 33, then multiply",
+// followed by one last xor-shift.
+inline uint64_t integerHash(uint64_t k)
+{
+  const uint64_t multipliers[2] = { 0xff51afd7ed558ccd, 0xc4ceb9fe1a85ec53 };
+  for (int round = 0; round < 2; ++round)
+    k = (k ^ (k >> 33)) * multipliers[round];
+  return k ^ (k >> 33);
}
#define GIHASH_FIRST_CELL(hash) (m_cells + ((hash) & (m_arraySize - 1)))
=====================================
GThreads.cpp
=====================================
@@ -373,7 +373,7 @@ void GThread::detach()
void GThread::wait_all() {
while (GThread::num_running()>0)
- current_thread::sleep_for(2);
+ current_thread::sleep_for(1);
}
@@ -445,7 +445,7 @@ void current_thread::yield() {
// Example usage:
// // Sleep for 100 milliseconds:
// current_thread::sleep_for(100);
-void current_thread::sleep_for(const int32_t mstime) {
+void current_thread::sleep_for(const int mstime) {
#if defined(_GTHREADS_WIN32_)
Sleep(mstime);
#else
=====================================
GThreads.h
=====================================
@@ -95,6 +95,9 @@ freely, subject to the following restrictions:
#undef __UNDEF_LEAN_AND_MEAN
#endif
#else
+ #ifdef __CYGWIN__
+ #define _BSD_SOURCE
+ #endif
#include <pthread.h>
#include <signal.h>
#include <sched.h>
@@ -772,6 +775,7 @@ public:
int r=tcounter;
return r;
}
+#ifdef _GTHREADS_POSIX_
static size_t defaultStackSize() {
pthread_attr_t attr;
size_t stacksize;
@@ -780,6 +784,7 @@ public:
pthread_attr_destroy(&attr);
return stacksize;
}
+#endif
static int liveCount() {
//return number of running (live) threads
return num_running();
@@ -821,7 +826,7 @@ namespace current_thread {
// Example usage:
// // Sleep for 100 milliseconds:
// current_thread::sleep_for(100);
- void sleep_for(const int32_t mstime);
+ void sleep_for(const int mstime);
}
// Define/macro cleanup
=====================================
gff.cpp
=====================================
@@ -1436,7 +1436,7 @@ GffObj* GffReader::newGffRec(GffLine* gffline, GffObj* parent, GffExon* pexon, G
gffline->exontype==exgffNone && !gffline->is_gene && !gffline->is_transcript) {
//unrecognized non-exon entity, should be discarded
newgfo->isDiscarded(true);
- this->discarded_ids.Add(gffline->ID, new int(1));
+ this->discarded_ids.Add(gffline->ID, 1);
}
if (replace_parent && glst) {
r=gfoReplace(*glst, newgfo, parent);
@@ -1479,7 +1479,7 @@ GffObj* GffReader::updateGffRec(GffObj* prevgfo, GffLine* gffline) {
}
-bool GffReader::readExonFeature(GffObj* prevgfo, GffLine* gffline, GHash<CNonExon>* pex) {
+bool GffReader::readExonFeature(GffObj* prevgfo, GffLine* gffline, GHash<CNonExon*>* pex) {
//this should only be called before prevgfo->finalize()!
bool r=true;
if (gffline->strand!=prevgfo->strand) {
@@ -1511,7 +1511,7 @@ bool GffReader::readExonFeature(GffObj* prevgfo, GffLine* gffline, GHash<CNonExo
return r;
}
-CNonExon* GffReader::subfPoolCheck(GffLine* gffline, GHash<CNonExon>& pex, char*& subp_name) {
+CNonExon* GffReader::subfPoolCheck(GffLine* gffline, GHash<CNonExon*>& pex, char*& subp_name) {
CNonExon* subp=NULL;
subp_name=NULL;
for (int i=0;i<gffline->num_parents;i++) {
@@ -1526,7 +1526,7 @@ CNonExon* GffReader::subfPoolCheck(GffLine* gffline, GHash<CNonExon>& pex, char*
return NULL;
}
-void GffReader::subfPoolAdd(GHash<CNonExon>& pex, GffObj* newgfo) {
+void GffReader::subfPoolAdd(GHash<CNonExon*>& pex, GffObj* newgfo) {
//this might become a parent feature later
if (newgfo->exons.Count()>0) {
char* xbuf=gfoBuildId(gffline->ID, gffline->gseqname);
@@ -1535,7 +1535,7 @@ if (newgfo->exons.Count()>0) {
}
}
-GffObj* GffReader::promoteFeature(CNonExon* subp, char*& subp_name, GHash<CNonExon>& pex) {
+GffObj* GffReader::promoteFeature(CNonExon* subp, char*& subp_name, GHash<CNonExon*>& pex) {
GffObj* prevp=subp->parent; //grandparent of gffline (e.g. gene)
//if (prevp!=gflst[subp->idx])
// GError("Error promoting subfeature %s, gflst index mismatch?!\n", subp->gffline->ID);
@@ -1657,7 +1657,7 @@ void GffReader::readAll() {
}
else { //regular GFF/GTF or perhaps TLF?
//loc_debug=false;
- GHash<CNonExon> pex; //keep track of any parented (i.e. exon-like) features that have an ID
+ GHash<CNonExon*> pex; //keep track of any parented (i.e. exon-like) features that have an ID
//and thus could become promoted to parent features
while (nextGffLine()!=NULL) {
GffObj* prevseen=NULL;
@@ -1969,7 +1969,7 @@ bool GffObj::processGeneSegments(GffReader* gfr) {
4)for each GeneCDSChain, pick best _gene_segment match (if any) and transfer CDSs to it
*/
GVec<int> geneSegs; //X_gene_segment features (children transcripts of this gene)
- GHash<GeneCDSChain> cdsChainById(false); // hash of CDS chains: CDS feature grouped by ID
+ GHashMap<const char*, GeneCDSChain*> cdsChainById(false); // hash of CDS chains: CDS feature grouped by ID
GPVec<GeneCDSChain> cdsChains; // CDS chains storage
if (cdss==NULL || cdss->Count()==0 || children.Count()==0)
return false; //we shouldn't be here
@@ -1998,7 +1998,7 @@ bool GffObj::processGeneSegments(GffReader* gfr) {
else { //new CDS chain:
gcc=new GeneCDSChain(i, cdss->Get(i)->start, cdss->Get(i)->end);
cdsChains.Add(gcc);
- cdsChainById.shkAdd(id, gcc);
+ cdsChainById.Add(id, gcc);
}
}
for (int i=0;i<cdss->Count();i++) {
=====================================
gff.h
=====================================
@@ -8,7 +8,8 @@
#include "codons.h"
#include "GFaSeqGet.h"
#include "GList.hh"
-#include "GHash.hh"
+//#include "GHash.hh"
+#include "GHashMap.hh"
#ifdef CUFFLINKS
#include <boost/crc.hpp> // for boost::crc_32_type
@@ -18,6 +19,7 @@
extern int gff_fid_mRNA; // "mRNA" feature name
extern int gff_fid_transcript; // *RNA, *transcript feature name
extern int gff_fid_exon;
+extern int gff_fid_CDS;
extern const uint GFF_MAX_LOCUS;
extern const uint GFF_MAX_EXON;
@@ -403,13 +405,13 @@ class GffNameList:public GPVec<GffNameInfo> {
friend class GffNameInfo;
friend class GffNames;
protected:
- GHash<GffNameInfo> byName;//hash with shared keys
+ GHashMap<const char*, GffNameInfo*> byName;//hash with shared keys
int idlast; //fList index of last added/reused name
int addStatic(const char* tname) {// fast add
GffNameInfo* f=new GffNameInfo(tname);
idlast=this->Add(f);
f->idx=idlast;
- byName.shkAdd(f->name,f);
+ byName.Add(f->name,f);
return idlast;
}
public:
@@ -437,7 +439,7 @@ public:
f=new GffNameInfo(tname);
fidx=this->Add(f);
f->idx=fidx;
- byName.shkAdd(f->name,f);
+ byName.Add(f->name,f);
}
idlast=fidx;
return fidx;
@@ -447,7 +449,7 @@ public:
GffNameInfo* f=new GffNameInfo(tname);
int fidx=this->Add(f);
f->idx=fidx;
- byName.shkAdd(f->name,f);
+ byName.Add(f->name,f);
return fidx;
}
@@ -472,11 +474,11 @@ class GffNames {
GffNames():tracks(),gseqs(),attrs(), feats() {
numrefs=0;
//the order below is critical!
- //has to match: gff_fid_mRNA, gff_fid_exon
+ //has to match: gff_fid_mRNA, gff_fid_exon, gff_fid_CDS
gff_fid_mRNA = feats.addStatic("mRNA");//index 0=gff_fid_mRNA
gff_fid_transcript=feats.addStatic("transcript");//index 1=gff_fid_transcript
- gff_fid_exon=feats.addStatic("exon");//index 1=gff_fid_exon
- //feats.addStatic("CDS"); //index 2=gff_fid_CDS
+ gff_fid_exon=feats.addStatic("exon");//index 2=gff_fid_exon
+ gff_fid_CDS=feats.addStatic("CDS"); //index 3=gff_fid_CDS
}
};
@@ -842,8 +844,6 @@ public:
//complete parsing: must be called in order to merge adjacent/close proximity subfeatures
void parseAttrs(GffAttrs*& atrlist, char* info, bool isExon=false, bool CDSsrc=false);
const char* getSubfName() { //returns the generic feature type of the entries in exons array
- //int sid=exon_ftype_id;
- //if (sid==gff_fid_exon && isCDS) sid=gff_fid_CDS;
return names->feats.getName(subftype_id);
}
void setCDS(uint cd_start, uint cd_end, char phase=0);
@@ -1179,8 +1179,7 @@ class GffReader {
//bool gene2exon; // for childless genes: add an exon as the entire gene span
GHash<int> discarded_ids; //for transcriptsOnly mode, keep track
// of discarded parent IDs
- GHash< GPVec<GffObj> > phash; //transcript_id => GPVec<GffObj>(false)
- //GHash<int> tids; //just for transcript_id uniqueness
+ GHash< GPVec<GffObj>* > phash; //transcript_id => GPVec<GffObj>(false)
char* gfoBuildId(const char* id, const char* ctg);
//void gfoRemove(const char* id, const char* ctg);
GffObj* gfoAdd(GffObj* gfo);
@@ -1190,9 +1189,9 @@ class GffReader {
bool pFind(const char* id, GPVec<GffObj>*& glst);
GffObj* gfoFind(const char* id, GPVec<GffObj>* & glst, const char* ctg=NULL,
char strand=0, uint start=0, uint end=0);
- CNonExon* subfPoolCheck(GffLine* gffline, GHash<CNonExon>& pex, char*& subp_name);
- void subfPoolAdd(GHash<CNonExon>& pex, GffObj* newgfo);
- GffObj* promoteFeature(CNonExon* subp, char*& subp_name, GHash<CNonExon>& pex);
+ CNonExon* subfPoolCheck(GffLine* gffline, GHash<CNonExon*>& pex, char*& subp_name);
+ void subfPoolAdd(GHash<CNonExon*>& pex, GffObj* newgfo);
+ GffObj* promoteFeature(CNonExon* subp, char*& subp_name, GHash<CNonExon*>& pex);
#ifdef CUFFLINKS
boost::crc_32_type _crc_result;
@@ -1207,7 +1206,7 @@ class GffReader {
//GffObj* replaceGffRec(GffLine* gffline, bool keepAttr, bool noExonAttr, int replaceidx);
GffObj* updateGffRec(GffObj* prevgfo, GffLine* gffline);
GffObj* updateParent(GffObj* newgfh, GffObj* parent);
- bool readExonFeature(GffObj* prevgfo, GffLine* gffline, GHash<CNonExon>* pex=NULL);
+ bool readExonFeature(GffObj* prevgfo, GffLine* gffline, GHash<CNonExon*>* pex=NULL);
GPVec<GSeqStat> gseqStats; //populated after finalize() with only the ref seqs in this file
GffReader(FILE* f=NULL, bool t_only=false, bool sort=false):linebuf(NULL), fpos(0),
buflen(0), flags(0), fh(f), fname(NULL), commentParser(NULL), gffline(NULL),
=====================================
htest.cpp
=====================================
@@ -2,21 +2,31 @@
#include "GArgs.h"
#include "GStr.h"
#include "GVec.hh"
-#include "GHash.hh"
+namespace old {
+ #include "GHash.hh"
+}
#include "GResUsage.h"
#include <cstdint>
#include <iostream>
-#include <string>
-//#include <tsl/hopscotch_map.h>
-//#include <tsl/hopscotch_set.h>
+//#include "tsl/hopscotch_map.h"
+//#include "tsl/robin_map.h"
+#include <unordered_map>
+//#include "ska/bytell_hash_map.hpp"
+
+//#include "khashl.hh"
+//#include "city.h"
+#include "GHashMap.hh"
#define USAGE "Usage:\n\
- htest textfile.. \n\
+ htest [-Q] [-C] [-n num_clusters] textfile.. \n\
+ E.g. quick query test: ./htest -Q qtest_str.dta\n\
\n\
"
-static void strFreeProc(pointer item) {
- GFREE(item);
-}
+//quick query test: ./htest -Q qtest_str.dta
+
+bool qryMode=false;
+bool checkRM=false;
+int numClusters=500;
struct HStrData {
int cmd; // 0=add, 1=remove, 2=clear
@@ -24,13 +34,21 @@ struct HStrData {
HStrData(char* s=NULL, int c=0):cmd(c), str(s) { }
};
-int loadStrings(FILE* f, GPVec<HStrData>& strgsuf, GPVec<HStrData>& strgs, int toLoad=0) {
+int loadStrings(FILE* f, GPVec<HStrData>& strgsuf, GPVec<HStrData>& strgs, int toLoad) {
int num=0;
GLineReader lr(f);
char* line=NULL;
+ int numcl=0;
while ((line=lr.nextLine())!=NULL) {
int len=strlen(line);
- if (len<4) continue;
+ if (len<3) continue;
+ if (line[0]=='>') {
+ numcl++;
+ if (toLoad && numcl>toLoad) {
+ break;
+ }
+ continue;
+ }
if (strcmp(line, "HCLR")==0) {
strgs.Add(new HStrData(NULL, 2));
strgsuf.Add(new HStrData(NULL, 2));
@@ -46,7 +64,6 @@ int loadStrings(FILE* f, GPVec<HStrData>& strgsuf, GPVec<HStrData>& strgs, int t
line[len-3]=0;
strgs.Add(new HStrData(line));
num++;
- if (toLoad && num>=toLoad) break;
} //while line
return num;
}
@@ -55,93 +72,520 @@ void showTimings(GResUsage swatch) {
char *wtime=commaprintnum((uint64_t)swatch.elapsed());
char *utime=commaprintnum((uint64_t)swatch.u_elapsed());
char *stime=commaprintnum((uint64_t)swatch.s_elapsed());
+ char *smem=commaprintnum((uint64_t)swatch.memoryUsed());
GMessage("Elapsed time (microseconds): %12s us\n", wtime);
GMessage(" user time: %12s us\n", utime);
GMessage(" system time: %12s us\n", stime);
- GFREE(wtime);GFREE(utime);GFREE(stime);
+ GMessage(" mem usage: %12s KB\n", smem);
+
+ GFREE(wtime);GFREE(utime);GFREE(stime); GFREE(smem);
}
+// default values recommended by http://isthe.com/chongo/tech/comp/fnv/
+const uint32_t Prime = 0x01000193; // 16777619
+const uint32_t Seed = 0x811C9DC5; // 2166136261
+/// hash a single byte
+inline uint32_t fnv1a(unsigned char b, uint32_t h = Seed) { return (b ^ h) * Prime; }
+
+/// hash a C-style string
+uint32_t fnv1a(const char* text, uint32_t hash = Seed) {
+ while (*text)
+ hash = fnv1a((unsigned char)*text++, hash);
+ return hash;
+}
+
+struct cstr_eq {
+ inline bool operator()(const char* x, const char* y) const {
+ return (strcmp(x, y) == 0);
+ }
+};
+
+struct cstr_hash {
+ inline uint32_t operator()(const char* s) const {
+ return XXH32(s, std::strlen(s),0);
+ //return fnv1a(s);
+ }
+};
void run_GHash(GResUsage& swatch, GPVec<HStrData> & hstrs, const char* label) {
- GHash<int> ghash;
+ old::GHash<int> ghash;
int num_add=0, num_rm=0, num_clr=0;
GMessage("----------------- %s ----------------\n", label);
ghash.Clear();
swatch.start();
+ int cl_i=0;
+ int prevcmd=2;
for (int i=0;i<hstrs.Count();i++) {
+ if (hstrs[i]->cmd==prevcmd) {
+ if (prevcmd==2) continue;
+ } else prevcmd=hstrs[i]->cmd;
switch (hstrs[i]->cmd) {
- case 0:ghash.fAdd(hstrs[i]->str.chars(), new int(1)); num_add++; break;
- case 1:ghash.Remove(hstrs[i]->str.chars()); num_rm++; break;
- case 2:ghash.Clear(); num_clr++; break;
+ case 0:
+ if (cl_i==0) cl_i=i;
+ ghash.fAdd(hstrs[i]->str.chars(), new int(i));
+ num_add++;
+ break;
+ case 1:
+ if (qryMode) break;
+ ghash.Remove(hstrs[i]->str.chars());
+ num_rm++;
+ break;
+ case 2:
+ //run tests here
+ if (qryMode) {
+ //run some query tests here
+ for(int j=cl_i;j<i;j+=3) {
+ if (hstrs[j]->cmd) continue;
+ int* v=ghash[hstrs[j]->str.chars()];
+ if (v==NULL)
+ GError("Error at <%s>, key %s not found (count:%d, cl_i=%d, i=%d)!\n",label, hstrs[j]->str.chars(),
+ ghash.Count(), cl_i, i );
+ if (*v!=j)
+ GError("Error at <%s>, invalid value for key %s!\n",label, hstrs[j]->str.chars() );
+ }
+ }
+ cl_i=0;
+ ghash.Clear();
+ num_clr++;
+ break;
}
}
- ghash.Clear();
swatch.stop();
+ ghash.Clear();
GMessage(" (%d inserts, %d deletions, %d clears)\n", num_add, num_rm, num_clr);
}
/*
void run_Hopscotch(GResUsage& swatch, GPVec<HStrData> & hstrs, const char* label) {
int num_add=0, num_rm=0, num_clr=0;
- tsl::hopscotch_map<std::string, int> hsmap;
+ //tsl::hopscotch_map<const char*, int, cstr_hash, cstr_eq> hsmap;
+ tsl::hopscotch_map<const char*, int, cstr_hash, cstr_eq,
+ std::allocator<std::pair<const char*, int>>, 30, true> hsmap;
GMessage("----------------- %s ----------------\n", label);
swatch.start();
+ int cl_i=0;
+ int prevcmd=2;
for (int i=0;i<hstrs.Count();i++) {
+ if (hstrs[i]->cmd==prevcmd) {
+ if (prevcmd==2) continue;
+ } else prevcmd=hstrs[i]->cmd;
switch (hstrs[i]->cmd) {
- case 0:hsmap.insert({hstrs[i]->str.chars(), 1}); num_add++; break;
- case 1:hsmap.erase(hstrs[i]->str.chars()); num_rm++; break;
- case 2:hsmap.clear(); num_clr++; break;
+ case 0:
+ if (cl_i==0) cl_i=i;
+ hsmap.insert({hstrs[i]->str.chars(), i});
+ num_add++;
+ break;
+ case 1:
+ if (qryMode) break;
+ hsmap.erase(hstrs[i]->str.chars());
+ num_rm++; break;
+ case 2:
+ if (qryMode) {
+ //run some query tests here
+ //with strings from hstrs[cl_i .. i-1] range
+ for(int j=cl_i;j<i;j+=3) {
+ if (hstrs[j]->cmd) continue;
+ int v=hsmap[hstrs[j]->str.chars()];
+ if (v!=j)
+ GError("Error at <%s>, invalid value for key %s! (got %d, expected %d)\n",label,
+ hstrs[j]->str.chars(), v, j );
+ }
+ }
+ cl_i=0;
+ hsmap.clear();
+ num_clr++;
+ break;
}
}
+ swatch.stop();
hsmap.clear();
+ GMessage(" (%d inserts, %d deletions, %d clears)\n", num_add, num_rm, num_clr);
+}
+
+void run_Robin(GResUsage& swatch, GPVec<HStrData> & hstrs, const char* label) {
+ int num_add=0, num_rm=0, num_clr=0;
+ //tsl::hopscotch_map<const char*, int, cstr_hash, cstr_eq> hsmap;
+ tsl::robin_map<const char*, int, cstr_hash, cstr_eq,
+ std::allocator<std::pair<const char*, int>>, true> rmap;
+ GMessage("----------------- %s ----------------\n", label);
+ swatch.start();
+ int cl_i=0;
+ int prevcmd=2;
+ for (int i=0;i<hstrs.Count();i++) {
+ if (hstrs[i]->cmd==prevcmd) {
+ if (prevcmd==2) continue;
+ } else prevcmd=hstrs[i]->cmd;
+ switch (hstrs[i]->cmd) {
+ case 0:
+ if (cl_i==0) cl_i=i;
+ rmap.insert({hstrs[i]->str.chars(), i});
+ num_add++;
+ break;
+ case 1: if (qryMode) break;
+ rmap.erase(hstrs[i]->str.chars()); num_rm++; break;
+ case 2:
+ if (qryMode) {
+ //run some query tests here
+ //with strings from hstrs[cl_i .. i-1] range
+ for(int j=cl_i;j<i;j+=3) {
+ if (hstrs[j]->cmd) continue;
+ int v=rmap[hstrs[j]->str.chars()];
+ if (v!=j)
+ GError("Error at <%s>, invalid value for key %s!\n",label, hstrs[j]->str.chars() );
+ }
+ }
+ cl_i=0;
+ rmap.clear(); num_clr++; break;
+ }
+ }
swatch.stop();
+ rmap.clear();
+ GMessage(" (%d inserts, %d deletions, %d clears)\n", num_add, num_rm, num_clr);
+}
+
+void run_Bytell(GResUsage& swatch, GPVec<HStrData> & hstrs, const char* label) {
+ int num_add=0, num_rm=0, num_clr=0;
+ ska::bytell_hash_map<const char*, int, cstr_hash, cstr_eq> bmap;
+ GMessage("----------------- %s ----------------\n", label);
+ swatch.start();
+ for (int i=0;i<hstrs.Count();i++) {
+ switch (hstrs[i]->cmd) {
+ case 0:bmap.insert({hstrs[i]->str.chars(), 1}); num_add++; break;
+ case 1:bmap.erase(hstrs[i]->str.chars()); num_rm++; break;
+ case 2:bmap.clear(); num_clr++; break;
+ }
+ }
+ swatch.stop();
+ bmap.clear();
GMessage(" (%d inserts, %d deletions, %d clears)\n", num_add, num_rm, num_clr);
}
*/
+void run_Khashl(GResUsage& swatch, GPVec<HStrData> & hstrs, const char* label) {
+ int num_add=0, num_rm=0, num_clr=0;
+ klib::KHashMapCached<const char*, int, cstr_hash, cstr_eq > khmap;
+ GMessage("----------------- %s ----------------\n", label);
+ swatch.start();
+ int cl_i=0;
+ int prevcmd=2;
+ for (int i=0;i<hstrs.Count();i++) {
+ if (hstrs[i]->cmd==prevcmd) {
+ if (prevcmd==2) continue;
+ } else prevcmd=hstrs[i]->cmd;
+ switch (hstrs[i]->cmd) {
+ case 0:if (cl_i==0) cl_i=i;
+ khmap[hstrs[i]->str.chars()]=i; num_add++; break;
+ case 1:if (qryMode) break;
+ khmap.del(khmap.get(hstrs[i]->str.chars())); num_rm++; break;
+ case 2:
+ if (qryMode) {
+ //run some query tests here
+ for(int j=cl_i;j<i;j+=3) {
+ if (hstrs[j]->cmd) continue;
+ int v=khmap[hstrs[j]->str.chars()];
+ if (v!=j)
+ GError("Error at <%s>, invalid value for key %s!\n",label, hstrs[j]->str.chars() );
+ }
+ }
+ cl_i=0;
+ khmap.clear(); num_clr++; break;
+ }
+ }
+ swatch.stop();
+ khmap.clear();
+ GMessage(" (%d inserts, %d deletions, %d clears)\n", num_add, num_rm, num_clr);
+}
+
+void run_GHashMap(GResUsage& swatch, GPVec<HStrData> & hstrs, const char* label) { // replays the commands in hstrs against a GHash using cstr_hash, timed by swatch
+ int num_add=0, num_rm=0, num_clr=0;
+ //GKHashSet<const char*> khset;
+ //GHashSet<> khset;
+ GHash<int, cstr_hash, GHashKey_Eq<const char*>, uint32_t> khset;
+ GMessage("----------------- %s ----------------\n", label);
+ int cl_i=0; // index of the first add seen since the last clear (0 = none recorded yet)
+ swatch.start();
+ int prevcmd=2;
+ for (int i=0;i<hstrs.Count();i++) {
+ if (hstrs[i]->cmd==prevcmd) {
+ if (prevcmd==2) continue; // consecutive clear commands are collapsed into one
+ } else prevcmd=hstrs[i]->cmd;
+ switch (hstrs[i]->cmd) { // 0=add, 1=remove, 2=clear
+ case 0: if (cl_i==0) cl_i=i;
+ khset.Add(hstrs[i]->str.chars(), i); num_add++; break; // stored value is the command index, verified in query mode
+ case 1:if (qryMode) break; // removals are skipped in query mode so every added key stays queryable
+ if (khset.Remove(hstrs[i]->str.chars())<0)
+ if (checkRM) GMessage("Warning: key %s could not be removed!\n", hstrs[i]->str.chars());
+ num_rm++;
+ break;
+ case 2:
+ if (qryMode) {
+ //run some query tests here
+ //with strings from hstrs[cl_i .. i-1] range
+ for(int j=cl_i;j<i;j+=3) { // samples every third entry
+ if (hstrs[j]->cmd) continue; // only add commands carry a key to look up
+ int* v=khset[hstrs[j]->str.chars()];
+ if (v==NULL) GError("Error at <%s>, key %s not found!\n",label, hstrs[j]->str.chars() ); // never dereference a failed lookup (same check as run_GHash)
+ else if (*v!=j) GError("Error at <%s>, invalid value for key %s!\n",label, hstrs[j]->str.chars() );
+ }
+ }
+ cl_i=0;
+ khset.Clear(); num_clr++; break;
+ }
+ }
+ swatch.stop();
+ khset.Clear(); // final cleanup happens after the stopwatch is stopped
+ GMessage(" (%d inserts, %d deletions, %d clears)\n", num_add, num_rm, num_clr);
+}
+
+void run_GxxHashMap(GResUsage& swatch, GPVec<HStrData> & hstrs, const char* label) { // replays the commands in hstrs against a default GHash<int>, timed by swatch
+ int num_add=0, num_rm=0, num_clr=0;
+ GHash<int> khset;
+ GMessage("----------------- %s ----------------\n", label);
+ int cl_i=0; // index of the first add seen since the last clear (0 = none recorded yet)
+ swatch.start();
+ int prevcmd=2;
+ for (int i=0;i<hstrs.Count();i++) {
+ if (hstrs[i]->cmd==prevcmd) {
+ if (prevcmd==2) continue; // consecutive clear commands are collapsed into one
+ } else prevcmd=hstrs[i]->cmd;
+ switch (hstrs[i]->cmd) { // 0=add, 1=remove, 2=clear
+ case 0: if (cl_i==0) cl_i=i;
+ khset.Add(hstrs[i]->str.chars(), i); num_add++; break; // stored value is the command index, verified in query mode
+ case 1:if (qryMode) break; // removals are skipped in query mode so every added key stays queryable
+ if (khset.Remove(hstrs[i]->str.chars())<0)
+ if (checkRM) GMessage("Warning: key %s could not be removed!\n", hstrs[i]->str.chars());
+ num_rm++;
+ break;
+ case 2:
+ if (qryMode) {
+ //run some query tests here
+ //with strings from hstrs[cl_i .. i-1] range
+ for(int j=cl_i;j<i;j+=3) { // samples every third entry
+ if (hstrs[j]->cmd) continue; // only add commands carry a key to look up
+ int* v=khset[hstrs[j]->str.chars()];
+ if (v==NULL) GError("Error at <%s>, key %s not found!\n",label, hstrs[j]->str.chars() ); // never dereference a failed lookup (same check as run_GHash)
+ else if (*v!=j) GError("Error at <%s>, invalid value for key %s!\n",label, hstrs[j]->str.chars() );
+ }
+ }
+ cl_i=0;
+ khset.Clear(); num_clr++; break;
+ }
+ }
+ swatch.stop();
+ khset.Clear(); // final cleanup happens after the stopwatch is stopped
+ GMessage(" (%d inserts, %d deletions, %d clears)\n", num_add, num_rm, num_clr);
+}
+
+void run_GHashMapShk(GResUsage& swatch, GPVec<HStrData> & hstrs, const char* label) { // replays the commands in hstrs against a GHashMap<const char*, int>, timed by swatch
+ int num_add=0, num_rm=0, num_clr=0;
+ GHashMap<const char*, int> khset;
+ GMessage("----------------- %s ----------------\n", label);
+ int cl_i=0; // index of the first add seen since the last clear (0 = none recorded yet)
+ swatch.start();
+ int prevcmd=2;
+ for (int i=0;i<hstrs.Count();i++) {
+ if (hstrs[i]->cmd==prevcmd) {
+ if (prevcmd==2) continue; // consecutive clear commands are collapsed into one
+ } else prevcmd=hstrs[i]->cmd;
+ switch (hstrs[i]->cmd) { // 0=add, 1=remove, 2=clear
+ case 0: if (cl_i==0) cl_i=i;
+ khset.Add(hstrs[i]->str.chars(),i); num_add++; break; // stored value is the command index, verified in query mode
+ case 1:if (qryMode) break; // removals are skipped in query mode so every added key stays queryable
+ if (khset.Remove(hstrs[i]->str.chars())<0)
+ if (checkRM) GMessage("Warning: key %s could not be removed!\n", hstrs[i]->str.chars());
+ num_rm++;
+ break;
+ case 2:
+ if (qryMode) {
+ //run some query tests here
+ //with strings from hstrs[cl_i .. i-1] range
+ for(int j=cl_i;j<i;j+=3) { // samples every third entry
+ if (hstrs[j]->cmd) continue; // only add commands carry a key to look up
+ int* v=khset[hstrs[j]->str.chars()];
+ if (v==NULL) GError("Error at <%s>, key %s not found!\n",label, hstrs[j]->str.chars() ); // never dereference a failed lookup (same check as run_GHash)
+ else if (*v!=j) GError("Error at <%s>, invalid value for key %s!\n",label, hstrs[j]->str.chars() );
+ }
+ }
+ cl_i=0;
+ khset.Clear(); num_clr++; break;
+ }
+ }
+ swatch.stop();
+ khset.Clear(); // final cleanup happens after the stopwatch is stopped
+ GMessage(" (%d inserts, %d deletions, %d clears)\n", num_add, num_rm, num_clr);
+}
+
+struct SObj {
+ GStr atr;
+ int val;
+ SObj(const char* a=NULL, const int v=0):atr(a),val(v) { }
+ bool operator<(const SObj& o) const { return val<o.val; }
+ bool operator==(const SObj& o) const {
+ return (atr==o.atr && val==o.val);
+ }
+};
+
int main(int argc, char* argv[]) {
GPVec<HStrData> strs;
GPVec<HStrData> sufstrs;
- strs.setFreeItem(strFreeProc);
- sufstrs.setFreeItem(strFreeProc);
//GArgs args(argc, argv, "hg:c:s:t:o:p:help;genomic-fasta=COV=PID=seq=out=disable-flag;test=");
- GArgs args(argc, argv, "h");
+ GArgs args(argc, argv, "hQCn:");
//fprintf(stderr, "Command line was:\n");
//args.printCmdLine(stderr);
args.printError(USAGE, true);
if (args.getOpt('h') || args.getOpt("help")) GMessage(USAGE);
+ GStr s=args.getOpt('n');
+ if (!s.is_empty()) {
+ numClusters=s.asInt();
+ if (numClusters<=0)
+ GError("%s\nError: invalid value for -n !\n", USAGE);
+ }
+ qryMode=(args.getOpt('Q'));
+ checkRM=(args.getOpt('C'));
int numargs=args.startNonOpt();
const char* a=NULL;
FILE* f=NULL;
int total=0;
+//==== quick test area
+ /*
+ std::unordered_map<SObj*, int > umap;
+ GHash<int> gh;
+ GPVec<SObj> ptrs(false);
+ GQHash<int, SObj*> ihash(false);
+ GQHash<SObj*, int> phash;
+ GQStrHash<SObj*> shash;
+ const char* tstrs[6] = {"twelve", "five", "nine", "eleven", "three", "nope"};
+ int vals[6] = { 12, 5, 9, 11, 3, 777 };
+ char buf[20];
+ for (int i=0;i<5;i++) {
+ SObj* o=new SObj(tstrs[i], vals[i]*10);
+ ptrs.Add(o);
+ sprintf(buf, "%lx", o);
+ GMessage("SObj (%s, %d) pointer added: %s\n",tstrs[i], o->val, buf);
+ gh.Add(buf, new int(vals[i]));
+ shash.Add(tstrs[i], o);
+ ihash.Add(vals[i], o);
+ phash.Add(o, vals[i]);
+ umap[o]=vals[i];
+ }
+ ptrs.Sort();
+ GMessage("shash has now %d entries.\n", shash.Count());
+ //enumerate shash entries:
+ {
+ shash.startIterate();
+ SObj* iv=NULL;
+ while (const char* k=shash.Next(iv)) {
+ GMessage("Enumerating shash entry: (%s => %lx)\n",
+ k, iv);
+ }
+ }
+ //qry:
+ for (int i=0;i<ptrs.Count();i++) {
+ SObj* o=ptrs[i];
+ //test tset
+ SObj* v=shash.Find(o->atr.chars());
+ if (v==NULL)
+ GMessage("key <%s> not found in shash!\n", o->atr.chars());
+ int* iv=phash.Find(o);
+ if (iv==NULL)
+ GMessage("key <%lx> not found in phash!\n", o);
+ //if (!oset[*o])
+// GMessage("struct {%s, %d} not found in oset!\n", o->atr.chars(), o->val);
+ //sprintf(buf, "%lx", o);
+ //int* hv=gh[buf];
+ //GMessage("Item {%s, %d} : GHash retrieved flag = %d, umap retrieved flag = %d\n",
+ // o->atr.chars(), o->val, *hv, umap[o]);
+ }
+//SObj* n=new SObj("test", 10);
+//if (!pset[n])
+// GMessage("key <%lx> not found in pset!\n", n);
+
+ for (int i=0;i<6;i++) {
+ SObj* o=shash[tstrs[i]];
+ if (o==NULL) GMessage("key <%s> not found in shash!\n", tstrs[i]);
+ if (o && i<5) {
+ if (o->atr!=tstrs[i])
+ GMessage("shash value does not match key <%s!\n", tstrs[i]);
+ }
+ }
+
+ //delete n;
+ //int v=umap[n];
+ //GMessage("Non-existing test entry returned value %d\n", v);
+ */
+ /*
+ auto found=umap.find(n);
+ if (found!=umap.end()) {
+ GMessage("Found flags %d for entry {\"%s\", %d}\n", found->second,
+ n->atr.chars(), found->first->val );
+ } else GMessage("New test obj not found !\n");
+
+ return(0);
+*/
+//==== quick test area end
if (numargs==0) {
- a="htest_data.lst";
+ //a="htest_data.lst";
+ a="htest_over500.lst";
f=fopen(a, "r");
if (f==NULL) GError("Error: could not open file %s !\n", a);
- int num=loadStrings(f, sufstrs, strs, 600000);
+ GMessage("loading %d clusters from file..\n", numClusters);
+ int num=loadStrings(f, sufstrs, strs, numClusters);
total+=num;
- GMessage("..loaded %d strings from file %s\n", total, a);
+ fclose(f);
}
else {
while ((a=args.nextNonOpt())) {
f=fopen(a, "r");
if (f==NULL) GError("Error: could not open file %s !\n", a);
- int num=loadStrings(f, sufstrs, strs, 600000);
+ int num=loadStrings(f, sufstrs, strs, numClusters);
total+=num;
+ fclose(f);
}
}
GResUsage swatch;
- run_GHash(swatch, strs, "GHash no suffix");
+ run_GHash(swatch, sufstrs, "GHash w/ suffix");
showTimings(swatch);
+ //run_GHash(swatch, strs, "GHash no suffix");
+ //showTimings(swatch);
- run_GHash(swatch, sufstrs, "GHash w/ suffix");
+/*
+ run_Hopscotch(swatch, sufstrs, "hopscotch w/ suffix");
showTimings(swatch);
- /*
run_Hopscotch(swatch, strs, "hopscotch no suffix");
showTimings(swatch);
+*/
+/*
+ run_Robin(swatch, sufstrs, "robin w/ suffix");
+ showTimings(swatch);
+ run_Robin(swatch, strs, "robin no suffix");
+ showTimings(swatch);
+*/
+/*
+ run_Khashl(swatch, sufstrs, "khashl w/ suffix");
+ showTimings(swatch);
+ run_Khashl(swatch, strs, "khashl no suffix");
+ showTimings(swatch);
+*/
+ run_GHashMap(swatch, sufstrs, "GHashMap xxHash32 w/ suffix");
+ showTimings(swatch);
- run_Hopscotch(swatch, sufstrs, "hopscotch w/ suffix");
+ run_GxxHashMap(swatch, sufstrs, "GHashMap xxHash64 w/ suffix");
+ showTimings(swatch);
+
+ //run_GHashMap(swatch, strs, "GHashMap no suffix");
+ //showTimings(swatch);
+
+ //run_GHashMapShk(swatch, sufstrs, "GHashSetShk w/ suffix");
+ //showTimings(swatch);
+ //run_GHashMapShk(swatch, strs, "GHashSetShk no suffix");
+ //showTimings(swatch);
+
+/*
+ run_Bytell(swatch, sufstrs, "bytell w/ suffix");
showTimings(swatch);
- */
+ run_Bytell(swatch, strs, "bytell no suffix");
+ showTimings(swatch);
+*/
+
}
=====================================
khashl.hh
=====================================
@@ -0,0 +1,261 @@
+#ifndef __AC_KHASHL_HPP
+#define __AC_KHASHL_HPP
+
+#include <functional> // for std::equal_to
+#include <cstdlib> // for malloc() etc
+#include <cstring> // for memset()
+#include <stdint.h> // for uint32_t
+
+/* // ==> Code example <==
+#include <cstdio>
+#include "khashl.hpp"
+
+int main(void)
+{
+ klib::KHashMap<uint32_t, int, std::hash<uint32_t> > h; // NB: C++98 doesn't have std::hash
+ uint32_t k;
+ int absent;
+ h[43] = 1, h[53] = 2, h[63] = 3, h[73] = 4; // one way to insert
+ k = h.put(53, &absent), h.value(k) = -2; // another way to insert
+ if (!absent) printf("already in the table\n"); // which allows to test presence
+ if (h.get(33) == h.end()) printf("not found!\n"); // test presence without insertion
+ h.del(h.get(43)); // deletion
+ for (k = 0; k != h.end(); ++k) // traversal
+ if (h.occupied(k)) // some buckets are not occupied; skip them
+ printf("%u => %d\n", h.key(k), h.value(k));
+ return 0;
+}
+*/
+
+namespace klib {
+
+/***********
+ * HashSet *
+ ***********/
+
+template<class T, class Hash, class Eq = std::equal_to<T>, typename khint_t = uint32_t>
+class KHashSet {
+protected:
+ khint_t bits, count;
+ uint32_t *used;
+ T *keys;
+ static inline uint32_t __kh_used(const uint32_t *flag, khint_t i) { return flag[i>>5] >> (i&0x1fU) & 1U; };
+ static inline void __kh_set_used(uint32_t *flag, khint_t i) { flag[i>>5] |= 1U<<(i&0x1fU); };
+ static inline void __kh_set_unused(uint32_t *flag, khint_t i) { flag[i>>5] &= ~(1U<<(i&0x1fU)); };
+ static inline khint_t __kh_fsize(khint_t m) { return m<32? 1 : m>>5; }
+ static inline khint_t __kh_h2b(uint32_t hash, khint_t bits) { return hash * 2654435769U >> (32 - bits); }
+ static inline khint_t __kh_h2b(uint64_t hash, khint_t bits) { return hash * 11400714819323198485ULL >> (64 - bits); }
+public:
+ KHashSet() : bits(0), count(0), used(0), keys(0) {};
+ ~KHashSet() { std::free(used); std::free(keys); };
+ inline khint_t n_buckets() const { return used? khint_t(1) << bits : 0; }
+ inline khint_t end() const { return n_buckets(); }
+ inline khint_t size() const { return count; }
+ inline T &key(khint_t x) { return keys[x]; };
+ inline bool occupied(khint_t x) const { return (__kh_used(used, x) != 0); }
+ void clear(void) {
+ if (!used) return;
+ memset(used, 0, __kh_fsize(n_buckets()) * sizeof(uint32_t));
+ count = 0;
+ }
+ khint_t get(const T &key) const {
+ khint_t i, last, mask, nb;
+ if (keys == 0) return 0;
+ nb = n_buckets();
+ mask = nb - khint_t(1);
+ i = last = __kh_h2b(Hash()(key), bits);
+ while (__kh_used(used, i) && !Eq()(keys[i], key)) {
+ i = (i + khint_t(1)) & mask;
+ if (i == last) return nb;
+ }
+ return !__kh_used(used, i)? nb : i;
+ }
+ int resize(khint_t new_nb) { // rehash in place into >= new_nb buckets (rounded up to a power of two, minimum 4); returns 0 on success or no-op, -1 on allocation failure
+ uint32_t *new_used = 0;
+ khint_t j = 0, x = new_nb, nb, new_bits, new_mask;
+ while ((x >>= khint_t(1)) != 0) ++j; // j = floor(log2(new_nb))
+ if (new_nb & (new_nb - 1)) ++j; // not a power of two: round the bit count up
+ new_bits = j > 2? j : 2;
+ new_nb = khint_t(1) << new_bits;
+ if (count > (new_nb>>1) + (new_nb>>2)) return 0; // requested size is too small (would exceed 75% load)
+ new_used = (uint32_t*)std::malloc(__kh_fsize(new_nb) * sizeof(uint32_t));
+ if (!new_used) return -1; /* not enough memory; must be checked before the flags are written */
+ memset(new_used, 0, __kh_fsize(new_nb) * sizeof(uint32_t));
+ nb = n_buckets();
+ if (nb < new_nb) { /* expand */
+ T *new_keys = (T*)std::realloc(keys, new_nb * sizeof(T));
+ if (!new_keys) { std::free(new_used); return -1; } /* table is left untouched on failure */
+ keys = new_keys;
+ } /* otherwise shrink */
+ new_mask = new_nb - 1;
+ for (j = 0; j != nb; ++j) {
+ if (!__kh_used(used, j)) continue;
+ T key = keys[j];
+ __kh_set_unused(used, j);
+ while (1) { /* kick-out process; sort of like in Cuckoo hashing */
+ khint_t i;
+ i = __kh_h2b(Hash()(key), new_bits);
+ while (__kh_used(new_used, i)) i = (i + khint_t(1)) & new_mask; /* linear probing in the new table */
+ __kh_set_used(new_used, i);
+ if (i < nb && __kh_used(used, i)) { /* kick out the existing element */
+ { T tmp = keys[i]; keys[i] = key; key = tmp; }
+ __kh_set_unused(used, i); /* mark it as deleted in the old hash table */
+ } else { /* write the element and jump out of the loop */
+ keys[i] = key;
+ break;
+ }
+ }
+ }
+ if (nb > new_nb) { /* shrink the hash table; if realloc fails, keep the old (larger, still valid) block instead of losing it */
+ T *shrunk = (T*)std::realloc(keys, new_nb * sizeof(T)); if (shrunk) keys = shrunk; }
+ std::free(used); /* free the working space */
+ used = new_used, bits = new_bits;
+ return 0;
+ }
+ khint_t put(const T &key, int *absent_ = 0) {
+ khint_t nb, i, last, mask;
+ int absent = -1;
+ nb = n_buckets();
+ if (count >= (nb>>1) + (nb>>2)) { // rehashing
+ if (resize(nb + khint_t(1)) < 0) {
+ if (absent_) *absent_ = -1;
+ return nb;
+ }
+ nb = n_buckets();
+ } // TODO: to implement automatically shrinking; resize() already support shrinking
+ mask = nb - 1;
+ i = last = __kh_h2b(Hash()(key), bits);
+ while (__kh_used(used, i) && !Eq()(keys[i], key)) {
+ i = (i + 1U) & mask;
+ if (i == last) break;
+ }
+ if (!__kh_used(used, i)) { // not present at all
+ keys[i] = key;
+ __kh_set_used(used, i);
+ ++count, absent = 1;
+ } else absent = 0; /* Don't touch keys[i] if present */
+ if (absent_) *absent_ = absent;
+ return i;
+ }
+ int del(khint_t i) {
+ khint_t j = i, k, mask, nb = n_buckets();
+ if (keys == 0 || i >= nb) return 0;
+ mask = nb - khint_t(1);
+ while (1) {
+ j = (j + khint_t(1)) & mask;
+ if (j == i || !__kh_used(used, j)) break; // j==i only when the table is completely full
+ k = __kh_h2b(Hash()(keys[j]), bits);
+ if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j)))
+ keys[i] = keys[j], i = j;
+ }
+ __kh_set_unused(used, i);
+ --count;
+ return 1;
+ }
+};
+
+/***********
+ * HashMap *
+ ***********/
+
+template<class KType, class VType>
+struct KHashMapBucket { KType key; VType val; };
+
+template<class T, class Hash, typename khint_t>
+struct KHashMapHash { khint_t operator() (const T &a) const { return Hash()(a.key); } };
+
+template<class T, class Eq>
+struct KHashMapEq { bool operator() (const T &a, const T &b) const { return Eq()(a.key, b.key); } };
+
+template<class KType, class VType, class Hash, class Eq=std::equal_to<KType>, typename khint_t=uint32_t>
+class KHashMap : public KHashSet<KHashMapBucket<KType, VType>,
+ KHashMapHash<KHashMapBucket<KType, VType>, Hash, khint_t>,
+ KHashMapEq<KHashMapBucket<KType, VType>, Eq>, khint_t>
+{
+protected:
+ typedef KHashMapBucket<KType, VType> bucket_t;
+ typedef KHashSet<bucket_t, KHashMapHash<bucket_t, Hash, khint_t>, KHashMapEq<bucket_t, Eq>, khint_t> hashset_t;
+public:
+ khint_t get(const KType &key) const {
+ bucket_t t = { key, VType() };
+ return hashset_t::get(t);
+ }
+ khint_t put(const KType &key, int *absent) {
+ bucket_t t = { key, VType() };
+ return hashset_t::put(t, absent);
+ }
+ inline KType &key(khint_t i) { return hashset_t::key(i).key; }
+ inline VType &value(khint_t i) { return hashset_t::key(i).val; }
+ inline VType &operator[] (const KType &key) {
+ bucket_t t = { key, VType() };
+ return value(hashset_t::put(t));
+ }
+};
+
+/****************************
+ * HashSet with cached hash *
+ ****************************/
+
+template<class KType, typename khint_t>
+struct KHashSetCachedBucket { KType key; khint_t hash; };
+
+template<class T, typename khint_t>
+struct KHashCachedHash { khint_t operator() (const T &a) const { return a.hash; } };
+
+template<class T, class Eq>
+struct KHashCachedEq { bool operator() (const T &a, const T &b) const { return a.hash == b.hash && Eq()(a.key, b.key); } };
+
+template<class KType, class Hash, class Eq = std::equal_to<KType>, typename khint_t = uint32_t>
+class KHashSetCached : public KHashSet<KHashSetCachedBucket<KType, khint_t>,
+ KHashCachedHash<KHashSetCachedBucket<KType, khint_t>, khint_t>,
+ KHashCachedEq<KHashSetCachedBucket<KType, khint_t>, Eq>, khint_t>
+{
+ typedef KHashSetCachedBucket<KType, khint_t> bucket_t;
+ typedef KHashSet<bucket_t, KHashCachedHash<bucket_t, khint_t>, KHashCachedEq<bucket_t, Eq>, khint_t> hashset_t;
+public:
+ khint_t get(const KType &key) const {
+ bucket_t t = { key, Hash()(key) };
+ return hashset_t::get(t);
+ }
+ khint_t put(const KType &key, int *absent) {
+ bucket_t t = { key, Hash()(key) };
+ return hashset_t::put(t, absent);
+ }
+ inline KType &key(khint_t i) { return hashset_t::key(i).key; }
+};
+
+/****************************
+ * HashMap with cached hash *
+ ****************************/
+
+template<class KType, class VType, typename khint_t>
+struct KHashMapCachedBucket { KType key; VType val; khint_t hash; };
+
+template<class KType, class VType, class Hash, class Eq = std::equal_to<KType>, typename khint_t = uint32_t>
+class KHashMapCached : public KHashSet<KHashMapCachedBucket<KType, VType, khint_t>,
+ KHashCachedHash<KHashMapCachedBucket<KType, VType, khint_t>, khint_t>,
+ KHashCachedEq<KHashMapCachedBucket<KType, VType, khint_t>, Eq>, khint_t>
+{
+protected:
+ typedef KHashMapCachedBucket<KType, VType, khint_t> bucket_t;
+ typedef KHashSet<bucket_t, KHashCachedHash<bucket_t, khint_t>, KHashCachedEq<bucket_t, Eq>, khint_t> hashset_t;
+public:
+ khint_t get(const KType &key) const {
+ bucket_t t = { key, VType(), Hash()(key) };
+ return hashset_t::get(t);
+ }
+ khint_t put(const KType &key, int *absent) {
+ bucket_t t = { key, VType(), Hash()(key) };
+ return hashset_t::put(t, absent);
+ }
+ inline KType &key(khint_t i) { return hashset_t::key(i).key; }
+ inline VType &value(khint_t i) { return hashset_t::key(i).val; }
+ inline VType &operator[] (const KType &key) {
+ bucket_t t = { key, VType(), Hash()(key) };
+ return value(hashset_t::put(t));
+ }
+};
+
+}
+
+#endif /* __AC_KHASHL_HPP */
=====================================
tag_git.sh
=====================================
@@ -3,7 +3,7 @@ git checkout master
ver=$(fgrep '#define GCLIB_VERSION ' GBase.h)
ver=${ver#*\"}
ver=${ver%%\"*}
-git fetch --tags
+#git fetch --tags
if [[ "$1" == "delete" || "$1" == "del" ]]; then
echo "Deleting tag v$ver .."
git tag -d v$ver
=====================================
xxhash.h
=====================================
The diff for this file was not included because it is too large.
View it on GitLab: https://salsa.debian.org/med-team/libgclib/-/commit/fe459c031c0e0ad141be34d228d2e0af9594903e
--
View it on GitLab: https://salsa.debian.org/med-team/libgclib/-/commit/fe459c031c0e0ad141be34d228d2e0af9594903e
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201026/102bec19/attachment-0001.html>
More information about the debian-med-commit
mailing list