[med-svn] [Git][med-team/mash][upstream] New upstream version 2.3+dfsg

Andreas Tille (@tille) gitlab at salsa.debian.org
Tue Oct 12 08:29:52 BST 2021



Andreas Tille pushed to branch upstream at Debian Med / mash


Commits:
87a2ce0e by Andreas Tille at 2021-10-12T09:00:39+02:00
New upstream version 2.3+dfsg
- - - - -


24 changed files:

- LICENSE.txt
- Makefile.in
- + doc/man/mash-dist.1
- + doc/man/mash-info.1
- + doc/man/mash-paste.1
- + doc/man/mash-screen.1
- + doc/man/mash-sketch.1
- + doc/man/mash-triangle.1
- + doc/man/mash.1
- doc/sphinx/index.rst
- src/mash/CommandFind.cpp
- src/mash/CommandList.cpp
- src/mash/CommandScreen.cpp
- src/mash/CommandScreen.h
- + src/mash/CommandTaxScreen.cpp
- + src/mash/CommandTaxScreen.h
- src/mash/HashSet.cpp
- src/mash/HashSet.h
- src/mash/Sketch.cpp
- src/mash/Sketch.h
- src/mash/mash.cpp
- + src/mash/robin_hood.h
- + src/mash/taxdb.hpp
- src/mash/version.h


Changes:

=====================================
LICENSE.txt
=====================================
@@ -17,6 +17,10 @@ Open Bloom Filter
   https://code.google.com/p/bloom/source/browse/trunk/bloom_filter.hpp
   Common Public License
 
+Robin_Hood Unordered Map and Set
+  https://github.com/martinus/robin-hood-hashing
+  MIT License
+
 COPYRIGHT LICENSE
 
 Copyright © 2015, Battelle National Biodefense Institute (BNBI);


=====================================
Makefile.in
=====================================
@@ -15,6 +15,7 @@ SOURCES=\
 	src/mash/CommandBounds.cpp \
 	src/mash/CommandContain.cpp \
 	src/mash/CommandDistance.cpp \
+	src/mash/CommandTaxScreen.cpp \
 	src/mash/CommandScreen.cpp \
 	src/mash/CommandTriangle.cpp \
 	src/mash/CommandFind.cpp \
@@ -57,7 +58,12 @@ src/mash/memcpyWrap.o : src/mash/memcpyWrap.c
 src/mash/capnp/MinHash.capnp.c++ src/mash/capnp/MinHash.capnp.h : src/mash/capnp/MinHash.capnp
 	cd src/mash/capnp;export PATH=@capnp@/bin/:${PATH};capnp compile -I @capnp@/include -oc++ MinHash.capnp
 
-install : mash
+.PHONY: install-man install
+install-man:
+	mkdir -p @prefix@/share/man/man1
+	cp `pwd`/doc/man/*.1 @prefix@/share/man/man1
+
+install : mash install-man
 	mkdir -p @prefix@/bin/
 	mkdir -p @prefix@/lib/
 	mkdir -p @prefix@/include/
@@ -68,12 +74,15 @@ install : mash
 	cp `pwd`/src/mash/*.h @prefix@/include/mash/
 	cp `pwd`/src/mash/capnp/MinHash.capnp.h @prefix@/include/mash/capnp/
 
-.PHONY: uninstall
-uninstall:
+.PHONY: uninstall uninstall-man
+uninstall: uninstall-man
 	rm -f @prefix@/bin/mash
 	rm -f @prefix@/lib/libmash.a
 	rm -rf @prefix@/include/mash
 
+uninstall-man:
+	rm -f @prefix@/share/man/man1/mash*.1
+
 clean :
 	-rm mash
 	-rm libmash.a


=====================================
doc/man/mash-dist.1
=====================================
@@ -0,0 +1,162 @@
+'\" t
+.\"     Title: mash-dist
+.\"    Author: [see the "AUTHOR(S)" section]
+.\" Generator: Asciidoctor 2.0.10
+.\"      Date: 2019-12-13
+.\"    Manual: \ \&
+.\"    Source: \ \&
+.\"  Language: English
+.\"
+.TH "MASH\-DIST" "1" "2019-12-13" "\ \&" "\ \&"
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.ss \n[.ss] 0
+.nh
+.ad l
+.de URL
+\fI\\$2\fP <\\$1>\\$3
+..
+.als MTO URL
+.if \n[.g] \{\
+.  mso www.tmac
+.  am URL
+.    ad l
+.  .
+.  am MTO
+.    ad l
+.  .
+.  LINKSTYLE blue R < >
+.\}
+.SH "NAME"
+mash\-dist \- estimate the distance of query sequences to references
+.SH "SYNOPSIS"
+.sp
+\fBmash dist\fP [options] <reference> <query> [<query>] ...
+.SH "DESCRIPTION"
+.sp
+Estimate the distance of each query sequence to the reference. Both the
+reference and queries can be fasta or fastq, gzipped or not, or Mash sketch
+files (.msh) with matching k\-mer sizes. Query files can also be files of file
+names (see \fB\-l\fP). Whole files are compared by default (see \fB\-i\fP). The output
+fields are [reference\-ID, query\-ID, distance, p\-value, shared\-hashes].
+.SH "OPTIONS"
+.sp
+\fB\-h\fP
+.RS 4
+Help
+.RE
+.sp
+\fB\-p\fP <int>
+.RS 4
+Parallelism. This many threads will be spawned for processing. [1]
+.RE
+.SS "Input"
+.sp
+\fB\-l\fP
+.RS 4
+List input. Each query file contains a list of sequence files, one
+per line. The reference file is not affected.
+.RE
+.SS "Output"
+.sp
+\fB\-t\fP
+.RS 4
+Table output (will not report p\-values, but fields will be blank if
+they do not meet the p\-value threshold).
+.RE
+.sp
+\fB\-v\fP <num>
+.RS 4
+Maximum p\-value to report. (0\-1) [1.0]
+.RE
+.sp
+\fB\-d\fP <num>
+.RS 4
+Maximum distance to report. (0\-1) [1.0]
+.RE
+.SS "Sketching"
+.sp
+\fB\-k\fP <int>
+.RS 4
+K\-mer size. Hashes will be based on strings of this many
+nucleotides. Canonical nucleotides are used by default (see
+Alphabet options below). (1\-32) [21]
+.RE
+.sp
+\fB\-s\fP <int>
+.RS 4
+Sketch size. Each sketch will have at most this many non\-redundant
+min\-hashes. [1000]
+.RE
+.sp
+\fB\-i\fP
+.RS 4
+Sketch individual sequences, rather than whole files.
+.RE
+.sp
+\fB\-w\fP <num>
+.RS 4
+Probability threshold for warning about low k\-mer size. (0\-1) [0.01]
+.RE
+.sp
+\fB\-r\fP
+.RS 4
+Input is a read set. See Reads options below. Incompatible with \fB\-i\fP.
+.RE
+.SS "Sketching (reads)"
+.sp
+\fB\-b\fP <size>
+.RS 4
+Use a Bloom filter of this size (raw bytes or with K/M/G/T) to
+filter out unique k\-mers. This is useful if exact filtering with \fB\-m\fP
+uses too much memory. However, some unique k\-mers may pass
+erroneously, and copies cannot be counted beyond 2. Implies \fB\-r\fP.
+.RE
+.sp
+\fB\-m\fP <int>
+.RS 4
+Minimum copies of each k\-mer required to pass noise filter for
+reads. Implies \fB\-r\fP. [1]
+.RE
+.sp
+\fB\-c\fP <num>
+.RS 4
+Target coverage. Sketching will conclude if this coverage is
+reached before the end of the input file (estimated by average
+k\-mer multiplicity). Implies \fB\-r\fP.
+.RE
+.sp
+\fB\-g\fP <size>
+.RS 4
+Genome size. If specified, will be used for p\-value calculation
+instead of an estimated size from k\-mer content. Implies \fB\-r\fP.
+.RE
+.SS "Sketching (alphabet)"
+.sp
+\fB\-n\fP
+.RS 4
+Preserve strand (by default, strand is ignored by using canonical
+DNA k\-mers, which are alphabetical minima of forward\-reverse
+pairs). Implied if an alphabet is specified with \fB\-a\fP or \fB\-z\fP.
+.RE
+.sp
+\fB\-a\fP
+.RS 4
+Use amino acid alphabet (A\-Z, except BJOUXZ). Implies \fB\-n\fP, \fB\-k\fP 9.
+.RE
+.sp
+\fB\-z\fP <text>
+.RS 4
+Alphabet to base hashes on (case ignored by default; see \fB\-Z\fP).
+K\-mers with other characters will be ignored. Implies \fB\-n\fP.
+.RE
+.sp
+\fB\-Z\fP
+.RS 4
+Preserve case in k\-mers and alphabet (case is ignored by default).
+Sequence letters whose case is not in the current alphabet will be
+skipped when sketching.
+.RE
+.SH "SEE ALSO"
+.sp
+mash(1)
\ No newline at end of file


=====================================
doc/man/mash-info.1
=====================================
@@ -0,0 +1,69 @@
+'\" t
+.\"     Title: mash-info
+.\"    Author: [see the "AUTHOR(S)" section]
+.\" Generator: Asciidoctor 2.0.10
+.\"      Date: 2019-12-13
+.\"    Manual: \ \&
+.\"    Source: \ \&
+.\"  Language: English
+.\"
+.TH "MASH\-INFO" "1" "2019-12-13" "\ \&" "\ \&"
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.ss \n[.ss] 0
+.nh
+.ad l
+.de URL
+\fI\\$2\fP <\\$1>\\$3
+..
+.als MTO URL
+.if \n[.g] \{\
+.  mso www.tmac
+.  am URL
+.    ad l
+.  .
+.  am MTO
+.    ad l
+.  .
+.  LINKSTYLE blue R < >
+.\}
+.SH "NAME"
+mash\-info \- display information about sketch files
+.SH "SYNOPSIS"
+.sp
+\fBmash info\fP [options] <sketch>
+.SH "DESCRIPTION"
+.sp
+Displays information about sketch files.
+.SH "OPTIONS"
+.sp
+\fB\-h\fP
+.RS 4
+Help
+.RE
+.sp
+\fB\-H\fP
+.RS 4
+Only show header info. Do not list each sketch. Incompatible with \fB\-t\fP
+and \fB\-c\fP.
+.RE
+.sp
+\fB\-t\fP
+.RS 4
+Tabular output (rather than padded), with no header. Incompatible with
+\fB\-H\fP and \fB\-c\fP.
+.RE
+.sp
+\fB\-c\fP
+.RS 4
+Show hash count histograms for each sketch. Incompatible with \fB\-H\fP and
+\fB\-t\fP.
+.RE
+.sp
+\fB\-d\fP
+.RS 4
+Dump sketches in JSON format. Incompatible with \fB\-H\fP, \fB\-t\fP, and \fB\-c\fP.
+.RE
+.SH "SEE ALSO"
+.sp
+mash(1)
\ No newline at end of file


=====================================
doc/man/mash-paste.1
=====================================
@@ -0,0 +1,51 @@
+'\" t
+.\"     Title: mash-paste
+.\"    Author: [see the "AUTHOR(S)" section]
+.\" Generator: Asciidoctor 2.0.10
+.\"      Date: 2019-12-13
+.\"    Manual: \ \&
+.\"    Source: \ \&
+.\"  Language: English
+.\"
+.TH "MASH\-PASTE" "1" "2019-12-13" "\ \&" "\ \&"
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.ss \n[.ss] 0
+.nh
+.ad l
+.de URL
+\fI\\$2\fP <\\$1>\\$3
+..
+.als MTO URL
+.if \n[.g] \{\
+.  mso www.tmac
+.  am URL
+.    ad l
+.  .
+.  am MTO
+.    ad l
+.  .
+.  LINKSTYLE blue R < >
+.\}
+.SH "NAME"
+mash\-paste \- create a single sketch file from multiple sketch files
+.SH "SYNOPSIS"
+.sp
+\fBmash paste\fP [options] <out_prefix> <sketch> [<sketch>] ...
+.SH "DESCRIPTION"
+.sp
+Create a single sketch file from multiple sketch files.
+.SH "OPTIONS"
+.sp
+\fB\-h\fP
+.RS 4
+Help
+.RE
+.sp
+\fB\-l\fP
+.RS 4
+Input files are lists of file names.
+.RE
+.SH "SEE ALSO"
+.sp
+mash(1)
\ No newline at end of file


=====================================
doc/man/mash-screen.1
=====================================
@@ -0,0 +1,81 @@
+'\" t
+.\"     Title: mash-screen
+.\"    Author: [see the "AUTHOR(S)" section]
+.\" Generator: Asciidoctor 2.0.10
+.\"      Date: 2019-12-13
+.\"    Manual: \ \&
+.\"    Source: \ \&
+.\"  Language: English
+.\"
+.TH "MASH\-SCREEN" "1" "2019-12-13" "\ \&" "\ \&"
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.ss \n[.ss] 0
+.nh
+.ad l
+.de URL
+\fI\\$2\fP <\\$1>\\$3
+..
+.als MTO URL
+.if \n[.g] \{\
+.  mso www.tmac
+.  am URL
+.    ad l
+.  .
+.  am MTO
+.    ad l
+.  .
+.  LINKSTYLE blue R < >
+.\}
+.SH "NAME"
+mash\-screen \- determine whether query sequences are within a larger pool of sequences
+.SH "SYNOPSIS"
+.sp
+\fBmash screen\fP [options] <queries>.msh <pool> [<pool>] ...
+.SH "DESCRIPTION"
+.sp
+Determine how well query sequences are contained within a pool of sequences.
+The queries must be formatted as a single Mash sketch file (.msh), created
+with the \f(CRmash sketch\fP command. The <pool> files can be contigs or reads, in
+fasta or fastq, gzipped or not, and "\-" can be given for <pool> to read from
+standard input. The <pool> sequences are assumed to be nucleotides, and will
+be 6\-frame translated if the <queries> are amino acids. The output fields are
+[identity, shared\-hashes, median\-multiplicity, p\-value, query\-ID, query\-comment],
+where median\-multiplicity is computed for shared hashes, based on the number of
+observations of those hashes within the pool.
+.SH "OPTIONS"
+.sp
+\fB\-h\fP
+.RS 4
+Help
+.RE
+.sp
+\fB\-p\fP <int>
+.RS 4
+Parallelism. This many threads will be spawned for processing.
+.RE
+.sp
+\fB\-w\fP
+.RS 4
+Winner\-takes\-all strategy for identity estimates. After counting
+hashes for each query, hashes that appear in multiple queries will
+be removed from all except the one with the best identity (ties
+broken by larger query), and other identities will be reduced. This
+removes output redundancy, providing a rough compositional outline.
+.RE
+.SS "Output"
+.sp
+\fB\-i\fP <num>
+.RS 4
+Minimum identity to report. Inclusive unless set to zero, in which
+case only identities greater than zero (i.e. with at least one
+shared hash) will be reported. Set to \-1 to output everything.
+.RE
+.sp
+\fB\-v\fP <num>
+.RS 4
+Maximum p\-value to report.
+.RE
+.SH "SEE ALSO"
+.sp
+mash(1)
\ No newline at end of file


=====================================
doc/man/mash-sketch.1
=====================================
@@ -0,0 +1,154 @@
+'\" t
+.\"     Title: mash-sketch
+.\"    Author: [see the "AUTHOR(S)" section]
+.\" Generator: Asciidoctor 2.0.10
+.\"      Date: 2019-12-13
+.\"    Manual: \ \&
+.\"    Source: \ \&
+.\"  Language: English
+.\"
+.TH "MASH\-SKETCH" "1" "2019-12-13" "\ \&" "\ \&"
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.ss \n[.ss] 0
+.nh
+.ad l
+.de URL
+\fI\\$2\fP <\\$1>\\$3
+..
+.als MTO URL
+.if \n[.g] \{\
+.  mso www.tmac
+.  am URL
+.    ad l
+.  .
+.  am MTO
+.    ad l
+.  .
+.  LINKSTYLE blue R < >
+.\}
+.SH "NAME"
+mash\-sketch \- create sketches (reduced representations for fast operations)
+.SH "SYNOPSIS"
+.sp
+\fBmash sketch\fP [options] fast(a|q)[.gz] ...
+.SH "DESCRIPTION"
+.sp
+Create a sketch file, which is a reduced representation of a sequence or set
+of sequences (based on min\-hashes) that can be used for fast distance
+estimations. Input can be fasta or fastq files (gzipped or not), and "\-" can
+be given to read from standard input. Input files can also be files of file
+names (see \fB\-l\fP). For output, one sketch file will be generated, but it can have
+multiple sketches within it, divided by sequences or files (see \fB\-i\fP). By
+default, the output file name will be the first input file with a \(aq.msh\(aq
+extension, or \(aqstdin.msh\(aq if standard input is used (see \fB\-o\fP).
+.SH "OPTIONS"
+.sp
+\fB\-h\fP
+.RS 4
+Help
+.RE
+.sp
+\fB\-p\fP <int>
+.RS 4
+Parallelism. This many threads will be spawned for processing. [1]
+.RE
+.SS "Input"
+.sp
+\fB\-l\fP
+.RS 4
+List input. Each file contains a list of sequence files, one per line.
+.RE
+.SS "Output"
+.sp
+\fB\-o\fP <path>
+.RS 4
+Output prefix (first input file used if unspecified). The suffix
+\(aq.msh\(aq will be appended.
+.RE
+.SS "Sketching"
+.sp
+\fB\-k\fP <int>
+.RS 4
+K\-mer size. Hashes will be based on strings of this many
+nucleotides. Canonical nucleotides are used by default (see
+Alphabet options below). (1\-32) [21]
+.RE
+.sp
+\fB\-s\fP <int>
+.RS 4
+Sketch size. Each sketch will have at most this many non\-redundant
+min\-hashes. [1000]
+.RE
+.sp
+\fB\-i\fP
+.RS 4
+Sketch individual sequences, rather than whole files.
+.RE
+.sp
+\fB\-w\fP <num>
+.RS 4
+Probability threshold for warning about low k\-mer size. (0\-1) [0.01]
+.RE
+.sp
+\fB\-r\fP
+.RS 4
+Input is a read set. See Reads options below. Incompatible with \fB\-i\fP.
+.RE
+.SS "Sketching (reads)"
+.sp
+\fB\-b\fP <size>
+.RS 4
+Use a Bloom filter of this size (raw bytes or with K/M/G/T) to
+filter out unique k\-mers. This is useful if exact filtering with \fB\-m\fP
+uses too much memory. However, some unique k\-mers may pass
+erroneously, and copies cannot be counted beyond 2. Implies \fB\-r\fP.
+.RE
+.sp
+\fB\-m\fP <int>
+.RS 4
+Minimum copies of each k\-mer required to pass noise filter for
+reads. Implies \fB\-r\fP. [1]
+.RE
+.sp
+\fB\-c\fP <num>
+.RS 4
+Target coverage. Sketching will conclude if this coverage is
+reached before the end of the input file (estimated by average
+k\-mer multiplicity). Implies \fB\-r\fP.
+.RE
+.sp
+\fB\-g\fP <size>
+.RS 4
+Genome size. If specified, will be used for p\-value calculation
+instead of an estimated size from k\-mer content. Implies \fB\-r\fP.
+.RE
+.SS "Sketching (alphabet)"
+.sp
+\fB\-n\fP
+.RS 4
+Preserve strand (by default, strand is ignored by using canonical
+DNA k\-mers, which are alphabetical minima of forward\-reverse
+pairs). Implied if an alphabet is specified with \fB\-a\fP or \fB\-z\fP.
+.RE
+.sp
+\fB\-a\fP
+.RS 4
+Use amino acid alphabet (A\-Z, except BJOUXZ). Implies \fB\-n\fP, \fB\-k\fP 9.
+.RE
+.sp
+\fB\-z\fP <text>
+.RS 4
+Alphabet to base hashes on (case ignored by default; see \fB\-Z\fP).
+K\-mers with other characters will be ignored. Implies \fB\-n\fP.
+.RE
+.sp
+\fB\-Z\fP
+.RS 4
+Preserve case in k\-mers and alphabet (case is ignored by default).
+Sequence letters whose case is not in the current alphabet will be
+skipped when sketching.
+.RE
+.SH "SEE ALSO"
+.sp
+mash(1)
\ No newline at end of file


=====================================
doc/man/mash-triangle.1
=====================================
@@ -0,0 +1,169 @@
+'\" t
+.\"     Title: mash-triangle
+.\"    Author: [see the "AUTHOR(S)" section]
+.\" Generator: Asciidoctor 2.0.10
+.\"      Date: 2019-12-13
+.\"    Manual: \ \&
+.\"    Source: \ \&
+.\"  Language: English
+.\"
+.TH "MASH\-TRIANGLE" "1" "2019-12-13" "\ \&" "\ \&"
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.ss \n[.ss] 0
+.nh
+.ad l
+.de URL
+\fI\\$2\fP <\\$1>\\$3
+..
+.als MTO URL
+.if \n[.g] \{\
+.  mso www.tmac
+.  am URL
+.    ad l
+.  .
+.  am MTO
+.    ad l
+.  .
+.  LINKSTYLE blue R < >
+.\}
+.SH "NAME"
+mash\-triangle \- estimate a lower\-triangular distance matrix
+.SH "SYNOPSIS"
+.sp
+\fBmash triangle\fP [options] <seq1> [<seq2>] ...
+.SH "DESCRIPTION"
+.sp
+Estimate the distance of each input sequence to every other input
+sequence.  Outputs a lower\-triangular distance matrix in relaxed Phylip
+format. The input sequences can be fasta or fastq, gzipped or not, or
+Mash sketch files (.msh) with matching k\-mer sizes. Input files can also
+be files of file names (see \-l). If more than one input file is provided,
+whole files are compared by default (see \-i).
+.SH "OPTIONS"
+.sp
+\fB\-h\fP
+.RS 4
+Help
+.RE
+.sp
+\fB\-p\fP <int>
+.RS 4
+Parallelism. This many threads will be spawned for processing. [1]
+.RE
+.SS "Input"
+.sp
+\fB\-l\fP
+.RS 4
+List input. Each query file contains a list of sequence files, one
+per line. The reference file is not affected.
+.RE
+.SS "Output"
+.sp
+\fB\-C\fP
+.RS 4
+Use comment fields for sequence names instead of IDs.
+.RE
+.sp
+\fB\-E\fP
+.RS 4
+Output edge list instead of Phylip matrix, with fields [seq1, seq2,
+dist, p\-val, shared\-hashes].
+.RE
+.sp
+\fB\-v\fP <num>
+.RS 4
+Maximum p\-value to report in edge list. Implies \-E. (0\-1) [1.0]
+.RE
+.sp
+\fB\-d\fP <num>
+.RS 4
+Maximum distance to report in edge list. Implies \-E. (0\-1) [1.0]
+.RE
+.SS "Sketching"
+.sp
+\fB\-k\fP <int>
+.RS 4
+K\-mer size. Hashes will be based on strings of this many
+nucleotides. Canonical nucleotides are used by default (see
+Alphabet options below). (1\-32) [21]
+.RE
+.sp
+\fB\-s\fP <int>
+.RS 4
+Sketch size. Each sketch will have at most this many non\-redundant
+min\-hashes. [1000]
+.RE
+.sp
+\fB\-i\fP
+.RS 4
+Sketch individual sequences, rather than whole files, e.g. for
+multi\-fastas of single\-chromosome genomes or pair\-wise gene comparisons.
+.RE
+.sp
+\fB\-w\fP <num>
+.RS 4
+Probability threshold for warning about low k\-mer size. (0\-1) [0.01]
+.RE
+.sp
+\fB\-r\fP
+.RS 4
+Input is a read set. See Reads options below. Incompatible with \fB\-i\fP.
+.RE
+.SS "Sketching (reads)"
+.sp
+\fB\-b\fP <size>
+.RS 4
+Use a Bloom filter of this size (raw bytes or with K/M/G/T) to
+filter out unique k\-mers. This is useful if exact filtering with \fB\-m\fP
+uses too much memory. However, some unique k\-mers may pass
+erroneously, and copies cannot be counted beyond 2. Implies \fB\-r\fP.
+.RE
+.sp
+\fB\-m\fP <int>
+.RS 4
+Minimum copies of each k\-mer required to pass noise filter for
+reads. Implies \fB\-r\fP. [1]
+.RE
+.sp
+\fB\-c\fP <num>
+.RS 4
+Target coverage. Sketching will conclude if this coverage is
+reached before the end of the input file (estimated by average
+k\-mer multiplicity). Implies \fB\-r\fP.
+.RE
+.sp
+\fB\-g\fP <size>
+.RS 4
+Genome size. If specified, will be used for p\-value calculation
+instead of an estimated size from k\-mer content. Implies \fB\-r\fP.
+.RE
+.SS "Sketching (alphabet)"
+.sp
+\fB\-n\fP
+.RS 4
+Preserve strand (by default, strand is ignored by using canonical
+DNA k\-mers, which are alphabetical minima of forward\-reverse
+pairs). Implied if an alphabet is specified with \fB\-a\fP or \fB\-z\fP.
+.RE
+.sp
+\fB\-a\fP
+.RS 4
+Use amino acid alphabet (A\-Z, except BJOUXZ). Implies \fB\-n\fP, \fB\-k\fP 9.
+.RE
+.sp
+\fB\-z\fP <text>
+.RS 4
+Alphabet to base hashes on (case ignored by default; see \fB\-Z\fP).
+K\-mers with other characters will be ignored. Implies \fB\-n\fP.
+.RE
+.sp
+\fB\-Z\fP
+.RS 4
+Preserve case in k\-mers and alphabet (case is ignored by default).
+Sequence letters whose case is not in the current alphabet will be
+skipped when sketching.
+.RE
+.SH "SEE ALSO"
+.sp
+mash(1)
\ No newline at end of file


=====================================
doc/man/mash.1
=====================================
@@ -0,0 +1,77 @@
+'\" t
+.\"     Title: mash
+.\"    Author: [see the "AUTHOR(S)" section]
+.\" Generator: Asciidoctor 2.0.10
+.\"      Date: 2019-12-13
+.\"    Manual: \ \&
+.\"    Source: \ \&
+.\"  Language: English
+.\"
+.TH "MASH" "1" "2019-12-13" "\ \&" "\ \&"
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.ss \n[.ss] 0
+.nh
+.ad l
+.de URL
+\fI\\$2\fP <\\$1>\\$3
+..
+.als MTO URL
+.if \n[.g] \{\
+.  mso www.tmac
+.  am URL
+.    ad l
+.  .
+.  am MTO
+.    ad l
+.  .
+.  LINKSTYLE blue R < >
+.\}
+.SH "NAME"
+mash \- fast genome and metagenome distance estimation using MinHash
+.SH "SYNOPSIS"
+.sp
+\fBmash\fP <command> [options] [arguments ...]
+.SH "DESCRIPTION"
+.sp
+\fBmash\fP is the main executable for the \fBMash\fP software. The actual
+functionality is provided by the subtools (\(aqcommands\(aq):
+.SS "Commands"
+.sp
+\fBbounds\fP
+.RS 4
+Print a table of Mash error bounds.
+.RE
+.sp
+\fBdist\fP
+.RS 4
+Estimate the distance of query sequences to references.
+.RE
+.sp
+\fBinfo\fP
+.RS 4
+Display information about sketch files.
+.RE
+.sp
+\fBpaste\fP
+.RS 4
+Create a single sketch file from multiple sketch files.
+.RE
+.sp
+\fBscreen\fP
+.RS 4
+Determine whether query sequences are within a larger pool of sequences.
+.RE
+.sp
+\fBsketch\fP
+.RS 4
+Create sketches (reduced representations for fast operations).
+.RE
+.sp
+\fBtriangle\fP
+.RS 4
+Estimate a lower\-triangular distance matrix.
+.RE
+.SH "SEE ALSO"
+.sp
+mash\-dist(1), mash\-info(1), mash\-paste(1), mash\-screen(1), mash\-sketch(1), mash\-triangle(1)
\ No newline at end of file


=====================================
doc/sphinx/index.rst
=====================================
@@ -13,11 +13,13 @@ Fast genome and metagenome distance estimation using MinHash
 
 |
 
-Publication
-===========
+Publications
+============
 `Mash: fast genome and metagenome distance estimation using MinHash. Ondov BD, Treangen TJ, Melsted P, Mallonee AB, Bergman NH, Koren S, Phillippy AM. Genome Biol. 2016 Jun 20;17(1):132. doi: 10.1186/s13059-016-0997-x. <http://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-0997-x>`_
 
-`Mash Screen: High-throughput sequence containment estimation for genome discovery. Ondov BD, Starrett GJ, Sappington A, Kostic A, Koren S, Buck CB, Phillippy AM. BioRxiv. 2019 Mar. doi: 10.1101/557314 <https://doi.org/10.1101/557314>`_
+`Mash Screen: high-throughput sequence containment estimation for genome discovery.
+Ondov BD, Starrett GJ, Sappington A, Kostic A, Koren S, Buck CB, Phillippy AM.
+Genome Biol. 2019 Nov 5;20(1):232. doi: 10.1186/s13059-019-1841-x. <https://doi.org/10.1186/s13059-019-1841-x>`_
 
 .. toctree::
    :maxdepth: 1


=====================================
src/mash/CommandFind.cpp
=====================================
@@ -10,7 +10,7 @@
 #include "kseq.h"
 #include <iostream>
 #include <set>
-#include <unordered_set>
+#include "robin_hood.h"
 #include "ThreadPool.h"
 #include "sketchParameterSetup.h"
 
@@ -229,11 +229,9 @@ CommandFind::FindOutput * find(CommandFind::FindInput * data)
 
 void findPerStrand(const CommandFind::FindInput * input, CommandFind::FindOutput * output, bool minusStrand)
 {
-    typedef std::unordered_map < uint32_t, std::set<uint32_t> > PositionsBySequence_umap;
-    
     bool verbose = false;
     
-    Sketch::Hash_set minHashes;
+    robin_hood::unordered_set<Sketch::hash_t> minHashes;
     
     const Sketch & sketch = input->sketch;
     int kmerSize = sketch.getKmerSize();
@@ -302,9 +300,9 @@ void findPerStrand(const CommandFind::FindInput * input, CommandFind::FindOutput
     // get sorted lists of positions, per reference sequence, that have
     // mutual min-hashes with the query
     //
-    PositionsBySequence_umap hits;
-    //
-    for ( Sketch::Hash_set::const_iterator i = minHashes.begin(); i != minHashes.end(); i++ )
+    robin_hood::unordered_map < uint32_t, std::set<uint32_t> >   hits;
+
+    for ( auto i = minHashes.begin(); i != minHashes.end(); i++ )
     {
         Sketch::hash_t hash = *i;
         
@@ -326,7 +324,7 @@ void findPerStrand(const CommandFind::FindInput * input, CommandFind::FindOutput
         }
     }
     
-    for ( PositionsBySequence_umap::iterator i = hits.begin(); i != hits.end(); i++ )
+    for ( auto i = hits.begin(); i != hits.end(); i++ )
     {
     	using std::set;
     	


=====================================
src/mash/CommandList.cpp
=====================================
@@ -118,6 +118,10 @@ MurmurHash3\n\
 Open Bloom Filter\n\
   https://code.google.com/p/bloom/source/browse/trunk/bloom_filter.hpp\n\
   Common Public License\n\
+\n\
+Robin_Hood Unordered Map and Set\n\
+  https://github.com/martinus/robin-hood-hashing\n\
+  MIT License\n\
 \n";
 #ifdef DIST_LICENSE
 	cout << "\n\


=====================================
src/mash/CommandScreen.cpp
=====================================
@@ -12,7 +12,7 @@
 #include <zlib.h>
 #include "ThreadPool.h"
 #include <math.h>
-#include <set>
+#include "robin_hood.h"
 
 #ifdef USE_BOOST
 	#include <boost/math/distributions/binomial.hpp>
@@ -29,8 +29,6 @@ using std::cout;
 using std::endl;
 using std::list;
 using std::string;
-using std::unordered_map;
-using std::unordered_set;
 using std::vector;
 
 namespace mash {
@@ -93,8 +91,8 @@ int CommandScreen::run() const
 	parameters.minHashesPerWindow = sketch.getMinHashesPerWindow();
 	
 	HashTable hashTable;
-	unordered_map<uint64_t, std::atomic<uint32_t>> hashCounts;
-	unordered_map<uint64_t, list<uint32_t> > saturationByIndex;
+	robin_hood::unordered_map<uint64_t, std::atomic<uint32_t>> hashCounts;
+	robin_hood::unordered_map<uint64_t, list<uint32_t> > saturationByIndex;
 	
 	cerr << "Loading " << arguments[0] << "..." << endl;
 	
@@ -117,7 +115,7 @@ int CommandScreen::run() const
 	
 	cerr << "   " << hashTable.size() << " distinct hashes." << endl;
 	
-	unordered_set<MinHashHeap *> minHashHeaps;
+	robin_hood::unordered_set<MinHashHeap *> minHashHeaps;
 	
 	bool trans = (alphabet == alphabetProtein);
 	
@@ -289,7 +287,7 @@ int CommandScreen::run() const
 	
 	MinHashHeap minHashHeap(sketch.getUse64(), sketch.getMinHashesPerWindow());
 	
-	for ( unordered_set<MinHashHeap *>::const_iterator i = minHashHeaps.begin(); i != minHashHeaps.end(); i++ )
+	for ( auto i = minHashHeaps.begin(); i != minHashHeaps.end(); i++ )
 	{
 		HashList hashList(parameters.use64);
 		
@@ -337,13 +335,13 @@ int CommandScreen::run() const
 	
 	memset(shared, 0, sizeof(uint64_t) * sketch.getReferenceCount());
 	
-	for ( unordered_map<uint64_t, std::atomic<uint32_t> >::const_iterator i = hashCounts.begin(); i != hashCounts.end(); i++ )
+	for ( auto i = hashCounts.begin(); i != hashCounts.end(); i++ )
 	{
 		if ( i->second >= minCov )
 		{
-			const unordered_set<uint64_t> & indeces = hashTable.at(i->first);
+			const auto & indeces = hashTable.at(i->first);
 
-			for ( unordered_set<uint64_t>::const_iterator k = indeces.begin(); k != indeces.end(); k++ )
+			for ( auto k = indeces.begin(); k != indeces.end(); k++ )
 			{
 				shared[*k]++;
 				depths[*k].push_back(i->second);
@@ -381,12 +379,12 @@ int CommandScreen::run() const
 				continue;
 			}
 			
-			const unordered_set<uint64_t> & indeces = i->second;
+			const auto & indeces = i->second;
 			double maxScore = 0;
 			uint64_t maxLength = 0;
 			uint64_t maxIndex;
 			
-			for ( unordered_set<uint64_t>::const_iterator k = indeces.begin(); k != indeces.end(); k++ )
+			for ( auto k = indeces.begin(); k != indeces.end(); k++ )
 			{
 				if ( scores[*k] > maxScore )
 				{
@@ -456,6 +454,7 @@ int CommandScreen::run() const
 		}
 	}
 	
+	delete [] depths;
 	delete [] shared;
 	
 	return 0;
@@ -809,7 +808,7 @@ char aaFromCodon(const char * codon)
 	return aa;//(aa == '*') ? 0 : aa;
 }
 
-void useThreadOutput(CommandScreen::HashOutput * output, unordered_set<MinHashHeap *> & minHashHeaps)
+void useThreadOutput(CommandScreen::HashOutput * output, robin_hood::unordered_set<MinHashHeap *> & minHashHeaps)
 {
 	minHashHeaps.emplace(output->minHashHeap);
 	delete output;


=====================================
src/mash/CommandScreen.h
=====================================
@@ -13,8 +13,7 @@
 #include <string>
 #include <vector>
 #include <atomic>
-#include <unordered_set>
-#include <unordered_map>
+#include "robin_hood.h"
 #include "MinHashHeap.h"
 
 namespace mash {
@@ -24,13 +23,13 @@ struct HashTableEntry
 	HashTableEntry() : count(0) {}
 	
 	uint32_t count;
-	std::unordered_set<uint64_t> indices;
+	robin_hood::unordered_set<uint64_t> indices;
 };
 
-//typedef std::unordered_map< uint64_t, HashTableEntry > HashTable;
-typedef std::unordered_map< uint64_t, std::unordered_set<uint64_t> > HashTable;
+//typedef robin_hood::unordered_map< uint64_t, HashTableEntry > HashTable;
+typedef robin_hood::unordered_map< uint64_t, robin_hood::unordered_set<uint64_t> > HashTable;
 
-static const std::unordered_map< std::string, char > codons =
+static const robin_hood::unordered_map< std::string, char > codons =
 {
 	{"AAA",	'K'},
 	{"AAC",	'N'},
@@ -104,7 +103,7 @@ public:
     
     struct HashInput
     {
-    	HashInput(std::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCountsNew, MinHashHeap * minHashHeapNew, char * seqNew, uint64_t lengthNew, const Sketch::Parameters & parametersNew, bool transNew)
+    	HashInput(robin_hood::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCountsNew, MinHashHeap * minHashHeapNew, char * seqNew, uint64_t lengthNew, const Sketch::Parameters & parametersNew, bool transNew)
     	:
     	hashCounts(hashCountsNew),
     	minHashHeap(minHashHeapNew),
@@ -129,7 +128,7 @@ public:
     	bool trans;
     	
     	Sketch::Parameters parameters;
-		std::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCounts;
+		robin_hood::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCounts;
 		MinHashHeap * minHashHeap;
     };
     
@@ -165,7 +164,7 @@ double estimateIdentity(uint64_t common, uint64_t denom, int kmerSize, double km
 CommandScreen::HashOutput * hashSequence(CommandScreen::HashInput * input);
 double pValueWithin(uint64_t x, uint64_t setSize, double kmerSpace, uint64_t sketchSize);
 void translate(const char * src, char * dst, uint64_t len);
-void useThreadOutput(CommandScreen::HashOutput * output, std::unordered_set<MinHashHeap *> & minHashHeaps);
+void useThreadOutput(CommandScreen::HashOutput * output, robin_hood::unordered_set<MinHashHeap *> & minHashHeaps);
 
 } // namespace mash
 


=====================================
src/mash/CommandTaxScreen.cpp
=====================================
@@ -0,0 +1,486 @@
+// Copyright © 2015, Battelle National Biodefense Institute (BNBI);
+// all rights reserved. Authored by: Brian Ondov, Todd Treangen,
+// Sergey Koren, and Adam Phillippy
+//
+// See the LICENSE.txt file included with this software for license information.
+
+#include "CommandTaxScreen.h"
+#include "CommandDistance.h" // for pvalue
+#include "Sketch.h"
+#include "kseq.h"
+#include "taxdb.hpp"
+#include <iostream>
+#include <zlib.h>
+#include "ThreadPool.h"
+#include <math.h>
+#include <set>
+
+#ifdef USE_BOOST
+	#include <boost/math/distributions/binomial.hpp>
+	using namespace::boost::math;
+#else
+	#include <gsl/gsl_cdf.h>
+#endif
+
+#define SET_BINARY_MODE(file)
+KSEQ_INIT(gzFile, gzread)
+
+using std::ifstream;
+using std::stringstream;
+
+namespace mash {
+
+inline bool file_exists (const std::string& name) {
+    ifstream f(name.c_str());
+    return f.good();
+}
+
+
+CommandTaxScreen::CommandTaxScreen()
+: Command()
+{
+	name = "taxscreen";
+	summary = "Create Kraken-style taxonomic report based on mash screen.";
+	description = "Create Kraken-style taxonomic report based on how well query sequences are contained within a pool of sequences. The queries must be formatted as a single Mash sketch file (.msh), created with the `mash sketch` command. The <pool> files can be contigs or reads, in fasta or fastq, gzipped or not, and \"-\" can be given for <pool> to read from standard input. The <pool> sequences are assumed to be nucleotides, and will be 6-frame translated if the <queries> are amino acids. The output fields are [total percent of hashes, number of contained hashes in the clade, number of contained hashes in the taxon, total number of hashes in the clade, total number of hashes in the taxon, rank, taxonomy ID, padded name].";
+    argumentString = "<queries>.msh <pool> [<pool>] ...";
+
+	useOption("help");
+	useOption("threads");
+//	useOption("minCov");
+//    addOption("saturation", Option(Option::Boolean, "s", "", "Include saturation curve in output. Each line will have an additional field representing the absolute number of k-mers seen at each Jaccard increase, formatted as a comma-separated list.", ""));
+    addOption("identity", Option(Option::Number, "i", "Output", "Minimum identity to report. Inclusive unless set to zero, in which case only identities greater than zero (i.e. with at least one shared hash) will be reported. Set to -1 to output everything.", "0", -1., 1.));
+    addOption("pvalue", Option(Option::Number, "v", "Output", "Maximum p-value to report.", "1.0", 0., 1.));
+	addOption("mapping-file", Option(Option::String, "m", "", "Mapping file from reference name to taxonomy ID", ""));
+	addOption("taxonomy-dir", Option(Option::String, "t", "", "Directory containing NCBI taxonomy dump", "."));
+}
+
+int CommandTaxScreen::run() const
+{
+	if ( arguments.size() < 2 || options.at("help").active )
+	{
+		print();
+		return 0;
+	}
+
+	if ( ! hasSuffix(arguments[0], suffixSketch) )
+	{
+		cerr << "ERROR: " << arguments[0] << " does not look like a sketch (.msh)" << endl;
+		exit(1);
+	}
+
+	bool sat = false;//options.at("saturation").active;
+
+    double pValueMax = options.at("pvalue").getArgumentAsNumber();
+    double identityMin = options.at("identity").getArgumentAsNumber();
+    string taxonomyDir = options.at("taxonomy-dir").argument;
+    string mappingFileName = options.at("mapping-file").argument;
+
+    vector<string> refArgVector;
+    refArgVector.push_back(arguments[0]);
+
+	Sketch sketch;
+    Sketch::Parameters parameters;
+
+    sketch.initFromFiles(refArgVector, parameters);
+
+    string alphabet;
+    sketch.getAlphabetAsString(alphabet);
+    setAlphabetFromString(parameters, alphabet.c_str());
+
+	parameters.parallelism = options.at("threads").getArgumentAsNumber();
+	parameters.kmerSize = sketch.getKmerSize();
+	parameters.noncanonical = sketch.getNoncanonical();
+	parameters.use64 = sketch.getUse64();
+	parameters.preserveCase = sketch.getPreserveCase();
+	parameters.seed = sketch.getHashSeed();
+	parameters.minHashesPerWindow = sketch.getMinHashesPerWindow();
+
+	HashTable hashTable;
+	robin_hood::unordered_map<uint64_t, std::atomic<uint32_t>> hashCounts;
+	unordered_map<uint64_t, TaxID> hashTaxIDs;
+	unordered_map<uint64_t, list<uint32_t> > saturationByIndex;
+
+	string namesDumpFile = taxonomyDir + "/names.dmp";
+	string nodesDumpFile = taxonomyDir + "/nodes.dmp";
+	if (!file_exists(namesDumpFile) || !file_exists(nodesDumpFile)) {
+		cerr << "Could not find a file names.dmp or nodes.dmp in directory " << taxonomyDir << "\n" 
+		     << " To download the required taxonomy files into the current directory, use the following commands:\n"
+			 << "   wget ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz\n"
+			 << "   tar xvvf taxdump.tar.gz\n"
+			 << endl;
+		exit(1);
+
+	}
+	cerr << "Loading taxonomy files ..." << endl;
+	TaxDB taxdb(namesDumpFile, nodesDumpFile); 
+
+	cerr << "Reading mapping file ..." << endl;
+	// Read mapping file - not implemented in favor of specifying the taxonomy ID in the reference comment
+	vector<TaxID> referenceTaxIDs(sketch.getReferenceCount(), 0);
+	if (mappingFileName != "") {
+	  	std::ifstream mappingFile(mappingFileName);
+	  	if (!mappingFile.is_open())
+	    	throw std::runtime_error("unable to open mapping file");
+
+		string referenceID;
+		TaxID taxID;
+		unordered_map<string, TaxID> refTaxMap;
+	  	while (mappingFile >> taxID) 
+		{
+			mappingFile.ignore(1);
+			getline(mappingFile, referenceID, '\n');
+			refTaxMap.emplace(referenceID, taxID);
+	  	}
+		for ( int i = 0; i < sketch.getReferenceCount(); i ++ )
+		{
+			auto const it = refTaxMap.find(sketch.getReference(i).name);
+			if (it == refTaxMap.end()) {
+				// No warning? Could still be mapped based on comment
+				//cerr << "Could not find taxID for reference " << sketch.getReference(i).name << endl;
+			} else {
+				referenceTaxIDs[i] = it->second;
+			}
+		}	
+	} 
+	for ( int i = 0; i < sketch.getReferenceCount(); i ++ )
+	{
+		string word;
+		TaxID taxID = referenceTaxIDs[i];
+		if (taxID == 0) 
+		{
+			stringstream comment_stream(sketch.getReference(i).comment);
+			while (comment_stream >> word) {
+				if (word == "taxid") {
+					comment_stream >> taxID;
+				}
+			}
+		}
+		if (taxID == 0) {
+			cerr << "Could not find taxID for reference " << sketch.getReference(i).name << " in comment field or mapping file!" << endl;
+		} else {
+			//cerr << "Got taxID " << taxID << " for reference " << sketch.getReference(i).name << endl;
+			referenceTaxIDs[i] = taxID;
+		}
+	}
+	
+
+	cerr << "Loading " << arguments[0] << "..." << endl;
+
+	// for each reference
+	for ( int i = 0; i < sketch.getReferenceCount(); i++ )
+	{
+		const HashList & hashes = sketch.getReference(i).hashesSorted;
+
+        // for each hash a reference has
+		for ( int j = 0; j < hashes.size(); j++ )
+		{
+			uint64_t hash = hashes.get64() ? hashes.at(j).hash64 : hashes.at(j).hash32;
+
+			if ( hashTable.count(hash) == 0 )
+			{
+			    // records the counts for each hash
+				hashCounts[hash] = 0;
+			}
+
+			// save the reference ID for the hash
+			hashTable[hash].insert(i);
+		}
+	}
+
+	cerr << "   " << hashTable.size() << " distinct hashes." << endl;
+
+	robin_hood::unordered_set<MinHashHeap *> minHashHeaps;
+
+	bool trans = (alphabet == alphabetProtein);
+
+/*	if ( ! trans )
+	{
+		if ( alphabet != alphabetNucleotide )
+		{
+			cerr << "ERROR: <query> sketch must have nucleotide or amino acid alphabet" << endl;
+			exit(1);
+		}
+
+		if ( sketch.getNoncanonical() )
+		{
+			cerr << "ERROR: nucleotide <query> sketch must be canonical" << endl;
+			exit(1);
+		}
+	}
+*/
+
+	int queryCount = arguments.size() - 1;
+	cerr << (trans ? "Translating from " : "Streaming from ");
+
+	if ( queryCount == 1 )
+	{
+		cerr << arguments[1];
+	}
+	else
+	{
+		cerr << queryCount << " inputs";
+	}
+
+	cerr << "..." << endl;
+
+	int kmerSize = parameters.kmerSize;
+	int minCov = 1;//options.at("minCov").getArgumentAsNumber();
+
+	ThreadPool<CommandScreen::HashInput, CommandScreen::HashOutput> threadPool(hashSequence, parameters.parallelism);
+
+	// open all query files for round robin
+	//
+	gzFile fps[queryCount];
+	list<kseq_t *> kseqs;
+	//
+	for ( int f = 1; f < arguments.size(); f++ )
+	{
+		if ( arguments[f] == "-" )
+		{
+			if ( f > 1 )
+			{
+				cerr << "ERROR: '-' for stdin must be first query" << endl;
+				exit(1);
+			}
+
+			fps[f - 1] = gzdopen(fileno(stdin), "r");
+		}
+		else
+		{
+			fps[f - 1] = gzopen(arguments[f].c_str(), "r");
+
+			if ( fps[f - 1] == 0 )
+			{
+				cerr << "ERROR: could not open " << arguments[f] << endl;
+				exit(1);
+			}
+		}
+
+		kseqs.push_back(kseq_init(fps[f - 1]));
+	}
+
+	// perform round-robin, closing files as they end
+
+	int l;
+	uint64_t count = 0;
+	//uint64_t kmersTotal = 0;
+	uint64_t chunkSize = 1 << 20;
+	string input;
+	input.reserve(chunkSize);
+	list<kseq_t *>::iterator it = kseqs.begin();
+	//
+	while ( true )
+	{
+		if ( kseqs.begin() == kseqs.end() )
+		{
+			l = 0;
+		}
+		else
+		{
+			l = kseq_read(*it);
+
+			if ( l < -1 ) // error
+			{
+				break;
+			}
+
+			if ( l == -1 ) // eof
+			{
+				kseq_destroy(*it);
+				it = kseqs.erase(it);
+				if ( it == kseqs.end() )
+				{
+					it = kseqs.begin();
+				}
+				//continue;
+			}
+		}
+
+		if ( input.length() + (l >= kmerSize ? l + 1 : 0) > chunkSize || kseqs.begin() == kseqs.end() )
+		{
+			// chunk big enough or at the end; time to flush
+
+			// buffer this out since kseq will overwrite (deleted by HashInput destructor)
+			//
+			char * seqCopy = new char[input.length()];
+			//
+			memcpy(seqCopy, input.c_str(), input.length());
+
+			if ( minHashHeaps.begin() == minHashHeaps.end() )
+			{
+				minHashHeaps.emplace(new MinHashHeap(sketch.getUse64(), sketch.getMinHashesPerWindow()));
+			}
+
+			threadPool.runWhenThreadAvailable(new CommandScreen::HashInput(hashCounts, *minHashHeaps.begin(), seqCopy, input.length(), parameters, trans));
+
+			input = "";
+
+			minHashHeaps.erase(minHashHeaps.begin());
+
+			while ( threadPool.outputAvailable() )
+			{
+				useThreadOutput(threadPool.popOutputWhenAvailable(), minHashHeaps);
+			}
+		}
+
+		if ( kseqs.begin() == kseqs.end() )
+		{
+			break;
+		}
+
+		count++;
+
+		if ( l >= kmerSize )
+		{
+			input.append(1, '*');
+			input.append((*it)->seq.s, l);
+		}
+
+		it++;
+
+		if ( it == kseqs.end() )
+		{
+			it = kseqs.begin();
+		}
+	}
+
+	if (  l != -1 )
+	{
+		cerr << "\nERROR: reading inputs" << endl;
+		exit(1);
+	}
+
+	while ( threadPool.running() )
+	{
+		useThreadOutput(threadPool.popOutputWhenAvailable(), minHashHeaps);
+	}
+
+	for ( int i = 0; i < queryCount; i++ )
+	{
+		gzclose(fps[i]);
+	}
+
+	MinHashHeap minHashHeap(sketch.getUse64(), sketch.getMinHashesPerWindow());
+
+	for ( robin_hood::unordered_set<MinHashHeap *>::const_iterator i = minHashHeaps.begin(); i != minHashHeaps.end(); i++ )
+	{
+		HashList hashList(parameters.use64);
+
+		(*i)->toHashList(hashList);
+
+		for ( int i = 0; i < hashList.size(); i++ )
+		{
+			minHashHeap.tryInsert(hashList.at(i));
+		}
+
+		delete *i;
+	}
+
+	if ( count == 0 )
+	{
+		cerr << "\nERROR: Did not find sequence records in inputs" << endl;
+
+		exit(1);
+	}
+
+	/*
+	if ( parameters.targetCov != 0 )
+	{
+		cerr << "Reads required for " << parameters.targetCov << "x coverage: " << count << endl;
+	}
+	else
+	{
+		cerr << "Estimated coverage: " << minHashHeap.estimateMultiplicity() << "x" << endl;
+	}
+	*/
+
+	uint64_t setSize = minHashHeap.estimateSetSize();
+	cerr << "   Estimated distinct" << (trans ? " (translated)" : "") << " k-mers in pool: " << setSize << endl;
+
+	if ( setSize == 0 )
+	{
+		cerr << "WARNING: no valid k-mers in input." << endl;
+		//exit(0);
+	}
+
+    // for each hash we can calculate the LCA, and add a count to the LCA at the end
+	cerr << "Assigning LCA taxIDs to hashes ..." << endl;
+
+	uint64_t * shared = new uint64_t[sketch.getReferenceCount()]; // number of hashes shared with the query
+	vector<uint64_t> * depths = new vector<uint64_t>[sketch.getReferenceCount()]; // how many other references share each hash of a reference?
+	memset(shared, 0, sizeof(uint64_t) * sketch.getReferenceCount());
+	unordered_map<TaxID, TaxCounts> counts;
+	unordered_set<TaxID> allTaxIDs;
+
+	for ( robin_hood::unordered_map<uint64_t, std::atomic<uint32_t> >::const_iterator i = hashCounts.begin(); i != hashCounts.end(); i++ )
+	{
+		// indices of all the references - map them to taxonomy IDs
+		const robin_hood::unordered_set<uint64_t> & indeces = hashTable.at(i->first);
+
+		TaxID taxID = 0;
+		for ( robin_hood::unordered_set<uint64_t>::const_iterator k = indeces.begin(); k != indeces.end(); k++ )
+		{
+			taxID = taxdb.getLowestCommonAncestor(referenceTaxIDs[*k], taxID);
+			shared[*k]++;
+			depths[*k].push_back(i->second);
+
+			if ( sat )
+			{
+				saturationByIndex[*k].push_back(0);// TODO kmersTotal);
+			}
+		}
+		//hashTaxIDs.insert(i->first, taxID);
+		hashTaxIDs[i->first] = taxID;
+		counts[taxID].taxHashCount += 1;
+		if ( i->second >= minCov )
+		{
+			counts[taxID].taxCount += 1;
+		allTaxIDs.insert(taxID);
+		}
+	}
+
+	// Sum up the clade counts and populate the children vectors
+	uint64_t totalCount = 0;
+	uint64_t totalHashCount = 0;
+	for ( unordered_map<TaxID, TaxCounts>::iterator it = counts.begin(); it != counts.end(); ++it ) 
+	{
+		uint64_t hashCount = it->second.taxHashCount;
+		totalHashCount += hashCount;
+
+		uint64_t count = it->second.taxCount;
+		totalCount += count;
+
+		TaxID taxID = it->first;
+		TaxEntry const * taxon = taxdb.getEntry(taxID);
+		while (taxon != NULL) {
+			counts[taxon->taxID].cladeCount += count;
+			counts[taxon->taxID].cladeHashCount += hashCount;
+			if (taxon->parent != NULL) {
+				vector<TaxID>& children = counts[taxon->parent->taxID].children;
+				auto pc_it = lower_bound(children.begin(),
+				                         children.end(),
+									     taxon->taxID);
+				if (pc_it == children.end() || *pc_it != taxon->taxID) {
+					children.insert(pc_it, taxon->taxID);
+				}
+				taxon = taxon->parent;
+			} else {
+				break;
+			}
+		}
+	}
+
+	cerr << "Writing output..." << endl;
+
+	taxdb.writeReport(stdout, counts, totalCount, totalHashCount);
+
+	delete [] shared;
+
+	return 0;
+}
+
+
+
+
+
+} // namespace mash


=====================================
src/mash/CommandTaxScreen.h
=====================================
@@ -0,0 +1,60 @@
+// Copyright © 2015, Battelle National Biodefense Institute (BNBI);
+// all rights reserved. Authored by: Brian Ondov, Todd Treangen,
+// Sergey Koren, and Adam Phillippy
+//
+// See the LICENSE.txt file included with this software for license information.
+
+#ifndef INCLUDED_CommandTaxScreen
+#define INCLUDED_CommandTaxScreen
+
+#include "Command.h"
+#include "Sketch.h"
+#include <list>
+#include <string>
+#include <vector>
+#include <atomic>
+#include <unordered_set>
+#include <unordered_map>
+#include "MinHashHeap.h"
+#include "CommandScreen.h"
+
+
+using std::string;
+using std::cerr;
+using std::cout;
+using std::endl;
+using std::list;
+using std::string;
+using std::unordered_map;
+using std::unordered_set;
+using std::vector;
+
+
+namespace mash {
+
+using TaxID = uint64_t;
+
+class CommandTaxScreen : public Command
+{
+public:
+    
+    CommandTaxScreen();
+    
+    int run() const; // override
+
+private:
+	
+	struct Reference
+	{
+		Reference(uint64_t amerCountNew, std::string nameNew, std::string commentNew)
+		: amerCount(amerCountNew), name(nameNew), comment(commentNew) {}
+		
+		uint64_t amerCount;
+		std::string name;
+		std::string comment;
+	};
+};
+
+} // namespace mash
+
+#endif


=====================================
src/mash/HashSet.cpp
=====================================
@@ -78,14 +78,14 @@ void HashSet::toCounts(std::vector<uint32_t> & counts) const
 {
     if ( use64 )
     {
-        for ( std::unordered_map<hash64_t, uint32_t>::const_iterator i = hashes64.begin(); i != hashes64.end(); i++ )
+        for ( auto i = hashes64.begin(); i != hashes64.end(); i++ )
         {
             counts.push_back(i->second);
         }
     }
     else
     {
-        for ( std::unordered_map<hash32_t, uint32_t>::const_iterator i = hashes32.begin(); i != hashes32.end(); i++ )
+        for ( auto i = hashes32.begin(); i != hashes32.end(); i++ )
         {
             counts.push_back(i->second);
         }
@@ -96,14 +96,14 @@ void HashSet::toHashList(HashList & hashList) const
 {
     if ( use64 )
     {
-        for ( std::unordered_map<hash64_t, uint32_t>::const_iterator i = hashes64.begin(); i != hashes64.end(); i++ )
+        for ( auto i = hashes64.begin(); i != hashes64.end(); i++ )
         {
             hashList.push_back64(i->first);
         }
     }
     else
     {
-        for ( std::unordered_map<hash32_t, uint32_t>::const_iterator i = hashes32.begin(); i != hashes32.end(); i++ )
+        for ( auto i = hashes32.begin(); i != hashes32.end(); i++ )
         {
             hashList.push_back32(i->first);
         }


=====================================
src/mash/HashSet.h
=====================================
@@ -8,7 +8,7 @@
 #define HashSet_h
 
 #include "HashList.h"
-#include <unordered_map>
+#include "robin_hood.h"
 #include <vector>
 
 class HashSet
@@ -28,8 +28,8 @@ public:
 private:
     
     bool use64;
-    std::unordered_map<hash32_t, uint32_t> hashes32;
-    std::unordered_map<hash64_t, uint32_t> hashes64;
+    robin_hood::unordered_map<hash32_t, uint32_t> hashes32;
+    robin_hood::unordered_map<hash64_t, uint32_t> hashes64;
 };
 
 #endif


=====================================
src/mash/Sketch.cpp
=====================================
@@ -1061,22 +1061,42 @@ Sketch::SketchOutput * loadCapnp(Sketch::SketchInput * input)
     return output;
 }
 
+
+/* Array from 0..25 of DNA complement of A..Z */
+const char complement[] = {
+  'T', // 'A' = A
+  'V', // 'B' = not A = C,T,G
+  'G', // 'C' = C
+  'H', // 'D' = not C = A,T,G
+  'N', // 'E' = .
+  'N', // 'F' = .
+  'C', // 'G' = G
+  'D', // 'H' = not G = A,C,T
+  'N', // 'I' = .
+  'N', // 'J' = .
+  'M', // 'K' = T,G = Keto
+  'N', // 'L' = .
+  'K', // 'M' = A,C = Amino
+  'N', // 'N' = A,C,T,G = uNkNowN
+  'N', // 'O' = .
+  'N', // 'P' = .
+  'N', // 'Q' = .
+  'Y', // 'R' = A,G = puRine
+  'S', // 'S' = G,C = Strong
+  'A', // 'T' = T
+  'A', // 'U' = T (RNA)
+  'B', // 'V' = not T = A,C,G
+  'W', // 'W' = A,T = Weak
+  'N', // 'X' = .
+  'R', // 'Y' = pYrimidine = C,T
+  'N', // 'Z' = .
+};
+
 void reverseComplement(const char * src, char * dest, int length)
 {
     for ( int i = 0; i < length; i++ )
     {
-        char base = src[i];
-        
-        switch ( base )
-        {
-            case 'A': base = 'T'; break;
-            case 'C': base = 'G'; break;
-            case 'G': base = 'C'; break;
-            case 'T': base = 'A'; break;
-            default: break;
-        }
-        
-        dest[length - i - 1] = base;
+        dest[i] = complement[ (int) src[length-i-1] - (int) 'A' ];
     }
 }
 


=====================================
src/mash/Sketch.h
=====================================
@@ -8,8 +8,7 @@
 #define Sketch_h
 
 #include "mash/capnp/MinHash.capnp.h"
-#include <unordered_map>
-#include <unordered_set>
+#include "robin_hood.h"
 #include <map>
 #include <vector>
 #include <string>
@@ -126,8 +125,6 @@ public:
         uint32_t position;
     };
     
-    typedef std::unordered_set<hash_t> Hash_set;
-    
     struct Reference
     {
         // no sequence for now
@@ -215,9 +212,9 @@ private:
     void createIndex();
     
     std::vector<Reference> references;
-    std::unordered_map<std::string, int> referenceIndecesById;
+    robin_hood::unordered_map<std::string, int> referenceIndecesById;
     std::vector<std::vector<PositionHash>> positionHashesByReference;
-    std::unordered_map<hash_t, std::vector<Locus>> lociByHash;
+    robin_hood::unordered_map<hash_t, std::vector<Locus>> lociByHash;
     
     Parameters parameters;
     double kmerSpace;


=====================================
src/mash/mash.cpp
=====================================
@@ -10,6 +10,7 @@
 #include "CommandFind.h"
 #include "CommandDistance.h"
 #include "CommandScreen.h"
+#include "CommandTaxScreen.h"
 #include "CommandTriangle.h"
 #include "CommandContain.h"
 #include "CommandInfo.h"
@@ -23,6 +24,7 @@ int main(int argc, const char ** argv)
     //commandList.addCommand(new CommandFind());
     commandList.addCommand(new mash::CommandDistance());
     commandList.addCommand(new mash::CommandScreen());
+    commandList.addCommand(new mash::CommandTaxScreen());
     commandList.addCommand(new mash::CommandTriangle());
 #ifdef COMMAND_WITHIN
     commandList.addCommand(new mash::CommandContain());


=====================================
src/mash/robin_hood.h
=====================================
@@ -0,0 +1,2429 @@
+//                 ______  _____                 ______                _________
+//  ______________ ___  /_ ___(_)_______         ___  /_ ______ ______ ______  /
+//  __  ___/_  __ \__  __ \__  / __  __ \        __  __ \_  __ \_  __ \_  __  /
+//  _  /    / /_/ /_  /_/ /_  /  _  / / /        _  / / // /_/ // /_/ // /_/ /
+//  /_/     \____/ /_.___/ /_/   /_/ /_/ ________/_/ /_/ \____/ \____/ \__,_/
+//                                      _/_____/
+//
+// Fast & memory efficient hashtable based on robin hood hashing for C++11/14/17/20
+// https://github.com/martinus/robin-hood-hashing
+//
+// Licensed under the MIT License <http://opensource.org/licenses/MIT>.
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2020 Martin Ankerl <http://martin.ankerl.com>
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#ifndef ROBIN_HOOD_H_INCLUDED
+#define ROBIN_HOOD_H_INCLUDED
+
+// see https://semver.org/
+#define ROBIN_HOOD_VERSION_MAJOR 3 // for incompatible API changes
+#define ROBIN_HOOD_VERSION_MINOR 9 // for adding functionality in a backwards-compatible manner
+#define ROBIN_HOOD_VERSION_PATCH 1 // for backwards-compatible bug fixes
+
+#include <algorithm>
+#include <cstdlib>
+#include <cstring>
+#include <functional>
+#include <memory> // only to support hash of smart pointers
+#include <stdexcept>
+#include <string>
+#include <type_traits>
+#include <utility>
+#if __cplusplus >= 201703L
+#    include <string_view>
+#endif
+
+// #define ROBIN_HOOD_LOG_ENABLED
+#ifdef ROBIN_HOOD_LOG_ENABLED
+#    include <iostream>
+#    define ROBIN_HOOD_LOG(...) \
+        std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl;
+#else
+#    define ROBIN_HOOD_LOG(x)
+#endif
+
+// #define ROBIN_HOOD_TRACE_ENABLED
+#ifdef ROBIN_HOOD_TRACE_ENABLED
+#    include <iostream>
+#    define ROBIN_HOOD_TRACE(...) \
+        std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl;
+#else
+#    define ROBIN_HOOD_TRACE(x)
+#endif
+
+// #define ROBIN_HOOD_COUNT_ENABLED
+#ifdef ROBIN_HOOD_COUNT_ENABLED
+#    include <iostream>
+#    define ROBIN_HOOD_COUNT(x) ++counts().x;
+namespace robin_hood {
+struct Counts {
+    uint64_t shiftUp{};
+    uint64_t shiftDown{};
+};
+inline std::ostream& operator<<(std::ostream& os, Counts const& c) {
+    return os << c.shiftUp << " shiftUp" << std::endl << c.shiftDown << " shiftDown" << std::endl;
+}
+
+static Counts& counts() {
+    static Counts counts{};
+    return counts;
+}
+} // namespace robin_hood
+#else
+#    define ROBIN_HOOD_COUNT(x)
+#endif
+
+// all non-argument macros should use this facility. See
+// https://www.fluentcpp.com/2019/05/28/better-macros-better-flags/
+#define ROBIN_HOOD(x) ROBIN_HOOD_PRIVATE_DEFINITION_##x()
+
+// mark unused members with this macro
+#define ROBIN_HOOD_UNUSED(identifier)
+
+// bitness
+#if SIZE_MAX == UINT32_MAX
+#    define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 32
+#elif SIZE_MAX == UINT64_MAX
+#    define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 64
+#else
+#    error Unsupported bitness
+#endif
+
+// endianess
+#ifdef _MSC_VER
+#    define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() 1
+#    define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() 0
+#else
+#    define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() \
+        (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#    define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#endif
+
+// inline
+#ifdef _MSC_VER
+#    define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __declspec(noinline)
+#else
+#    define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __attribute__((noinline))
+#endif
+
+// exceptions
+#if !defined(__cpp_exceptions) && !defined(__EXCEPTIONS) && !defined(_CPPUNWIND)
+#    define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 0
+#else
+#    define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 1
+#endif
+
+// count leading/trailing bits
+#if !defined(ROBIN_HOOD_DISABLE_INTRINSICS)
+#    ifdef _MSC_VER
+#        if ROBIN_HOOD(BITNESS) == 32
+#            define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward
+#        else
+#            define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward64
+#        endif
+#        include <intrin.h>
+#        pragma intrinsic(ROBIN_HOOD(BITSCANFORWARD))
+#        define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x)                                       \
+            [](size_t mask) noexcept -> int {                                             \
+                unsigned long index;                                                      \
+                return ROBIN_HOOD(BITSCANFORWARD)(&index, mask) ? static_cast<int>(index) \
+                                                                : ROBIN_HOOD(BITNESS);    \
+            }(x)
+#    else
+#        if ROBIN_HOOD(BITNESS) == 32
+#            define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzl
+#            define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzl
+#        else
+#            define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzll
+#            define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzll
+#        endif
+#        define ROBIN_HOOD_COUNT_LEADING_ZEROES(x) ((x) ? ROBIN_HOOD(CLZ)(x) : ROBIN_HOOD(BITNESS))
+#        define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ((x) ? ROBIN_HOOD(CTZ)(x) : ROBIN_HOOD(BITNESS))
+#    endif
+#endif
+
+// fallthrough
+#ifndef __has_cpp_attribute // For backwards compatibility
+#    define __has_cpp_attribute(x) 0
+#endif
+#if __has_cpp_attribute(clang::fallthrough)
+#    define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[clang::fallthrough]]
+#elif __has_cpp_attribute(gnu::fallthrough)
+#    define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[gnu::fallthrough]]
+#else
+#    define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH()
+#endif
+
+// likely/unlikely
+#ifdef _MSC_VER
+#    define ROBIN_HOOD_LIKELY(condition) condition
+#    define ROBIN_HOOD_UNLIKELY(condition) condition
+#else
+#    define ROBIN_HOOD_LIKELY(condition) __builtin_expect(condition, 1)
+#    define ROBIN_HOOD_UNLIKELY(condition) __builtin_expect(condition, 0)
+#endif
+
+// detect if native wchar_t type is availiable in MSVC
+#ifdef _MSC_VER
+#    ifdef _NATIVE_WCHAR_T_DEFINED
+#        define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1
+#    else
+#        define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 0
+#    endif
+#else
+#    define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1
+#endif
+
+// workaround missing "is_trivially_copyable" in g++ < 5.0
+// See https://stackoverflow.com/a/31798726/48181
+#if defined(__GNUC__) && __GNUC__ < 5
+#    define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__)
+#else
+#    define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value
+#endif
+
+// helpers for C++ versions, see https://gcc.gnu.org/onlinedocs/cpp/Standard-Predefined-Macros.html
+#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX() __cplusplus
+#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX98() 199711L
+#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX11() 201103L
+#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX14() 201402L
+#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX17() 201703L
+
+#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17)
+#    define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD() [[nodiscard]]
+#else
+#    define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD()
+#endif
+
+namespace robin_hood {
+
+#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14)
+#    define ROBIN_HOOD_STD std
+#else
+
+// c++11 compatibility layer
+namespace ROBIN_HOOD_STD {
+template <class T>
+struct alignment_of
+    : std::integral_constant<std::size_t, alignof(typename std::remove_all_extents<T>::type)> {};
+
+template <class T, T... Ints>
+class integer_sequence {
+public:
+    using value_type = T;
+    static_assert(std::is_integral<value_type>::value, "not integral type");
+    static constexpr std::size_t size() noexcept {
+        return sizeof...(Ints);
+    }
+};
+template <std::size_t... Inds>
+using index_sequence = integer_sequence<std::size_t, Inds...>;
+
+namespace detail_ {
+template <class T, T Begin, T End, bool>
+struct IntSeqImpl {
+    using TValue = T;
+    static_assert(std::is_integral<TValue>::value, "not integral type");
+    static_assert(Begin >= 0 && Begin < End, "unexpected argument (Begin<0 || Begin<=End)");
+
+    template <class, class>
+    struct IntSeqCombiner;
+
+    template <TValue... Inds0, TValue... Inds1>
+    struct IntSeqCombiner<integer_sequence<TValue, Inds0...>, integer_sequence<TValue, Inds1...>> {
+        using TResult = integer_sequence<TValue, Inds0..., Inds1...>;
+    };
+
+    using TResult =
+        typename IntSeqCombiner<typename IntSeqImpl<TValue, Begin, Begin + (End - Begin) / 2,
+                                                    (End - Begin) / 2 == 1>::TResult,
+                                typename IntSeqImpl<TValue, Begin + (End - Begin) / 2, End,
+                                                    (End - Begin + 1) / 2 == 1>::TResult>::TResult;
+};
+
+template <class T, T Begin>
+struct IntSeqImpl<T, Begin, Begin, false> {
+    using TValue = T;
+    static_assert(std::is_integral<TValue>::value, "not integral type");
+    static_assert(Begin >= 0, "unexpected argument (Begin<0)");
+    using TResult = integer_sequence<TValue>;
+};
+
+template <class T, T Begin, T End>
+struct IntSeqImpl<T, Begin, End, true> {
+    using TValue = T;
+    static_assert(std::is_integral<TValue>::value, "not integral type");
+    static_assert(Begin >= 0, "unexpected argument (Begin<0)");
+    using TResult = integer_sequence<TValue, Begin>;
+};
+} // namespace detail_
+
+template <class T, T N>
+using make_integer_sequence = typename detail_::IntSeqImpl<T, 0, N, (N - 0) == 1>::TResult;
+
+template <std::size_t N>
+using make_index_sequence = make_integer_sequence<std::size_t, N>;
+
+template <class... T>
+using index_sequence_for = make_index_sequence<sizeof...(T)>;
+
+} // namespace ROBIN_HOOD_STD
+
+#endif
+
+namespace detail {
+
+// make sure we static_cast to the correct type for hash_int
+// (SizeT mirrors the pointer width selected by ROBIN_HOOD(BITNESS)).
+#if ROBIN_HOOD(BITNESS) == 64
+using SizeT = uint64_t;
+#else
+using SizeT = uint32_t;
+#endif
+
+// Rotates x right by k bits. Shift amounts are masked to the bit width of T so
+// the expression is well-defined for k == 0 (or any multiple of the width); the
+// original form shifted left by the full type width in that case, which is
+// undefined behaviour. For 0 < k < width (the only values used in this file)
+// the result is identical to the previous implementation. T must be unsigned.
+template <typename T>
+T rotr(T x, unsigned k) {
+    constexpr unsigned mask = 8U * sizeof(T) - 1U;
+    return (x >> (k & mask)) | (x << ((0U - k) & mask));
+}
+
+// This cast gets rid of warnings like "cast from 'uint8_t*' {aka 'unsigned char*'} to
+// 'uint64_t*' {aka 'long unsigned int*'} increases required alignment of target type". Use with
+// care!
+template <typename T>
+inline T reinterpret_cast_no_cast_align_warning(void* ptr) noexcept {
+    return reinterpret_cast<T>(ptr);
+}
+
+// const overload of the above.
+template <typename T>
+inline T reinterpret_cast_no_cast_align_warning(void const* ptr) noexcept {
+    return reinterpret_cast<T>(ptr);
+}
+
+// Reports an error: throws E(args...) when exceptions are enabled, otherwise aborts.
+// make sure this is not inlined as it is slow and dramatically enlarges code, thus making other
+// inlinings more difficult. Throws are also generally the slow path.
+template <typename E, typename... Args>
+[[noreturn]] ROBIN_HOOD(NOINLINE)
+#if ROBIN_HOOD(HAS_EXCEPTIONS)
+    void doThrow(Args&&... args) {
+    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay)
+    throw E(std::forward<Args>(args)...);
+}
+#else
+    void doThrow(Args&&... ROBIN_HOOD_UNUSED(args) /*unused*/) {
+    abort();
+}
+#endif
+
+// Returns t unchanged, or reports E(args...) via doThrow() when t is null.
+template <typename E, typename T, typename... Args>
+T* assertNotNull(T* t, Args&&... args) {
+    if (ROBIN_HOOD_UNLIKELY(nullptr == t)) {
+        doThrow<E>(std::forward<Args>(args)...);
+    }
+    return t;
+}
+
+// Reads a T from possibly-unaligned memory.
+template <typename T>
+inline T unaligned_load(void const* ptr) noexcept {
+    // using memcpy so we don't get into unaligned load problems.
+    // compiler should optimize this very well anyways.
+    T t;
+    std::memcpy(&t, ptr, sizeof(T));
+    return t;
+}
+
+// Allocates bulks of memory for objects of type T. This deallocates the memory in the destructor,
+// and keeps a linked list of the allocated memory around. Overhead per allocation is the size of a
+// pointer.
+//
+// Internals: mHead is an intrusive free list of individual T-sized slots (the
+// next-pointer is stored inside the unused slot itself); mListForFree is a
+// linked list of the malloc'd blocks, walked and freed in reset().
+template <typename T, size_t MinNumAllocs = 4, size_t MaxNumAllocs = 256>
+class BulkPoolAllocator {
+public:
+    BulkPoolAllocator() noexcept = default;
+
+    // does not copy anything, just creates a new allocator.
+    BulkPoolAllocator(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept
+        : mHead(nullptr)
+        , mListForFree(nullptr) {}
+
+    // move: steals the other allocator's free list and memory blocks.
+    BulkPoolAllocator(BulkPoolAllocator&& o) noexcept
+        : mHead(o.mHead)
+        , mListForFree(o.mListForFree) {
+        o.mListForFree = nullptr;
+        o.mHead = nullptr;
+    }
+
+    // move assignment: frees own memory first, then takes over o's lists.
+    BulkPoolAllocator& operator=(BulkPoolAllocator&& o) noexcept {
+        reset();
+        mHead = o.mHead;
+        mListForFree = o.mListForFree;
+        o.mListForFree = nullptr;
+        o.mHead = nullptr;
+        return *this;
+    }
+
+    BulkPoolAllocator&
+    // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp)
+    operator=(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept {
+        // does not do anything
+        return *this;
+    }
+
+    ~BulkPoolAllocator() noexcept {
+        reset();
+    }
+
+    // Deallocates all allocated memory.
+    void reset() noexcept {
+        // each block's first pointer-sized word links to the next block.
+        while (mListForFree) {
+            T* tmp = *mListForFree;
+            ROBIN_HOOD_LOG("std::free")
+            std::free(mListForFree);
+            mListForFree = reinterpret_cast_no_cast_align_warning<T**>(tmp);
+        }
+        mHead = nullptr;
+    }
+
+    // allocates, but does NOT initialize. Use in-place new constructor, e.g.
+    //   T* obj = pool.allocate();
+    //   ::new (static_cast<void*>(obj)) T();
+    T* allocate() {
+        T* tmp = mHead;
+        if (!tmp) {
+            tmp = performAllocation();
+        }
+
+        // pop the slot off the free list: its next-pointer is stored in the slot itself.
+        mHead = *reinterpret_cast_no_cast_align_warning<T**>(tmp);
+        return tmp;
+    }
+
+    // does not actually deallocate but puts it in store.
+    // make sure you have already called the destructor! e.g. with
+    //  obj->~T();
+    //  pool.deallocate(obj);
+    void deallocate(T* obj) noexcept {
+        *reinterpret_cast_no_cast_align_warning<T**>(obj) = mHead;
+        mHead = obj;
+    }
+
+    // Adds an already allocated block of memory to the allocator. This allocator is from now on
+    // responsible for freeing the data (with free()). If the provided data is not large enough to
+    // make use of, it is immediately freed. Otherwise it is reused and freed in the destructor.
+    void addOrFree(void* ptr, const size_t numBytes) noexcept {
+        // calculate number of available elements in ptr
+        if (numBytes < ALIGNMENT + ALIGNED_SIZE) {
+            // not enough data for at least one element. Free and return..
+            ROBIN_HOOD_LOG("std::free")
+            std::free(ptr);
+        } else {
+            ROBIN_HOOD_LOG("add to buffer")
+            add(ptr, numBytes);
+        }
+    }
+
+    void swap(BulkPoolAllocator<T, MinNumAllocs, MaxNumAllocs>& other) noexcept {
+        using std::swap;
+        swap(mHead, other.mHead);
+        swap(mListForFree, other.mListForFree);
+    }
+
+private:
+    // iterates the list of allocated memory to calculate how many to alloc next.
+    // Recalculating this each time saves us a size_t member.
+    // This ignores the fact that memory blocks might have been added manually with addOrFree. In
+    // practice, this should not matter much.
+    ROBIN_HOOD(NODISCARD) size_t calcNumElementsToAlloc() const noexcept {
+        // doubles per already-allocated block, capped at MaxNumAllocs.
+        auto tmp = mListForFree;
+        size_t numAllocs = MinNumAllocs;
+
+        while (numAllocs * 2 <= MaxNumAllocs && tmp) {
+            auto x = reinterpret_cast<T***>(tmp);
+            tmp = *x;
+            numAllocs *= 2;
+        }
+
+        return numAllocs;
+    }
+
+    // WARNING: Underflow if numBytes < ALIGNMENT! This is guarded in addOrFree().
+    void add(void* ptr, const size_t numBytes) noexcept {
+        const size_t numElements = (numBytes - ALIGNMENT) / ALIGNED_SIZE;
+
+        auto data = reinterpret_cast<T**>(ptr);
+
+        // link free list
+        auto x = reinterpret_cast<T***>(data);
+        *x = mListForFree;
+        mListForFree = data;
+
+        // create linked list for newly allocated data
+        // (the element area starts ALIGNMENT bytes in; the first word holds the block link).
+        auto* const headT =
+            reinterpret_cast_no_cast_align_warning<T*>(reinterpret_cast<char*>(ptr) + ALIGNMENT);
+
+        auto* const head = reinterpret_cast<char*>(headT);
+
+        // Visual Studio compiler automatically unrolls this loop, which is pretty cool
+        for (size_t i = 0; i < numElements; ++i) {
+            *reinterpret_cast_no_cast_align_warning<char**>(head + i * ALIGNED_SIZE) =
+                head + (i + 1) * ALIGNED_SIZE;
+        }
+
+        // last one points to 0
+        // (overwrites the out-of-range link written by the loop's final iteration).
+        *reinterpret_cast_no_cast_align_warning<T**>(head + (numElements - 1) * ALIGNED_SIZE) =
+            mHead;
+        mHead = headT;
+    }
+
+    // Called when no memory is available (mHead == 0).
+    // Don't inline this slow path.
+    ROBIN_HOOD(NOINLINE) T* performAllocation() {
+        size_t const numElementsToAlloc = calcNumElementsToAlloc();
+
+        // alloc new memory: [prev |T, T, ... T]
+        size_t const bytes = ALIGNMENT + ALIGNED_SIZE * numElementsToAlloc;
+        ROBIN_HOOD_LOG("std::malloc " << bytes << " = " << ALIGNMENT << " + " << ALIGNED_SIZE
+                                      << " * " << numElementsToAlloc)
+        add(assertNotNull<std::bad_alloc>(std::malloc(bytes)), bytes);
+        return mHead;
+    }
+
+    // enforce byte alignment of the T's
+#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14)
+    static constexpr size_t ALIGNMENT =
+        (std::max)(std::alignment_of<T>::value, std::alignment_of<T*>::value);
+#else
+    static const size_t ALIGNMENT =
+        (ROBIN_HOOD_STD::alignment_of<T>::value > ROBIN_HOOD_STD::alignment_of<T*>::value)
+            ? ROBIN_HOOD_STD::alignment_of<T>::value
+            : +ROBIN_HOOD_STD::alignment_of<T*>::value; // the unary + is a compiler workaround
+#endif
+
+    static constexpr size_t ALIGNED_SIZE = ((sizeof(T) - 1) / ALIGNMENT + 1) * ALIGNMENT;
+
+    static_assert(MinNumAllocs >= 1, "MinNumAllocs");
+    static_assert(MaxNumAllocs >= MinNumAllocs, "MaxNumAllocs");
+    static_assert(ALIGNED_SIZE >= sizeof(T*), "ALIGNED_SIZE");
+    static_assert(0 == (ALIGNED_SIZE % sizeof(T*)), "ALIGNED_SIZE mod");
+    static_assert(ALIGNMENT >= sizeof(T*), "ALIGNMENT");
+
+    T* mHead{nullptr};          // free list of individual T slots
+    T** mListForFree{nullptr};  // linked list of malloc'd blocks, freed in reset()
+};
+
+template <typename T, size_t MinSize, size_t MaxSize, bool IsFlat>
+struct NodeAllocator;
+
+// dummy allocator that does nothing (flat maps store nodes inline, no pooling needed)
+template <typename T, size_t MinSize, size_t MaxSize>
+struct NodeAllocator<T, MinSize, MaxSize, true> {
+
+    // we are not using the data, so just free it.
+    void addOrFree(void* ptr, size_t ROBIN_HOOD_UNUSED(numBytes) /*unused*/) noexcept {
+        ROBIN_HOOD_LOG("std::free")
+        std::free(ptr);
+    }
+};
+
+// node-based (non-flat) maps pool their heap nodes through BulkPoolAllocator.
+template <typename T, size_t MinSize, size_t MaxSize>
+struct NodeAllocator<T, MinSize, MaxSize, false> : public BulkPoolAllocator<T, MinSize, MaxSize> {};
+
+// dummy hash, used as mixer when robin_hood::hash is already used
+template <typename T>
+struct identity_hash {
+    constexpr size_t operator()(T const& obj) const noexcept {
+        return static_cast<size_t>(obj);
+    }
+};
+
+// c++14 doesn't have is_nothrow_swappable, and clang++ 6.0.1 doesn't like it either, so I'm making
+// my own here.
+namespace swappable {
+#if ROBIN_HOOD(CXX) < ROBIN_HOOD(CXX17)
+using std::swap;
+template <typename T>
+struct nothrow {
+    // true when swapping two T& (found via ADL or std::swap) cannot throw
+    static const bool value = noexcept(swap(std::declval<T&>(), std::declval<T&>()));
+};
+#else
+template <typename T>
+struct nothrow {
+    static const bool value = std::is_nothrow_swappable<T>::value;
+};
+#endif
+} // namespace swappable
+
+} // namespace detail
+
+// tag type used to select heterogeneous-lookup overloads
+struct is_transparent_tag {};
+
+// A custom pair implementation is used in the map because std::pair is not is_trivially_copyable,
+// which means it would not be allowed to be used in std::memcpy. This struct is copyable, which is
+// also tested.
+template <typename T1, typename T2>
+struct pair {
+    using first_type = T1;
+    using second_type = T2;
+
+    // default ctor, only participates when both members are default-constructible.
+    template <typename U1 = T1, typename U2 = T2,
+              typename = typename std::enable_if<std::is_default_constructible<U1>::value &&
+                                                 std::is_default_constructible<U2>::value>::type>
+    constexpr pair() noexcept(noexcept(U1()) && noexcept(U2()))
+        : first()
+        , second() {}
+
+    // pair constructors are explicit so we don't accidentally call this ctor when we don't have to.
+    explicit constexpr pair(std::pair<T1, T2> const& o) noexcept(
+        noexcept(T1(std::declval<T1 const&>())) && noexcept(T2(std::declval<T2 const&>())))
+        : first(o.first)
+        , second(o.second) {}
+
+    // pair constructors are explicit so we don't accidentally call this ctor when we don't have to.
+    explicit constexpr pair(std::pair<T1, T2>&& o) noexcept(noexcept(
+        T1(std::move(std::declval<T1&&>()))) && noexcept(T2(std::move(std::declval<T2&&>()))))
+        : first(std::move(o.first))
+        , second(std::move(o.second)) {}
+
+    // move-constructs both members from rvalues.
+    constexpr pair(T1&& a, T2&& b) noexcept(noexcept(
+        T1(std::move(std::declval<T1&&>()))) && noexcept(T2(std::move(std::declval<T2&&>()))))
+        : first(std::move(a))
+        , second(std::move(b)) {}
+
+    // perfect-forwarding ctor for convertible argument types.
+    template <typename U1, typename U2>
+    constexpr pair(U1&& a, U2&& b) noexcept(noexcept(T1(std::forward<U1>(
+        std::declval<U1&&>()))) && noexcept(T2(std::forward<U2>(std::declval<U2&&>()))))
+        : first(std::forward<U1>(a))
+        , second(std::forward<U2>(b)) {}
+
+    // piecewise construction, delegating to the index_sequence ctor below.
+    template <typename... U1, typename... U2>
+    constexpr pair(
+        std::piecewise_construct_t /*unused*/, std::tuple<U1...> a,
+        std::tuple<U2...> b) noexcept(noexcept(pair(std::declval<std::tuple<U1...>&>(),
+                                                    std::declval<std::tuple<U2...>&>(),
+                                                    ROBIN_HOOD_STD::index_sequence_for<U1...>(),
+                                                    ROBIN_HOOD_STD::index_sequence_for<U2...>())))
+        : pair(a, b, ROBIN_HOOD_STD::index_sequence_for<U1...>(),
+               ROBIN_HOOD_STD::index_sequence_for<U2...>()) {}
+
+    // constructor called from the std::piecewise_construct_t ctor
+    template <typename... U1, size_t... I1, typename... U2, size_t... I2>
+    pair(std::tuple<U1...>& a, std::tuple<U2...>& b, ROBIN_HOOD_STD::index_sequence<I1...> /*unused*/, ROBIN_HOOD_STD::index_sequence<I2...> /*unused*/) noexcept(
+        noexcept(T1(std::forward<U1>(std::get<I1>(
+            std::declval<std::tuple<
+                U1...>&>()))...)) && noexcept(T2(std::
+                                                     forward<U2>(std::get<I2>(
+                                                         std::declval<std::tuple<U2...>&>()))...)))
+        : first(std::forward<U1>(std::get<I1>(a))...)
+        , second(std::forward<U2>(std::get<I2>(b))...) {
+        // make visual studio compiler happy about warning about unused a & b.
+        // Visual studio's pair implementation disables warning 4100.
+        (void)a;
+        (void)b;
+    }
+
+    // member-wise swap; noexcept when both member swaps cannot throw.
+    void swap(pair<T1, T2>& o) noexcept((detail::swappable::nothrow<T1>::value) &&
+                                        (detail::swappable::nothrow<T2>::value)) {
+        using std::swap;
+        swap(first, o.first);
+        swap(second, o.second);
+    }
+
+    T1 first;  // NOLINT(misc-non-private-member-variables-in-classes)
+    T2 second; // NOLINT(misc-non-private-member-variables-in-classes)
+};
+
+// swaps two pairs via their member swap().
+template <typename A, typename B>
+inline void swap(pair<A, B>& a, pair<A, B>& b) noexcept(
+    noexcept(std::declval<pair<A, B>&>().swap(std::declval<pair<A, B>&>()))) {
+    a.swap(b);
+}
+
+// member-wise equality.
+template <typename A, typename B>
+inline constexpr bool operator==(pair<A, B> const& x, pair<A, B> const& y) {
+    return (x.first == y.first) && (x.second == y.second);
+}
+template <typename A, typename B>
+inline constexpr bool operator!=(pair<A, B> const& x, pair<A, B> const& y) {
+    return !(x == y);
+}
+// lexicographic ordering, expressed with operator< only.
+template <typename A, typename B>
+inline constexpr bool operator<(pair<A, B> const& x, pair<A, B> const& y) noexcept(noexcept(
+    std::declval<A const&>() < std::declval<A const&>()) && noexcept(std::declval<B const&>() <
+                                                                     std::declval<B const&>())) {
+    return x.first < y.first || (!(y.first < x.first) && x.second < y.second);
+}
+template <typename A, typename B>
+inline constexpr bool operator>(pair<A, B> const& x, pair<A, B> const& y) {
+    return y < x;
+}
+template <typename A, typename B>
+inline constexpr bool operator<=(pair<A, B> const& x, pair<A, B> const& y) {
+    return !(x > y);
+}
+template <typename A, typename B>
+inline constexpr bool operator>=(pair<A, B> const& x, pair<A, B> const& y) {
+    return !(x < y);
+}
+
+// Hashes an arbitrary byte range. The constants m and r match MurmurHash2-64A;
+// the bulk of the input is processed 8 bytes at a time, the tail byte-by-byte.
+inline size_t hash_bytes(void const* ptr, size_t len) noexcept {
+    static constexpr uint64_t m = UINT64_C(0xc6a4a7935bd1e995);
+    static constexpr uint64_t seed = UINT64_C(0xe17a1465);
+    static constexpr unsigned int r = 47;
+
+    auto const* const data64 = static_cast<uint64_t const*>(ptr);
+    uint64_t h = seed ^ (len * m);
+
+    // mix in one 64-bit word per iteration (unaligned_load avoids alignment UB).
+    size_t const n_blocks = len / 8;
+    for (size_t i = 0; i < n_blocks; ++i) {
+        auto k = detail::unaligned_load<uint64_t>(data64 + i);
+
+        k *= m;
+        k ^= k >> r;
+        k *= m;
+
+        h ^= k;
+        h *= m;
+    }
+
+    // fold in the remaining 0-7 tail bytes; cases intentionally fall through.
+    auto const* const data8 = reinterpret_cast<uint8_t const*>(data64 + n_blocks);
+    switch (len & 7U) {
+    case 7:
+        h ^= static_cast<uint64_t>(data8[6]) << 48U;
+        ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH
+    case 6:
+        h ^= static_cast<uint64_t>(data8[5]) << 40U;
+        ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH
+    case 5:
+        h ^= static_cast<uint64_t>(data8[4]) << 32U;
+        ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH
+    case 4:
+        h ^= static_cast<uint64_t>(data8[3]) << 24U;
+        ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH
+    case 3:
+        h ^= static_cast<uint64_t>(data8[2]) << 16U;
+        ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH
+    case 2:
+        h ^= static_cast<uint64_t>(data8[1]) << 8U;
+        ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH
+    case 1:
+        h ^= static_cast<uint64_t>(data8[0]);
+        h *= m;
+        ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH
+    default:
+        break;
+    }
+
+    // final avalanche.
+    h ^= h >> r;
+    h *= m;
+    h ^= h >> r;
+    return static_cast<size_t>(h);
+}
+
+inline size_t hash_int(uint64_t x) noexcept {
+    // inspired by lemire's strongly universal hashing
+    // https://lemire.me/blog/2018/08/15/fast-strongly-universal-64-bit-hashing-everywhere/
+    //
+    // Instead of shifts, we use rotations so we don't lose any bits.
+    //
+    // The two multiplications with odd 64-bit constants plus the final rotation
+    // provide the mixing; it is most important that the lower bits are well mixed.
+    auto h1 = x * UINT64_C(0xA24BAED4963EE407);
+    auto h2 = detail::rotr(x, 32U) * UINT64_C(0x9FB21C651E98DF25);
+    auto h = detail::rotr(h1 + h2, 32U);
+    return static_cast<size_t>(h);
+}
+
+// A thin wrapper around std::hash, performing an additional simple mixing step of the result.
+template <typename T, typename Enable = void>
+struct hash : public std::hash<T> {
+    size_t operator()(T const& obj) const
+        noexcept(noexcept(std::declval<std::hash<T>>().operator()(std::declval<T const&>()))) {
+        // call base hash
+        auto result = std::hash<T>::operator()(obj);
+        // return a mix of that, to be safe against an identity std::hash
+        return hash_int(static_cast<detail::SizeT>(result));
+    }
+};
+
+// strings are hashed over their raw character bytes.
+template <typename CharT>
+struct hash<std::basic_string<CharT>> {
+    size_t operator()(std::basic_string<CharT> const& str) const noexcept {
+        return hash_bytes(str.data(), sizeof(CharT) * str.size());
+    }
+};
+
+#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17)
+// C++17: string_view gets the same byte-wise hash as basic_string.
+template <typename CharT>
+struct hash<std::basic_string_view<CharT>> {
+    size_t operator()(std::basic_string_view<CharT> const& sv) const noexcept {
+        return hash_bytes(sv.data(), sizeof(CharT) * sv.size());
+    }
+};
+#endif
+
+// raw pointers hash by their (mixed) address value.
+template <class T>
+struct hash<T*> {
+    size_t operator()(T* ptr) const noexcept {
+        return hash_int(reinterpret_cast<detail::SizeT>(ptr));
+    }
+};
+
+// smart pointers hash by the address of the managed object.
+template <class T>
+struct hash<std::unique_ptr<T>> {
+    size_t operator()(std::unique_ptr<T> const& ptr) const noexcept {
+        return hash_int(reinterpret_cast<detail::SizeT>(ptr.get()));
+    }
+};
+
+template <class T>
+struct hash<std::shared_ptr<T>> {
+    size_t operator()(std::shared_ptr<T> const& ptr) const noexcept {
+        return hash_int(reinterpret_cast<detail::SizeT>(ptr.get()));
+    }
+};
+
+// enums hash via their underlying integer type.
+template <typename Enum>
+struct hash<Enum, typename std::enable_if<std::is_enum<Enum>::value>::type> {
+    size_t operator()(Enum e) const noexcept {
+        using Underlying = typename std::underlying_type<Enum>::type;
+        return hash<Underlying>{}(static_cast<Underlying>(e));
+    }
+};
+
+// Defines hash<T> for an integral type: widen to uint64_t and mix with hash_int.
+#define ROBIN_HOOD_HASH_INT(T)                           \
+    template <>                                          \
+    struct hash<T> {                                     \
+        size_t operator()(T const& obj) const noexcept { \
+            return hash_int(static_cast<uint64_t>(obj)); \
+        }                                                \
+    }
+
+#if defined(__GNUC__) && !defined(__clang__)
+#    pragma GCC diagnostic push
+#    pragma GCC diagnostic ignored "-Wuseless-cast"
+#endif
+// see https://en.cppreference.com/w/cpp/utility/hash
+ROBIN_HOOD_HASH_INT(bool);
+ROBIN_HOOD_HASH_INT(char);
+ROBIN_HOOD_HASH_INT(signed char);
+ROBIN_HOOD_HASH_INT(unsigned char);
+ROBIN_HOOD_HASH_INT(char16_t);
+ROBIN_HOOD_HASH_INT(char32_t);
+#if ROBIN_HOOD(HAS_NATIVE_WCHART)
+ROBIN_HOOD_HASH_INT(wchar_t);
+#endif
+ROBIN_HOOD_HASH_INT(short);
+ROBIN_HOOD_HASH_INT(unsigned short);
+ROBIN_HOOD_HASH_INT(int);
+ROBIN_HOOD_HASH_INT(unsigned int);
+ROBIN_HOOD_HASH_INT(long);
+ROBIN_HOOD_HASH_INT(long long);
+ROBIN_HOOD_HASH_INT(unsigned long);
+ROBIN_HOOD_HASH_INT(unsigned long long);
+#if defined(__GNUC__) && !defined(__clang__)
+#    pragma GCC diagnostic pop
+#endif
+namespace detail {
+
+// maps any type to void (equivalent of C++17 std::void_t, used for detection below).
+template <typename T>
+struct void_type {
+    using type = void;
+};
+
+// detects whether T declares a nested is_transparent type (heterogeneous lookup).
+template <typename T, typename = void>
+struct has_is_transparent : public std::false_type {};
+
+template <typename T>
+struct has_is_transparent<T, typename void_type<typename T::is_transparent>::type>
+    : public std::true_type {};
+
+// using wrapper classes for hash and key_equal prevents the diamond problem when the same type
+// is used. see https://stackoverflow.com/a/28771920/48181
+template <typename T>
+struct WrapHash : public T {
+    WrapHash() = default;
+    explicit WrapHash(T const& o) noexcept(noexcept(T(std::declval<T const&>())))
+        : T(o) {}
+};
+
+template <typename T>
+struct WrapKeyEqual : public T {
+    WrapKeyEqual() = default;
+    explicit WrapKeyEqual(T const& o) noexcept(noexcept(T(std::declval<T const&>())))
+        : T(o) {}
+};
+
+// A highly optimized hashmap implementation, using the Robin Hood algorithm.
+//
+// In most cases, this map should be usable as a drop-in replacement for std::unordered_map, but
+// be about 2x faster in most cases and require much less allocations.
+//
+// This implementation uses the following memory layout:
+//
+// [Node, Node, ... Node | info, info, ... infoSentinel ]
+//
+// * Node: either a DataNode that directly has the std::pair<key, val> as member,
+//   or a DataNode with a pointer to std::pair<key,val>. Which DataNode representation to use
+//   depends on how fast the swap() operation is. Heuristically, this is automatically chosen
+//   based on sizeof(). there are always 2^n Nodes.
+//
+// * info: Each Node in the map has a corresponding info byte, so there are 2^n info bytes.
+//   Each byte is initialized to 0, meaning the corresponding Node is empty. Set to 1 means the
+//   corresponding node contains data. Set to 2 means the corresponding Node is filled, but it
+//   actually belongs to the previous position and was pushed out because that place is already
+//   taken.
+//
+// * infoSentinel: Sentinel byte set to 1, so that iterator's ++ can stop at end() without the
+//   need for an idx variable.
+//
+// According to STL, order of templates has effect on throughput. That's why I've moved the
+// boolean to the front.
+// https://www.reddit.com/r/cpp/comments/ahp6iu/compile_time_binary_size_reductions_and_cs_future/eeguck4/
+template <bool IsFlat, size_t MaxLoadFactor100, typename Key, typename T, typename Hash,
+          typename KeyEqual>
+class Table
+    : public WrapHash<Hash>,
+      public WrapKeyEqual<KeyEqual>,
+      detail::NodeAllocator<
+          typename std::conditional<
+              std::is_void<T>::value, Key,
+              robin_hood::pair<typename std::conditional<IsFlat, Key, Key const>::type, T>>::type,
+          4, 16384, IsFlat> {
+public:
+    static constexpr bool is_flat = IsFlat;
+    static constexpr bool is_map = !std::is_void<T>::value;
+    static constexpr bool is_set = !is_map;
+    static constexpr bool is_transparent =
+        has_is_transparent<Hash>::value && has_is_transparent<KeyEqual>::value;
+
+    using key_type = Key;
+    using mapped_type = T;
+    using value_type = typename std::conditional<
+        is_set, Key,
+        robin_hood::pair<typename std::conditional<is_flat, Key, Key const>::type, T>>::type;
+    using size_type = size_t;
+    using hasher = Hash;
+    using key_equal = KeyEqual;
+    using Self = Table<IsFlat, MaxLoadFactor100, key_type, mapped_type, hasher, key_equal>;
+
+private:
+    static_assert(MaxLoadFactor100 > 10 && MaxLoadFactor100 < 100,
+                  "MaxLoadFactor100 needs to be >10 && < 100");
+
+    using WHash = WrapHash<Hash>;
+    using WKeyEqual = WrapKeyEqual<KeyEqual>;
+
+    // configuration defaults
+
+    // make sure we have 8 elements, needed to quickly rehash mInfo
+    static constexpr size_t InitialNumElements = sizeof(uint64_t);
+    static constexpr uint32_t InitialInfoNumBits = 5;
+    static constexpr uint8_t InitialInfoInc = 1U << InitialInfoNumBits;
+    static constexpr size_t InfoMask = InitialInfoInc - 1U;
+    static constexpr uint8_t InitialInfoHashShift = 0;
+    using DataPool = detail::NodeAllocator<value_type, 4, 16384, IsFlat>;
+
+    // type needs to be wider than uint8_t.
+    using InfoType = uint32_t;
+
+    // DataNode ////////////////////////////////////////////////////////
+
+    // Primary template for the data node. We have special implementations for small and big
+    // objects. For large objects it is assumed that swap() is fairly slow, so we allocate these
+    // on the heap so swap merely swaps a pointer.
+    template <typename M, bool>
+    class DataNode {};
+
+    // Small: just allocate on the stack.
+    template <typename M>
+    class DataNode<M, true> final {
+    public:
+        template <typename... Args>
+        explicit DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, Args&&... args) noexcept(
+            noexcept(value_type(std::forward<Args>(args)...)))
+            : mData(std::forward<Args>(args)...) {}
+
+        DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode<M, true>&& n) noexcept(
+            std::is_nothrow_move_constructible<value_type>::value)
+            : mData(std::move(n.mData)) {}
+
+        // doesn't do anything
+        void destroy(M& ROBIN_HOOD_UNUSED(map) /*unused*/) noexcept {}
+        void destroyDoNotDeallocate() noexcept {}
+
+        value_type const* operator->() const noexcept {
+            return &mData;
+        }
+        value_type* operator->() noexcept {
+            return &mData;
+        }
+
+        const value_type& operator*() const noexcept {
+            return mData;
+        }
+
+        value_type& operator*() noexcept {
+            return mData;
+        }
+
+        template <typename VT = value_type>
+        ROBIN_HOOD(NODISCARD)
+        typename std::enable_if<is_map, typename VT::first_type&>::type getFirst() noexcept {
+            return mData.first;
+        }
+        template <typename VT = value_type>
+        ROBIN_HOOD(NODISCARD)
+        typename std::enable_if<is_set, VT&>::type getFirst() noexcept {
+            return mData;
+        }
+
+        template <typename VT = value_type>
+        ROBIN_HOOD(NODISCARD)
+        typename std::enable_if<is_map, typename VT::first_type const&>::type
+            getFirst() const noexcept {
+            return mData.first;
+        }
+        template <typename VT = value_type>
+        ROBIN_HOOD(NODISCARD)
+        typename std::enable_if<is_set, VT const&>::type getFirst() const noexcept {
+            return mData;
+        }
+
+        template <typename MT = mapped_type>
+        ROBIN_HOOD(NODISCARD)
+        typename std::enable_if<is_map, MT&>::type getSecond() noexcept {
+            return mData.second;
+        }
+
+        // BUG FIX: the SFINAE condition was `is_set`, which contradicts the body
+        // (`mData.second` does not exist for sets) and left the const accessor
+        // unavailable for flat maps. Use `is_map`, matching the non-const overload
+        // above and the heap-node DataNode<M, false> const implementation.
+        template <typename MT = mapped_type>
+        ROBIN_HOOD(NODISCARD)
+        typename std::enable_if<is_map, MT const&>::type getSecond() const noexcept {
+            return mData.second;
+        }
+
+        void swap(DataNode<M, true>& o) noexcept(
+            noexcept(std::declval<value_type>().swap(std::declval<value_type>()))) {
+            mData.swap(o.mData);
+        }
+
+    private:
+        value_type mData;
+    };
+
+    // big object: allocate on heap.
+    template <typename M>
+    class DataNode<M, false> {
+    public:
+        template <typename... Args>
+        explicit DataNode(M& map, Args&&... args)
+            : mData(map.allocate()) {
+            ::new (static_cast<void*>(mData)) value_type(std::forward<Args>(args)...);
+        }
+
+        DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode<M, false>&& n) noexcept
+            : mData(std::move(n.mData)) {}
+
+        void destroy(M& map) noexcept {
+            // don't deallocate, just put it into list of datapool.
+            mData->~value_type();
+            map.deallocate(mData);
+        }
+
+        void destroyDoNotDeallocate() noexcept {
+            mData->~value_type();
+        }
+
+        value_type const* operator->() const noexcept {
+            return mData;
+        }
+
+        value_type* operator->() noexcept {
+            return mData;
+        }
+
+        const value_type& operator*() const {
+            return *mData;
+        }
+
+        value_type& operator*() {
+            return *mData;
+        }
+
+        template <typename VT = value_type>
+        ROBIN_HOOD(NODISCARD)
+        typename std::enable_if<is_map, typename VT::first_type&>::type getFirst() noexcept {
+            return mData->first;
+        }
+        template <typename VT = value_type>
+        ROBIN_HOOD(NODISCARD)
+        typename std::enable_if<is_set, VT&>::type getFirst() noexcept {
+            return *mData;
+        }
+
+        template <typename VT = value_type>
+        ROBIN_HOOD(NODISCARD)
+        typename std::enable_if<is_map, typename VT::first_type const&>::type
+            getFirst() const noexcept {
+            return mData->first;
+        }
+        template <typename VT = value_type>
+        ROBIN_HOOD(NODISCARD)
+        typename std::enable_if<is_set, VT const&>::type getFirst() const noexcept {
+            return *mData;
+        }
+
+        template <typename MT = mapped_type>
+        ROBIN_HOOD(NODISCARD)
+        typename std::enable_if<is_map, MT&>::type getSecond() noexcept {
+            return mData->second;
+        }
+
+        template <typename MT = mapped_type>
+        ROBIN_HOOD(NODISCARD)
+        typename std::enable_if<is_map, MT const&>::type getSecond() const noexcept {
+            return mData->second;
+        }
+
+        void swap(DataNode<M, false>& o) noexcept {
+            using std::swap;
+            swap(mData, o.mData);
+        }
+
+    private:
+        value_type* mData;
+    };
+
+    using Node = DataNode<Self, IsFlat>;
+
+    // helpers for doInsert: extract first entry (only const required)
+    ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(Node const& n) const noexcept {
+        return n.getFirst();
+    }
+
+    // in case we have void mapped_type, we are not using a pair, thus we just route k through.
+    // No need to disable this because it's just not used if not applicable.
+    ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(key_type const& k) const noexcept {
+        return k;
+    }
+
+    // in case we have non-void mapped_type, we have a standard robin_hood::pair
+    template <typename Q = mapped_type>
+    ROBIN_HOOD(NODISCARD)
+    typename std::enable_if<!std::is_void<Q>::value, key_type const&>::type
+        getFirstConst(value_type const& vt) const noexcept {
+        return vt.first;
+    }
+
+    // Cloner //////////////////////////////////////////////////////////
+
+    // Copies an entire table's node/info storage into an already-allocated
+    // target of the same layout. Specialized on whether a raw byte copy is legal.
+    template <typename M, bool UseMemcpy>
+    struct Cloner;
+
+    // fast path: Just copy data, without allocating anything.
+    // Only selected when the Node type is trivially copyable (see cloneData()).
+    template <typename M>
+    struct Cloner<M, true> {
+        void operator()(M const& source, M& target) const {
+            auto const* const src = reinterpret_cast<char const*>(source.mKeyVals);
+            auto* tgt = reinterpret_cast<char*>(target.mKeyVals);
+            auto const numElementsWithBuffer = target.calcNumElementsWithBuffer(target.mMask + 1);
+            // single byte copy covers both the node array and the trailing info bytes
+            std::copy(src, src + target.calcNumBytesTotal(numElementsWithBuffer), tgt);
+        }
+    };
+
+    // slow path: copy the info bytes, then placement-new a Node copy into every
+    // occupied slot (mInfo[i] != 0). Empty slots are left uninitialized.
+    template <typename M>
+    struct Cloner<M, false> {
+        void operator()(M const& s, M& t) const {
+            auto const numElementsWithBuffer = t.calcNumElementsWithBuffer(t.mMask + 1);
+            std::copy(s.mInfo, s.mInfo + t.calcNumBytesInfo(numElementsWithBuffer), t.mInfo);
+
+            for (size_t i = 0; i < numElementsWithBuffer; ++i) {
+                if (t.mInfo[i]) {
+                    ::new (static_cast<void*>(t.mKeyVals + i)) Node(t, *s.mKeyVals[i]);
+                }
+            }
+        }
+    };
+
+    // Destroyer ///////////////////////////////////////////////////////
+
+    // Destroys all nodes of a table. Specialized on whether the nodes are
+    // flat and trivially destructible (then nothing per-node needs to run).
+    template <typename M, bool IsFlatAndTrivial>
+    struct Destroyer {};
+
+    // trivial case: no destructors to call, just forget the elements.
+    template <typename M>
+    struct Destroyer<M, true> {
+        void nodes(M& m) const noexcept {
+            m.mNumElements = 0;
+        }
+
+        void nodesDoNotDeallocate(M& m) const noexcept {
+            m.mNumElements = 0;
+        }
+    };
+
+    // non-trivial case: walk every occupied slot and run the node destructor.
+    template <typename M>
+    struct Destroyer<M, false> {
+        // destroy() also releases node-owned memory back to the pool.
+        void nodes(M& m) const noexcept {
+            m.mNumElements = 0;
+            // clear also resets mInfo to 0, that's sometimes not necessary.
+            auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1);
+
+            for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) {
+                if (0 != m.mInfo[idx]) {
+                    Node& n = m.mKeyVals[idx];
+                    n.destroy(m);
+                    n.~Node();
+                }
+            }
+        }
+
+        // variant used when the backing pool is being torn down anyway.
+        void nodesDoNotDeallocate(M& m) const noexcept {
+            m.mNumElements = 0;
+            // clear also resets mInfo to 0, that's sometimes not necessary.
+            auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1);
+            for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) {
+                if (0 != m.mInfo[idx]) {
+                    Node& n = m.mKeyVals[idx];
+                    n.destroyDoNotDeallocate();
+                    n.~Node();
+                }
+            }
+        }
+    };
+
+    // Iter ////////////////////////////////////////////////////////////
+
+    // tag type selecting the constructor that skips ahead to the first
+    // occupied slot (used by begin()).
+    struct fast_forward_tag {};
+
+    // generic iterator for both const_iterator and iterator.
+    template <bool IsConst>
+    // NOLINTNEXTLINE(hicpp-special-member-functions,cppcoreguidelines-special-member-functions)
+    class Iter {
+    private:
+        using NodePtr = typename std::conditional<IsConst, Node const*, Node*>::type;
+
+    public:
+        using difference_type = std::ptrdiff_t;
+        using value_type = typename Self::value_type;
+        using reference = typename std::conditional<IsConst, value_type const&, value_type&>::type;
+        using pointer = typename std::conditional<IsConst, value_type const*, value_type*>::type;
+        using iterator_category = std::forward_iterator_tag;
+
+        // default constructed iterator can be compared to itself, but WON'T return true when
+        // compared to end().
+        Iter() = default;
+
+        // Rule of zero: nothing specified. The conversion constructor is only enabled for
+        // iterator to const_iterator, so it doesn't accidentally work as a copy ctor.
+
+        // Conversion constructor from iterator to const_iterator.
+        template <bool OtherIsConst,
+                  typename = typename std::enable_if<IsConst && !OtherIsConst>::type>
+        // NOLINTNEXTLINE(hicpp-explicit-conversions)
+        Iter(Iter<OtherIsConst> const& other) noexcept
+            : mKeyVals(other.mKeyVals)
+            , mInfo(other.mInfo) {}
+
+        // points directly at a slot; caller guarantees it is occupied (or end()).
+        Iter(NodePtr valPtr, uint8_t const* infoPtr) noexcept
+            : mKeyVals(valPtr)
+            , mInfo(infoPtr) {}
+
+        // like above, but skips forward to the first occupied slot.
+        Iter(NodePtr valPtr, uint8_t const* infoPtr,
+             fast_forward_tag ROBIN_HOOD_UNUSED(tag) /*unused*/) noexcept
+            : mKeyVals(valPtr)
+            , mInfo(infoPtr) {
+            fastForward();
+        }
+
+        template <bool OtherIsConst,
+                  typename = typename std::enable_if<IsConst && !OtherIsConst>::type>
+        Iter& operator=(Iter<OtherIsConst> const& other) noexcept {
+            mKeyVals = other.mKeyVals;
+            mInfo = other.mInfo;
+            return *this;
+        }
+
+        // prefix increment. Undefined behavior if we are at end()!
+        Iter& operator++() noexcept {
+            mInfo++;
+            mKeyVals++;
+            fastForward();
+            return *this;
+        }
+
+        Iter operator++(int) noexcept {
+            Iter tmp = *this;
+            ++(*this);
+            return tmp;
+        }
+
+        reference operator*() const {
+            return **mKeyVals;
+        }
+
+        pointer operator->() const {
+            return &**mKeyVals;
+        }
+
+        // iterators compare by node pointer only; mInfo may be null for end().
+        template <bool O>
+        bool operator==(Iter<O> const& o) const noexcept {
+            return mKeyVals == o.mKeyVals;
+        }
+
+        template <bool O>
+        bool operator!=(Iter<O> const& o) const noexcept {
+            return mKeyVals != o.mKeyVals;
+        }
+
+    private:
+        // fast forward to the next non-free info byte
+        // I've tried a few variants that don't depend on intrinsics, but unfortunately they are
+        // quite a bit slower than this one. So I've reverted that change again. See map_benchmark.
+        // Scans the info bytes a word (sizeof(size_t)) at a time; the trailing
+        // sentinel guarantees the loop terminates. mKeyVals advances in lockstep.
+        void fastForward() noexcept {
+            size_t n = 0;
+            while (0U == (n = detail::unaligned_load<size_t>(mInfo))) {
+                mInfo += sizeof(size_t);
+                mKeyVals += sizeof(size_t);
+            }
+#if defined(ROBIN_HOOD_DISABLE_INTRINSICS)
+            // we know for certain that within the next 8 bytes we'll find a non-zero one.
+            if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load<uint32_t>(mInfo))) {
+                mInfo += 4;
+                mKeyVals += 4;
+            }
+            if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load<uint16_t>(mInfo))) {
+                mInfo += 2;
+                mKeyVals += 2;
+            }
+            if (ROBIN_HOOD_UNLIKELY(0U == *mInfo)) {
+                mInfo += 1;
+                mKeyVals += 1;
+            }
+#else
+#    if ROBIN_HOOD(LITTLE_ENDIAN)
+            auto inc = ROBIN_HOOD_COUNT_TRAILING_ZEROES(n) / 8;
+#    else
+            auto inc = ROBIN_HOOD_COUNT_LEADING_ZEROES(n) / 8;
+#    endif
+            mInfo += inc;
+            mKeyVals += inc;
+#endif
+        }
+
+        friend class Table<IsFlat, MaxLoadFactor100, key_type, mapped_type, hasher, key_equal>;
+        NodePtr mKeyVals{nullptr};
+        uint8_t const* mInfo{nullptr};
+    };
+
+    ////////////////////////////////////////////////////////////////////
+
+    // highly performance relevant code.
+    // Lower bits are used for indexing into the array (2^n size)
+    // The upper 1-5 bits need to be a reasonable good hash, to save comparisons.
+    // Writes the bucket index to *idx and the per-slot info byte to *info.
+    template <typename HashKey>
+    void keyToIdx(HashKey&& key, size_t* idx, InfoType* info) const {
+        // for a user-specified hash that is *not* robin_hood::hash, apply robin_hood::hash as
+        // an additional mixing step. This serves as a bad hash prevention, if the given data is
+        // badly mixed.
+        using Mix =
+            typename std::conditional<std::is_same<::robin_hood::hash<key_type>, hasher>::value,
+                                      ::robin_hood::detail::identity_hash<size_t>,
+                                      ::robin_hood::hash<size_t>>::type;
+
+        // the lower InitialInfoNumBits are reserved for info.
+        auto h = Mix{}(WHash::operator()(key));
+        *info = mInfoInc + static_cast<InfoType>((h & InfoMask) >> mInfoHashShift);
+        *idx = (h >> InitialInfoNumBits) & mMask;
+    }
+
+    // forwards the index by one, wrapping around at the end
+    // (wrap is implicit: the sentinel past the buffer stops all probe loops).
+    void next(InfoType* info, size_t* idx) const noexcept {
+        *idx = *idx + 1;
+        *info += mInfoInc;
+    }
+
+    // skip over slots whose stored entries are "richer" (closer to home) than us.
+    void nextWhileLess(InfoType* info, size_t* idx) const noexcept {
+        // unrolling this by hand did not bring any speedups.
+        while (*info < mInfo[*idx]) {
+            next(info, idx);
+        }
+    }
+
+    // Shift everything up by one element. Tries to move stuff around.
+    // Opens a hole at insertion_idx by moving [insertion_idx, startIdx) one
+    // slot to the right; startIdx must be an empty slot.
+    void
+    shiftUp(size_t startIdx,
+            size_t const insertion_idx) noexcept(std::is_nothrow_move_assignable<Node>::value) {
+        auto idx = startIdx;
+        // the empty slot gets a placement-new'd Node; the rest are move-assigned.
+        ::new (static_cast<void*>(mKeyVals + idx)) Node(std::move(mKeyVals[idx - 1]));
+        while (--idx != insertion_idx) {
+            mKeyVals[idx] = std::move(mKeyVals[idx - 1]);
+        }
+
+        idx = startIdx;
+        while (idx != insertion_idx) {
+            ROBIN_HOOD_COUNT(shiftUp)
+            mInfo[idx] = static_cast<uint8_t>(mInfo[idx - 1] + mInfoInc);
+            // info byte would overflow on the next shift: force a rehash by
+            // disallowing further inserts (mMaxNumElementsAllowed = 0).
+            if (ROBIN_HOOD_UNLIKELY(mInfo[idx] + mInfoInc > 0xFF)) {
+                mMaxNumElementsAllowed = 0;
+            }
+            --idx;
+        }
+    }
+
+    // Removes the entry at idx and backward-shifts the following displaced
+    // entries one slot closer to their home bucket.
+    void shiftDown(size_t idx) noexcept(std::is_nothrow_move_assignable<Node>::value) {
+        // until we find one that is either empty or has zero offset.
+        // TODO(martinus) we don't need to move everything, just the last one for the same
+        // bucket.
+        mKeyVals[idx].destroy(*this);
+
+        // until we find one that is either empty or has zero offset.
+        while (mInfo[idx + 1] >= 2 * mInfoInc) {
+            ROBIN_HOOD_COUNT(shiftDown)
+            mInfo[idx] = static_cast<uint8_t>(mInfo[idx + 1] - mInfoInc);
+            mKeyVals[idx] = std::move(mKeyVals[idx + 1]);
+            ++idx;
+        }
+
+        mInfo[idx] = 0;
+        // don't destroy, we've moved it
+        // mKeyVals[idx].destroy(*this);
+        mKeyVals[idx].~Node();
+    }
+
+    // copy of find(), except that it returns iterator instead of const_iterator.
+    // Returns the slot index of key, or the sentinel index (== end()) if absent.
+    template <typename Other>
+    ROBIN_HOOD(NODISCARD)
+    size_t findIdx(Other const& key) const {
+        size_t idx{};
+        InfoType info{};
+        keyToIdx(key, &idx, &info);
+
+        do {
+            // unrolling this twice gives a bit of a speedup. More unrolling did not help.
+            if (info == mInfo[idx] &&
+                ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) {
+                return idx;
+            }
+            next(&info, &idx);
+            if (info == mInfo[idx] &&
+                ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) {
+                return idx;
+            }
+            next(&info, &idx);
+        } while (info <= mInfo[idx]);
+
+        // nothing found!
+        // the sentinel index is where mInfo begins (one past the node array).
+        return mMask == 0 ? 0
+                          : static_cast<size_t>(std::distance(
+                                mKeyVals, reinterpret_cast_no_cast_align_warning<Node*>(mInfo)));
+    }
+
+    // dispatch to the memcpy or per-node Cloner depending on Node triviality.
+    void cloneData(const Table& o) {
+        Cloner<Table, IsFlat && ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(Node)>()(o, *this);
+    }
+
+    // inserts a keyval that is guaranteed to be new, e.g. when the hashmap is resized.
+    // @return index where the element was created
+    size_t insert_move(Node&& keyval) {
+        // we don't retry, fail if overflowing
+        // don't need to check max num elements
+        if (0 == mMaxNumElementsAllowed && !try_increase_info()) {
+            throwOverflowError(); // impossible to reach LCOV_EXCL_LINE
+        }
+
+        size_t idx{};
+        InfoType info{};
+        keyToIdx(keyval.getFirst(), &idx, &info);
+
+        // skip forward. Use <= because we are certain that the element is not there.
+        while (info <= mInfo[idx]) {
+            idx = idx + 1;
+            info += mInfoInc;
+        }
+
+        // key not found, so we are now exactly where we want to insert it.
+        auto const insertion_idx = idx;
+        auto const insertion_info = static_cast<uint8_t>(info);
+        // info byte close to overflow: disable further inserts to force a rehash.
+        if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) {
+            mMaxNumElementsAllowed = 0;
+        }
+
+        // find an empty spot
+        while (0 != mInfo[idx]) {
+            next(&info, &idx);
+        }
+
+        auto& l = mKeyVals[insertion_idx];
+        if (idx == insertion_idx) {
+            // slot was empty: construct in place.
+            ::new (static_cast<void*>(&l)) Node(std::move(keyval));
+        } else {
+            // slot occupied: shift the displaced run right, then move-assign.
+            shiftUp(idx, insertion_idx);
+            l = std::move(keyval);
+        }
+
+        // put at empty spot
+        mInfo[insertion_idx] = insertion_info;
+
+        ++mNumElements;
+        return insertion_idx;
+    }
+
+public:
+    using iterator = Iter<false>;
+    using const_iterator = Iter<true>;
+
+    // Default ctor: allocates nothing (see comment on the bucket_count ctor).
+    Table() noexcept(noexcept(Hash()) && noexcept(KeyEqual()))
+        : WHash()
+        , WKeyEqual() {
+        ROBIN_HOOD_TRACE(this)
+    }
+
+    // Creates an empty hash map. Nothing is allocated yet, this happens at the first insert.
+    // This tremendously speeds up ctor & dtor of a map that never receives an element. The
+    // penalty is payed at the first insert, and not before. Lookup of this empty map works
+    // because everybody points to DummyInfoByte::b. parameter bucket_count is dictated by the
+    // standard, but we can ignore it.
+    explicit Table(
+        size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/, const Hash& h = Hash{},
+        const KeyEqual& equal = KeyEqual{}) noexcept(noexcept(Hash(h)) && noexcept(KeyEqual(equal)))
+        : WHash(h)
+        , WKeyEqual(equal) {
+        ROBIN_HOOD_TRACE(this)
+    }
+
+    // Range ctor: inserts [first, last).
+    template <typename Iter>
+    Table(Iter first, Iter last, size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0,
+          const Hash& h = Hash{}, const KeyEqual& equal = KeyEqual{})
+        : WHash(h)
+        , WKeyEqual(equal) {
+        ROBIN_HOOD_TRACE(this)
+        insert(first, last);
+    }
+
+    // Initializer-list ctor: inserts every element of initlist.
+    Table(std::initializer_list<value_type> initlist,
+          size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, const Hash& h = Hash{},
+          const KeyEqual& equal = KeyEqual{})
+        : WHash(h)
+        , WKeyEqual(equal) {
+        ROBIN_HOOD_TRACE(this)
+        insert(initlist.begin(), initlist.end());
+    }
+
+    // Move ctor: steals the other map's storage; the source is reset to the
+    // cheap empty state (mMask == 0) so its destructor is a no-op.
+    Table(Table&& o) noexcept
+        : WHash(std::move(static_cast<WHash&>(o)))
+        , WKeyEqual(std::move(static_cast<WKeyEqual&>(o)))
+        , DataPool(std::move(static_cast<DataPool&>(o))) {
+        ROBIN_HOOD_TRACE(this)
+        if (o.mMask) {
+            mKeyVals = std::move(o.mKeyVals);
+            mInfo = std::move(o.mInfo);
+            mNumElements = std::move(o.mNumElements);
+            mMask = std::move(o.mMask);
+            mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed);
+            mInfoInc = std::move(o.mInfoInc);
+            mInfoHashShift = std::move(o.mInfoHashShift);
+            // set other's mask to 0 so its destructor won't do anything
+            o.init();
+        }
+    }
+
+    // Move assignment: destroys our data, steals the other map's storage.
+    Table& operator=(Table&& o) noexcept {
+        ROBIN_HOOD_TRACE(this)
+        if (&o != this) {
+            if (o.mMask) {
+                // only move stuff if the other map actually has some data
+                destroy();
+                mKeyVals = std::move(o.mKeyVals);
+                mInfo = std::move(o.mInfo);
+                mNumElements = std::move(o.mNumElements);
+                mMask = std::move(o.mMask);
+                mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed);
+                mInfoInc = std::move(o.mInfoInc);
+                mInfoHashShift = std::move(o.mInfoHashShift);
+                WHash::operator=(std::move(static_cast<WHash&>(o)));
+                WKeyEqual::operator=(std::move(static_cast<WKeyEqual&>(o)));
+                DataPool::operator=(std::move(static_cast<DataPool&>(o)));
+
+                o.init();
+
+            } else {
+                // nothing in the other map => just clear us.
+                clear();
+            }
+        }
+        return *this;
+    }
+
+    // Copy ctor: allocates a same-sized array and clones every entry.
+    Table(const Table& o)
+        : WHash(static_cast<const WHash&>(o))
+        , WKeyEqual(static_cast<const WKeyEqual&>(o))
+        , DataPool(static_cast<const DataPool&>(o)) {
+        ROBIN_HOOD_TRACE(this)
+        if (!o.empty()) {
+            // not empty: create an exact copy. it is also possible to just iterate through all
+            // elements and insert them, but copying is probably faster.
+
+            auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1)
+            auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer);
+
+            ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal("
+                                          << numElementsWithBuffer << ")")
+            mKeyVals = static_cast<Node*>(
+                detail::assertNotNull<std::bad_alloc>(std::malloc(numBytesTotal)));
+            // no need for calloc because clonData does memcpy
+            mInfo = reinterpret_cast<uint8_t*>(mKeyVals + numElementsWithBuffer);
+            mNumElements = o.mNumElements;
+            mMask = o.mMask;
+            mMaxNumElementsAllowed = o.mMaxNumElementsAllowed;
+            mInfoInc = o.mInfoInc;
+            mInfoHashShift = o.mInfoHashShift;
+            cloneData(o);
+        }
+    }
+
+    // Creates a copy of the given map. Copy constructor of each entry is used.
+    // Not sure why clang-tidy thinks this doesn't handle self assignment, it does
+    // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp)
+    Table& operator=(Table const& o) {
+        ROBIN_HOOD_TRACE(this)
+        if (&o == this) {
+            // prevent assigning of itself
+            return *this;
+        }
+
+        // we keep using the old allocator and not assign the new one, because we want to keep
+        // the memory available. when it is the same size.
+        if (o.empty()) {
+            if (0 == mMask) {
+                // nothing to do, we are empty too
+                return *this;
+            }
+
+            // not empty: destroy what we have there
+            // clear also resets mInfo to 0, that's sometimes not necessary.
+            destroy();
+            init();
+            WHash::operator=(static_cast<const WHash&>(o));
+            WKeyEqual::operator=(static_cast<const WKeyEqual&>(o));
+            DataPool::operator=(static_cast<DataPool const&>(o));
+
+            return *this;
+        }
+
+        // clean up old stuff
+        Destroyer<Self, IsFlat && std::is_trivially_destructible<Node>::value>{}.nodes(*this);
+
+        if (mMask != o.mMask) {
+            // no luck: we don't have the same array size allocated, so we need to realloc.
+            if (0 != mMask) {
+                // only deallocate if we actually have data!
+                ROBIN_HOOD_LOG("std::free")
+                std::free(mKeyVals);
+            }
+
+            auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1);
+            auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer);
+            ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal("
+                                          << numElementsWithBuffer << ")")
+            mKeyVals = static_cast<Node*>(
+                detail::assertNotNull<std::bad_alloc>(std::malloc(numBytesTotal)));
+
+            // no need for calloc here because cloneData performs a memcpy.
+            mInfo = reinterpret_cast<uint8_t*>(mKeyVals + numElementsWithBuffer);
+            // sentinel is set in cloneData
+        }
+        WHash::operator=(static_cast<const WHash&>(o));
+        WKeyEqual::operator=(static_cast<const WKeyEqual&>(o));
+        DataPool::operator=(static_cast<DataPool const&>(o));
+        mNumElements = o.mNumElements;
+        mMask = o.mMask;
+        mMaxNumElementsAllowed = o.mMaxNumElementsAllowed;
+        mInfoInc = o.mInfoInc;
+        mInfoHashShift = o.mInfoHashShift;
+        cloneData(o);
+
+        return *this;
+    }
+
+    // Swaps everything between the two maps.
+    // (delegates to std::swap, which uses the move ctor/assignment above)
+    void swap(Table& o) {
+        ROBIN_HOOD_TRACE(this)
+        using std::swap;
+        swap(o, *this);
+    }
+
+    // Clears all data, without resizing.
+    void clear() {
+        ROBIN_HOOD_TRACE(this)
+        if (empty()) {
+            // don't do anything! also important because we don't want to write to
+            // DummyInfoByte::b, even though we would just write 0 to it..
+            return;
+        }
+
+        Destroyer<Self, IsFlat && std::is_trivially_destructible<Node>::value>{}.nodes(*this);
+
+        auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1);
+        // clear everything, then set the sentinel again
+        uint8_t const z = 0;
+        std::fill(mInfo, mInfo + calcNumBytesInfo(numElementsWithBuffer), z);
+        mInfo[numElementsWithBuffer] = 1;
+
+        // reset the probe-distance parameters to their initial values.
+        mInfoInc = InitialInfoInc;
+        mInfoHashShift = InitialInfoHashShift;
+    }
+
+    // Destroys the map and all it's contents.
+    ~Table() {
+        ROBIN_HOOD_TRACE(this)
+        destroy();
+    }
+
+    // Checks if both tables contain the same entries. Order is irrelevant.
+    // O(n) lookups into the other table after the cheap size comparison.
+    bool operator==(const Table& other) const {
+        ROBIN_HOOD_TRACE(this)
+        if (other.size() != size()) {
+            return false;
+        }
+        for (auto const& otherEntry : other) {
+            if (!has(otherEntry)) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    bool operator!=(const Table& other) const {
+        ROBIN_HOOD_TRACE(this)
+        return !operator==(other);
+    }
+
+    // operator[]: find-or-default-construct. Only available for map types
+    // (non-void mapped_type); returns a reference to the mapped value.
+    template <typename Q = mapped_type>
+    typename std::enable_if<!std::is_void<Q>::value, Q&>::type operator[](const key_type& key) {
+        ROBIN_HOOD_TRACE(this)
+        return doCreateByKey(key);
+    }
+
+    template <typename Q = mapped_type>
+    typename std::enable_if<!std::is_void<Q>::value, Q&>::type operator[](key_type&& key) {
+        ROBIN_HOOD_TRACE(this)
+        return doCreateByKey(std::move(key));
+    }
+
+    // Range insert: one-by-one insertion of [first, last).
+    template <typename Iter>
+    void insert(Iter first, Iter last) {
+        for (; first != last; ++first) {
+            // value_type ctor needed because this might be called with std::pair's
+            insert(value_type(*first));
+        }
+    }
+
+    // Constructs the node up front, then tries to insert it; on a duplicate
+    // key the freshly built node is destroyed again.
+    template <typename... Args>
+    std::pair<iterator, bool> emplace(Args&&... args) {
+        ROBIN_HOOD_TRACE(this)
+        Node n{*this, std::forward<Args>(args)...};
+        auto r = doInsert(std::move(n));
+        if (!r.second) {
+            // insertion not possible: destroy node
+            // NOLINTNEXTLINE(bugprone-use-after-move)
+            n.destroy(*this);
+        }
+        return r;
+    }
+
+    // try_emplace: insert only if key is absent; args construct the mapped value.
+    template <typename... Args>
+    std::pair<iterator, bool> try_emplace(const key_type& key, Args&&... args) {
+        return try_emplace_impl(key, std::forward<Args>(args)...);
+    }
+
+    template <typename... Args>
+    std::pair<iterator, bool> try_emplace(key_type&& key, Args&&... args) {
+        return try_emplace_impl(std::move(key), std::forward<Args>(args)...);
+    }
+
+    // hint overloads: the hint is ignored (standard allows this).
+    template <typename... Args>
+    std::pair<iterator, bool> try_emplace(const_iterator hint, const key_type& key,
+                                          Args&&... args) {
+        (void)hint;
+        return try_emplace_impl(key, std::forward<Args>(args)...);
+    }
+
+    template <typename... Args>
+    std::pair<iterator, bool> try_emplace(const_iterator hint, key_type&& key, Args&&... args) {
+        (void)hint;
+        return try_emplace_impl(std::move(key), std::forward<Args>(args)...);
+    }
+
+    // insert_or_assign: insert if absent, otherwise overwrite the mapped value.
+    template <typename Mapped>
+    std::pair<iterator, bool> insert_or_assign(const key_type& key, Mapped&& obj) {
+        return insert_or_assign_impl(key, std::forward<Mapped>(obj));
+    }
+
+    template <typename Mapped>
+    std::pair<iterator, bool> insert_or_assign(key_type&& key, Mapped&& obj) {
+        return insert_or_assign_impl(std::move(key), std::forward<Mapped>(obj));
+    }
+
+    template <typename Mapped>
+    std::pair<iterator, bool> insert_or_assign(const_iterator hint, const key_type& key,
+                                               Mapped&& obj) {
+        (void)hint;
+        return insert_or_assign_impl(key, std::forward<Mapped>(obj));
+    }
+
+    template <typename Mapped>
+    std::pair<iterator, bool> insert_or_assign(const_iterator hint, key_type&& key, Mapped&& obj) {
+        (void)hint;
+        return insert_or_assign_impl(std::move(key), std::forward<Mapped>(obj));
+    }
+
+    std::pair<iterator, bool> insert(const value_type& keyval) {
+        ROBIN_HOOD_TRACE(this)
+        return doInsert(keyval);
+    }
+
+    std::pair<iterator, bool> insert(value_type&& keyval) {
+        return doInsert(std::move(keyval));
+    }
+
+    // Returns 1 if key is found, 0 otherwise.
+    // (findIdx returns the sentinel index == mInfo position when not found)
+    size_t count(const key_type& key) const { // NOLINT(modernize-use-nodiscard)
+        ROBIN_HOOD_TRACE(this)
+        auto kv = mKeyVals + findIdx(key);
+        if (kv != reinterpret_cast_no_cast_align_warning<Node*>(mInfo)) {
+            return 1;
+        }
+        return 0;
+    }
+
+    // Heterogeneous-lookup overload, enabled only for transparent key_equal.
+    template <typename OtherKey, typename Self_ = Self>
+    // NOLINTNEXTLINE(modernize-use-nodiscard)
+    typename std::enable_if<Self_::is_transparent, size_t>::type count(const OtherKey& key) const {
+        ROBIN_HOOD_TRACE(this)
+        auto kv = mKeyVals + findIdx(key);
+        if (kv != reinterpret_cast_no_cast_align_warning<Node*>(mInfo)) {
+            return 1;
+        }
+        return 0;
+    }
+
+    bool contains(const key_type& key) const { // NOLINT(modernize-use-nodiscard)
+        return 1U == count(key);
+    }
+
+    template <typename OtherKey, typename Self_ = Self>
+    // NOLINTNEXTLINE(modernize-use-nodiscard)
+    typename std::enable_if<Self_::is_transparent, bool>::type contains(const OtherKey& key) const {
+        return 1U == count(key);
+    }
+
+    // Returns a reference to the value found for key.
+    // Throws std::out_of_range if element cannot be found
+    // Only available for map types (non-void mapped_type).
+    template <typename Q = mapped_type>
+    // NOLINTNEXTLINE(modernize-use-nodiscard)
+    typename std::enable_if<!std::is_void<Q>::value, Q&>::type at(key_type const& key) {
+        ROBIN_HOOD_TRACE(this)
+        auto kv = mKeyVals + findIdx(key);
+        if (kv == reinterpret_cast_no_cast_align_warning<Node*>(mInfo)) {
+            doThrow<std::out_of_range>("key not found");
+        }
+        return kv->getSecond();
+    }
+
+    // Returns a reference to the value found for key.
+    // Throws std::out_of_range if element cannot be found
+    template <typename Q = mapped_type>
+    // NOLINTNEXTLINE(modernize-use-nodiscard)
+    typename std::enable_if<!std::is_void<Q>::value, Q const&>::type at(key_type const& key) const {
+        ROBIN_HOOD_TRACE(this)
+        auto kv = mKeyVals + findIdx(key);
+        if (kv == reinterpret_cast_no_cast_align_warning<Node*>(mInfo)) {
+            doThrow<std::out_of_range>("key not found");
+        }
+        return kv->getSecond();
+    }
+
+    // find(): all overloads delegate to findIdx(); a not-found key yields the
+    // sentinel index, so the returned iterator compares equal to end().
+    const_iterator find(const key_type& key) const { // NOLINT(modernize-use-nodiscard)
+        ROBIN_HOOD_TRACE(this)
+        const size_t idx = findIdx(key);
+        return const_iterator{mKeyVals + idx, mInfo + idx};
+    }
+
+    template <typename OtherKey>
+    const_iterator find(const OtherKey& key, is_transparent_tag /*unused*/) const {
+        ROBIN_HOOD_TRACE(this)
+        const size_t idx = findIdx(key);
+        return const_iterator{mKeyVals + idx, mInfo + idx};
+    }
+
+    // Heterogeneous lookup, enabled only for transparent key_equal.
+    template <typename OtherKey, typename Self_ = Self>
+    typename std::enable_if<Self_::is_transparent, // NOLINT(modernize-use-nodiscard)
+                            const_iterator>::type  // NOLINT(modernize-use-nodiscard)
+    find(const OtherKey& key) const {              // NOLINT(modernize-use-nodiscard)
+        ROBIN_HOOD_TRACE(this)
+        const size_t idx = findIdx(key);
+        return const_iterator{mKeyVals + idx, mInfo + idx};
+    }
+
+    iterator find(const key_type& key) {
+        ROBIN_HOOD_TRACE(this)
+        const size_t idx = findIdx(key);
+        return iterator{mKeyVals + idx, mInfo + idx};
+    }
+
+    template <typename OtherKey>
+    iterator find(const OtherKey& key, is_transparent_tag /*unused*/) {
+        ROBIN_HOOD_TRACE(this)
+        const size_t idx = findIdx(key);
+        return iterator{mKeyVals + idx, mInfo + idx};
+    }
+
+    template <typename OtherKey, typename Self_ = Self>
+    typename std::enable_if<Self_::is_transparent, iterator>::type find(const OtherKey& key) {
+        ROBIN_HOOD_TRACE(this)
+        const size_t idx = findIdx(key);
+        return iterator{mKeyVals + idx, mInfo + idx};
+    }
+
+    // begin(): fast-forwards to the first occupied slot; empty maps go
+    // straight to end() so the dummy info byte is never dereferenced.
+    iterator begin() {
+        ROBIN_HOOD_TRACE(this)
+        if (empty()) {
+            return end();
+        }
+        return iterator(mKeyVals, mInfo, fast_forward_tag{});
+    }
+    const_iterator begin() const { // NOLINT(modernize-use-nodiscard)
+        ROBIN_HOOD_TRACE(this)
+        return cbegin();
+    }
+    const_iterator cbegin() const { // NOLINT(modernize-use-nodiscard)
+        ROBIN_HOOD_TRACE(this)
+        if (empty()) {
+            return cend();
+        }
+        return const_iterator(mKeyVals, mInfo, fast_forward_tag{});
+    }
+
+    iterator end() {
+        ROBIN_HOOD_TRACE(this)
+        // no need to supply valid info pointer: end() must not be dereferenced, and only node
+        // pointer is compared.
+        return iterator{reinterpret_cast_no_cast_align_warning<Node*>(mInfo), nullptr};
+    }
+    const_iterator end() const { // NOLINT(modernize-use-nodiscard)
+        ROBIN_HOOD_TRACE(this)
+        return cend();
+    }
+    const_iterator cend() const { // NOLINT(modernize-use-nodiscard)
+        ROBIN_HOOD_TRACE(this)
+        return const_iterator{reinterpret_cast_no_cast_align_warning<Node*>(mInfo), nullptr};
+    }
+
+    iterator erase(const_iterator pos) {
+        ROBIN_HOOD_TRACE(this)
+        // its safe to perform const cast here
+        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
+        return erase(iterator{const_cast<Node*>(pos.mKeyVals), const_cast<uint8_t*>(pos.mInfo)});
+    }
+
+    // Erases element at pos, returns iterator to the next element.
+    iterator erase(iterator pos) {
+        ROBIN_HOOD_TRACE(this)
+        // we assume that pos always points to a valid entry, and not end().
+        auto const idx = static_cast<size_t>(pos.mKeyVals - mKeyVals);
+
+        shiftDown(idx);
+        --mNumElements;
+
+        if (*pos.mInfo) {
+            // we've backward shifted, return this again
+            // (the slot at pos now holds the element that was shifted into it)
+            return pos;
+        }
+
+        // no backward shift, return next element
+        return ++pos;
+    }
+
+    // Erases by key; returns the number of elements removed (0 or 1).
+    size_t erase(const key_type& key) {
+        ROBIN_HOOD_TRACE(this)
+        size_t idx{};
+        InfoType info{};
+        keyToIdx(key, &idx, &info);
+
+        // check while info matches with the source idx
+        do {
+            if (info == mInfo[idx] && WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) {
+                shiftDown(idx);
+                --mNumElements;
+                return 1;
+            }
+            next(&info, &idx);
+        } while (info <= mInfo[idx]);
+
+        // nothing found to delete
+        return 0;
+    }
+
+    // reserves space for the specified number of elements. Makes sure the old data fits.
+    // exactly the same as reserve(c).
+    void rehash(size_t c) {
+        // forces a reserve
+        reserve(c, true);
+    }
+
+    // reserves space for the specified number of elements. Makes sure the old data fits.
+    // Exactly the same as rehash(c). Use rehash(0) to shrink to fit.
+    void reserve(size_t c) {
+        // reserve, but don't force rehash
+        reserve(c, false);
+    }
+
+    size_type size() const noexcept { // NOLINT(modernize-use-nodiscard)
+        ROBIN_HOOD_TRACE(this)
+        return mNumElements;
+    }
+
+    size_type max_size() const noexcept { // NOLINT(modernize-use-nodiscard)
+        ROBIN_HOOD_TRACE(this)
+        return static_cast<size_type>(-1);
+    }
+
+    ROBIN_HOOD(NODISCARD) bool empty() const noexcept {
+        ROBIN_HOOD_TRACE(this)
+        return 0 == mNumElements;
+    }
+
+    // Fixed at compile time via the MaxLoadFactor100 template parameter;
+    // there is deliberately no setter.
+    float max_load_factor() const noexcept { // NOLINT(modernize-use-nodiscard)
+        ROBIN_HOOD_TRACE(this)
+        return MaxLoadFactor100 / 100.0F;
+    }
+
+    // Average number of elements per bucket. Since we allow only 1 per bucket,
+    // this is simply size() divided by the bucket count (mMask + 1).
+    float load_factor() const noexcept { // NOLINT(modernize-use-nodiscard)
+        ROBIN_HOOD_TRACE(this)
+        return static_cast<float>(size()) / static_cast<float>(mMask + 1);
+    }
+
+    // Bucket-index mask (bucket count minus one; bucket count is a power of two).
+    ROBIN_HOOD(NODISCARD) size_t mask() const noexcept {
+        ROBIN_HOOD_TRACE(this)
+        return mMask;
+    }
+
+    // Maximum number of elements allowed before a resize is triggered, i.e.
+    // maxElements * MaxLoadFactor100 / 100, computed without size_t overflow.
+    ROBIN_HOOD(NODISCARD) size_t calcMaxNumElementsAllowed(size_t maxElements) const noexcept {
+        if (ROBIN_HOOD_LIKELY(maxElements <= (std::numeric_limits<size_t>::max)() / 100)) {
+            return maxElements * MaxLoadFactor100 / 100;
+        }
+
+        // we might be a bit imprecise, but since maxElements is quite large that doesn't matter
+        return (maxElements / 100) * MaxLoadFactor100;
+    }
+
+    // Size in bytes of the info-byte array for the given slot count.
+    ROBIN_HOOD(NODISCARD) size_t calcNumBytesInfo(size_t numElements) const noexcept {
+        // we add a uint64_t, which houses the sentinel (first byte) and padding so we can load
+        // 64bit types.
+        return numElements + sizeof(uint64_t);
+    }
+
+    // Slot count including the overflow buffer appended after the 2^n buckets
+    // (capped at 0xFF extra slots) so probing can run past mMask without wrapping.
+    ROBIN_HOOD(NODISCARD)
+    size_t calcNumElementsWithBuffer(size_t numElements) const noexcept {
+        auto maxNumElementsAllowed = calcMaxNumElementsAllowed(numElements);
+        return numElements + (std::min)(maxNumElementsAllowed, (static_cast<size_t>(0xFF)));
+    }
+
+    // Total allocation size in bytes (node array + info bytes).
+    // calculation only allowed for 2^n values
+    ROBIN_HOOD(NODISCARD) size_t calcNumBytesTotal(size_t numElements) const {
+#if ROBIN_HOOD(BITNESS) == 64
+        return numElements * sizeof(Node) + calcNumBytesInfo(numElements);
+#else
+        // make sure we're doing 64bit operations, so we are at least safe against 32bit overflows.
+        auto const ne = static_cast<uint64_t>(numElements);
+        auto const s = static_cast<uint64_t>(sizeof(Node));
+        auto const infos = static_cast<uint64_t>(calcNumBytesInfo(numElements));
+
+        auto const total64 = ne * s + infos;
+        auto const total = static_cast<size_t>(total64);
+
+        // on 32-bit targets the 64-bit sum may not fit into size_t
+        if (ROBIN_HOOD_UNLIKELY(static_cast<uint64_t>(total) != total64)) {
+            throwOverflowError();
+        }
+        return total;
+#endif
+    }
+
+private:
+    // Membership check: for maps (non-void mapped_type) both key and mapped
+    // value must match; for sets only the key is compared.
+    template <typename Q = mapped_type>
+    ROBIN_HOOD(NODISCARD)
+    typename std::enable_if<!std::is_void<Q>::value, bool>::type has(const value_type& e) const {
+        ROBIN_HOOD_TRACE(this)
+        auto it = find(e.first);
+        return it != end() && it->second == e.second;
+    }
+
+    // Set overload: key-only membership check.
+    template <typename Q = mapped_type>
+    ROBIN_HOOD(NODISCARD)
+    typename std::enable_if<std::is_void<Q>::value, bool>::type has(const value_type& e) const {
+        ROBIN_HOOD_TRACE(this)
+        return find(e) != end();
+    }
+
+    // Shared backend of reserve()/rehash(): doubles the bucket count starting
+    // from InitialNumElements until 'c' elements fit. newSize wrapping to 0
+    // signals size_t overflow and throws.
+    void reserve(size_t c, bool forceRehash) {
+        ROBIN_HOOD_TRACE(this)
+        auto const minElementsAllowed = (std::max)(c, mNumElements);
+        auto newSize = InitialNumElements;
+        while (calcMaxNumElementsAllowed(newSize) < minElementsAllowed && newSize != 0) {
+            newSize *= 2;
+        }
+        if (ROBIN_HOOD_UNLIKELY(newSize == 0)) {
+            throwOverflowError();
+        }
+
+        ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1")
+
+        // only actually do anything when the new size is bigger than the old one. This prevents to
+        // continuously allocate for each reserve() call.
+        if (forceRehash || newSize > mMask + 1) {
+            rehashPowerOfTwo(newSize);
+        }
+    }
+
+    // reserves space for at least the specified number of elements.
+    // only works if numBuckets is a power of two
+    void rehashPowerOfTwo(size_t numBuckets) {
+        ROBIN_HOOD_TRACE(this)
+
+        Node* const oldKeyVals = mKeyVals;
+        uint8_t const* const oldInfo = mInfo;
+
+        const size_t oldMaxElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1);
+
+        // resize operation: move stuff. init_data() replaces mKeyVals/mInfo,
+        // so the old arrays are only reachable through the saved pointers above.
+        init_data(numBuckets);
+        if (oldMaxElementsWithBuffer > 1) {
+            for (size_t i = 0; i < oldMaxElementsWithBuffer; ++i) {
+                if (oldInfo[i] != 0) {
+                    insert_move(std::move(oldKeyVals[i]));
+                    // destroy the node but DON'T destroy the data.
+                    oldKeyVals[i].~Node();
+                }
+            }
+
+            // this check is not necessary as it's guarded by the previous if, but it helps silence
+            // g++'s overeager "attempt to free a non-heap object 'map'
+            // [-Werror=free-nonheap-object]" warning.
+            if (oldKeyVals != reinterpret_cast_no_cast_align_warning<Node*>(&mMask)) {
+                // don't destroy old data: put it into the pool instead
+                DataPool::addOrFree(oldKeyVals, calcNumBytesTotal(oldMaxElementsWithBuffer));
+            }
+        }
+    }
+
+    // Reports that the table cannot grow any further; aborts when the build
+    // has exceptions disabled.
+    ROBIN_HOOD(NOINLINE) void throwOverflowError() const {
+#if ROBIN_HOOD(HAS_EXCEPTIONS)
+        throw std::overflow_error("robin_hood::map overflow");
+#else
+        abort();
+#endif
+    }
+
+    // try_emplace backend: constructs the mapped value only if the key is
+    // absent; returns {iterator to element, whether an insertion happened}.
+    template <typename OtherKey, typename... Args>
+    std::pair<iterator, bool> try_emplace_impl(OtherKey&& key, Args&&... args) {
+        ROBIN_HOOD_TRACE(this)
+        auto it = find(key);
+        if (it == end()) {
+            return emplace(std::piecewise_construct,
+                           std::forward_as_tuple(std::forward<OtherKey>(key)),
+                           std::forward_as_tuple(std::forward<Args>(args)...));
+        }
+        return {it, false};
+    }
+
+    // insert_or_assign backend: inserts when absent, otherwise overwrites the
+    // mapped value. Second member of the result is false when an existing
+    // element was assigned (matches std::unordered_map semantics).
+    template <typename OtherKey, typename Mapped>
+    std::pair<iterator, bool> insert_or_assign_impl(OtherKey&& key, Mapped&& obj) {
+        ROBIN_HOOD_TRACE(this)
+        auto it = find(key);
+        if (it == end()) {
+            return emplace(std::forward<OtherKey>(key), std::forward<Mapped>(obj));
+        }
+        it->second = std::forward<Mapped>(obj);
+        return {it, false};
+    }
+
+    // Allocates and zero-initializes the node + info arrays for max_elements
+    // buckets (a power of two) and resets all bookkeeping state.
+    void init_data(size_t max_elements) {
+        mNumElements = 0;
+        mMask = max_elements - 1;
+        mMaxNumElementsAllowed = calcMaxNumElementsAllowed(max_elements);
+
+        auto const numElementsWithBuffer = calcNumElementsWithBuffer(max_elements);
+
+        // calloc also zeroes everything
+        auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer);
+        ROBIN_HOOD_LOG("std::calloc " << numBytesTotal << " = calcNumBytesTotal("
+                                      << numElementsWithBuffer << ")")
+        mKeyVals = reinterpret_cast<Node*>(
+            detail::assertNotNull<std::bad_alloc>(std::calloc(1, numBytesTotal)));
+        // info bytes live directly behind the node array in the same allocation
+        mInfo = reinterpret_cast<uint8_t*>(mKeyVals + numElementsWithBuffer);
+
+        // set sentinel (non-zero byte past the end stops iteration)
+        mInfo[numElementsWithBuffer] = 1;
+
+        mInfoInc = InitialInfoInc;
+        mInfoHashShift = InitialInfoHashShift;
+    }
+
+    // operator[] backend: returns a reference to the mapped value for 'key',
+    // default-constructing and inserting it at the correct robin-hood position
+    // if absent. Loops because a resize (increase_size) restarts the search.
+    template <typename Arg, typename Q = mapped_type>
+    typename std::enable_if<!std::is_void<Q>::value, Q&>::type doCreateByKey(Arg&& key) {
+        while (true) {
+            size_t idx{};
+            InfoType info{};
+            keyToIdx(key, &idx, &info);
+            nextWhileLess(&info, &idx);
+
+            // while we potentially have a match. Can't do a do-while here because when mInfo is
+            // 0 we don't want to skip forward
+            while (info == mInfo[idx]) {
+                if (WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) {
+                    // key already exists, do not insert.
+                    return mKeyVals[idx].getSecond();
+                }
+                next(&info, &idx);
+            }
+
+            // unlikely that this evaluates to true
+            if (ROBIN_HOOD_UNLIKELY(mNumElements >= mMaxNumElementsAllowed)) {
+                increase_size();
+                continue;
+            }
+
+            // key not found, so we are now exactly where we want to insert it.
+            auto const insertion_idx = idx;
+            auto const insertion_info = info;
+            // info byte would overflow: force a resize on the next insertion
+            if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) {
+                mMaxNumElementsAllowed = 0;
+            }
+
+            // find an empty spot
+            while (0 != mInfo[idx]) {
+                next(&info, &idx);
+            }
+
+            auto& l = mKeyVals[insertion_idx];
+            if (idx == insertion_idx) {
+                // put at empty spot. This forwards all arguments into the node where the object
+                // is constructed exactly where it is needed.
+                ::new (static_cast<void*>(&l))
+                    Node(*this, std::piecewise_construct,
+                         std::forward_as_tuple(std::forward<Arg>(key)), std::forward_as_tuple());
+            } else {
+                // make room by shifting the occupied run one slot to the right
+                shiftUp(idx, insertion_idx);
+                l = Node(*this, std::piecewise_construct,
+                         std::forward_as_tuple(std::forward<Arg>(key)), std::forward_as_tuple());
+            }
+
+            // mKeyVals[idx].getFirst() = std::move(key);
+            mInfo[insertion_idx] = static_cast<uint8_t>(insertion_info);
+
+            ++mNumElements;
+            return mKeyVals[insertion_idx].getSecond();
+        }
+    }
+
+    // Insert backend. This is exactly the same code as operator[]
+    // (doCreateByKey), except for the return values: it returns
+    // {iterator, inserted} and never overwrites an existing element.
+    template <typename Arg>
+    std::pair<iterator, bool> doInsert(Arg&& keyval) {
+        while (true) {
+            size_t idx{};
+            InfoType info{};
+            keyToIdx(getFirstConst(keyval), &idx, &info);
+            nextWhileLess(&info, &idx);
+
+            // while we potentially have a match
+            while (info == mInfo[idx]) {
+                if (WKeyEqual::operator()(getFirstConst(keyval), mKeyVals[idx].getFirst())) {
+                    // key already exists, do NOT insert.
+                    // see http://en.cppreference.com/w/cpp/container/unordered_map/insert
+                    return std::make_pair<iterator, bool>(iterator(mKeyVals + idx, mInfo + idx),
+                                                          false);
+                }
+                next(&info, &idx);
+            }
+
+            // unlikely that this evaluates to true
+            if (ROBIN_HOOD_UNLIKELY(mNumElements >= mMaxNumElementsAllowed)) {
+                increase_size();
+                continue;
+            }
+
+            // key not found, so we are now exactly where we want to insert it.
+            auto const insertion_idx = idx;
+            auto const insertion_info = info;
+            // info byte would overflow: force a resize on the next insertion
+            if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) {
+                mMaxNumElementsAllowed = 0;
+            }
+
+            // find an empty spot
+            while (0 != mInfo[idx]) {
+                next(&info, &idx);
+            }
+
+            auto& l = mKeyVals[insertion_idx];
+            if (idx == insertion_idx) {
+                ::new (static_cast<void*>(&l)) Node(*this, std::forward<Arg>(keyval));
+            } else {
+                // make room by shifting the occupied run one slot to the right
+                shiftUp(idx, insertion_idx);
+                l = Node(*this, std::forward<Arg>(keyval));
+            }
+
+            // put at empty spot
+            mInfo[insertion_idx] = static_cast<uint8_t>(insertion_info);
+
+            ++mNumElements;
+            return std::make_pair(iterator(mKeyVals + insertion_idx, mInfo + insertion_idx), true);
+        }
+    }
+
+    // Attempts to avoid a full rehash by halving mInfoInc, trading one hash
+    // bit for an extra bit of probe-distance information in every info byte.
+    // Returns false when mInfoInc is already at its minimum.
+    bool try_increase_info() {
+        ROBIN_HOOD_LOG("mInfoInc=" << mInfoInc << ", numElements=" << mNumElements
+                                   << ", maxNumElementsAllowed="
+                                   << calcMaxNumElementsAllowed(mMask + 1))
+        if (mInfoInc <= 2) {
+            // need to be > 2 so that shift works (otherwise undefined behavior!)
+            return false;
+        }
+        // we got space left, try to make info smaller
+        mInfoInc = static_cast<uint8_t>(mInfoInc >> 1U);
+
+        // remove one bit of the hash, leaving more space for the distance info.
+        // This is extremely fast because we can operate on 8 bytes at once.
+        ++mInfoHashShift;
+        auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1);
+
+        for (size_t i = 0; i < numElementsWithBuffer; i += 8) {
+            auto val = unaligned_load<uint64_t>(mInfo + i);
+            val = (val >> 1U) & UINT64_C(0x7f7f7f7f7f7f7f7f);
+            std::memcpy(mInfo + i, &val, sizeof(val));
+        }
+        // update sentinel, which might have been cleared out!
+        mInfo[numElementsWithBuffer] = 1;
+
+        mMaxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1);
+        return true;
+    }
+
+    // Grows the table: first allocation, then try_increase_info() when there
+    // is slack, otherwise a doubling rehash. Throws when growth cannot help
+    // (pathological hash function).
+    void increase_size() {
+        // nothing allocated yet? just allocate InitialNumElements
+        if (0 == mMask) {
+            init_data(InitialNumElements);
+            return;
+        }
+
+        auto const maxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1);
+        if (mNumElements < maxNumElementsAllowed && try_increase_info()) {
+            return;
+        }
+
+        ROBIN_HOOD_LOG("mNumElements=" << mNumElements << ", maxNumElementsAllowed="
+                                       << maxNumElementsAllowed << ", load="
+                                       << (static_cast<double>(mNumElements) * 100.0 /
+                                           (static_cast<double>(mMask) + 1)))
+        // it seems we have a really bad hash function! don't try to resize again
+        if (mNumElements * 2 < calcMaxNumElementsAllowed(mMask + 1)) {
+            throwOverflowError();
+        }
+
+        rehashPowerOfTwo((mMask + 1) * 2);
+    }
+
+    // Destroys all nodes and releases the backing allocation. An empty table
+    // (mMask == 0) owns no heap memory, so nothing is freed.
+    void destroy() {
+        if (0 == mMask) {
+            // don't deallocate!
+            return;
+        }
+
+        Destroyer<Self, IsFlat && std::is_trivially_destructible<Node>::value>{}
+            .nodesDoNotDeallocate(*this);
+
+        // This protection against not deleting mMask shouldn't be needed as it's sufficiently
+        // protected with the 0==mMask check, but I have this anyways because g++ 7 otherwise
+        // reports a compile error: attempt to free a non-heap object 'fm'
+        // [-Werror=free-nonheap-object]
+        if (mKeyVals != reinterpret_cast_no_cast_align_warning<Node*>(&mMask)) {
+            ROBIN_HOOD_LOG("std::free")
+            std::free(mKeyVals)+;
+        }
+    }
+
+    // Resets to the empty state: mKeyVals/mInfo alias &mMask so an empty
+    // table requires no heap allocation at all.
+    void init() noexcept {
+        mKeyVals = reinterpret_cast_no_cast_align_warning<Node*>(&mMask);
+        mInfo = reinterpret_cast<uint8_t*>(&mMask);
+        mNumElements = 0;
+        mMask = 0;
+        mMaxNumElementsAllowed = 0;
+        mInfoInc = InitialInfoInc;
+        mInfoHashShift = InitialInfoHashShift;
+    }
+
+    // members are sorted so no padding occurs.
+    // Note: mKeyVals/mInfo initially point at &mMask (see init()), meaning an
+    // empty table is allocation-free; mMask == 0 doubles as the "empty" flag.
+    Node* mKeyVals = reinterpret_cast_no_cast_align_warning<Node*>(&mMask); // 8 byte  8
+    uint8_t* mInfo = reinterpret_cast<uint8_t*>(&mMask);                    // 8 byte 16
+    size_t mNumElements = 0;                                                // 8 byte 24
+    size_t mMask = 0;                                                       // 8 byte 32
+    size_t mMaxNumElementsAllowed = 0;                                      // 8 byte 40
+    InfoType mInfoInc = InitialInfoInc;                                     // 4 byte 44
+    InfoType mInfoHashShift = InitialInfoHashShift;                         // 4 byte 48
+                                                    // 16 byte 56 if NodeAllocator
+};
+
+} // namespace detail
+
+// map
+
+// Flat map: elements stored in-place in the bucket array (fast, but iterators
+// and references are invalidated on rehash).
+template <typename Key, typename T, typename Hash = hash<Key>,
+          typename KeyEqual = std::equal_to<Key>, size_t MaxLoadFactor100 = 80>
+using unordered_flat_map = detail::Table<true, MaxLoadFactor100, Key, T, Hash, KeyEqual>;
+
+// Node map: elements allocated individually (stable references, like std::unordered_map).
+template <typename Key, typename T, typename Hash = hash<Key>,
+          typename KeyEqual = std::equal_to<Key>, size_t MaxLoadFactor100 = 80>
+using unordered_node_map = detail::Table<false, MaxLoadFactor100, Key, T, Hash, KeyEqual>;
+
+// Default map: automatically selects flat storage when the pair is small
+// (<= 6 words) and nothrow-movable, otherwise falls back to node storage.
+template <typename Key, typename T, typename Hash = hash<Key>,
+          typename KeyEqual = std::equal_to<Key>, size_t MaxLoadFactor100 = 80>
+using unordered_map =
+    detail::Table<sizeof(robin_hood::pair<Key, T>) <= sizeof(size_t) * 6 &&
+                      std::is_nothrow_move_constructible<robin_hood::pair<Key, T>>::value &&
+                      std::is_nothrow_move_assignable<robin_hood::pair<Key, T>>::value,
+                  MaxLoadFactor100, Key, T, Hash, KeyEqual>;
+
+// set
+
+template <typename Key, typename Hash = hash<Key>, typename KeyEqual = std::equal_to<Key>,
+          size_t MaxLoadFactor100 = 80>
+using unordered_flat_set = detail::Table<true, MaxLoadFactor100, Key, void, Hash, KeyEqual>;
+
+template <typename Key, typename Hash = hash<Key>, typename KeyEqual = std::equal_to<Key>,
+          size_t MaxLoadFactor100 = 80>
+using unordered_node_set = detail::Table<false, MaxLoadFactor100, Key, void, Hash, KeyEqual>;
+
+// Default set: same flat/node auto-selection as unordered_map, keyed on Key alone.
+template <typename Key, typename Hash = hash<Key>, typename KeyEqual = std::equal_to<Key>,
+          size_t MaxLoadFactor100 = 80>
+using unordered_set = detail::Table<sizeof(Key) <= sizeof(size_t) * 6 &&
+                                        std::is_nothrow_move_constructible<Key>::value &&
+                                        std::is_nothrow_move_assignable<Key>::value,
+                                    MaxLoadFactor100, Key, void, Hash, KeyEqual>;
+
+} // namespace robin_hood
+
+#endif


=====================================
src/mash/taxdb.hpp
=====================================
@@ -0,0 +1,304 @@
+#ifndef TAXD_DB_H_
+#define TAXD_DB_H_
+
+// Author: Florian Breitwieser
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include <unordered_set>
+#include <iomanip>
+#include <sstream>
+#include <stdexcept>
+
+
+using TaxID = uint64_t;
+
+using std::vector;
+using std::string;
+
+namespace mash {
+
+// One node of the taxonomy tree: a tax ID with parent/child links, a rank
+// (e.g. "species") and a scientific name (filled in by parseNamesDump).
+class TaxEntry {
+  public:
+    TaxID taxID;
+    TaxEntry* parent;           // NULL for the root / when the parent is missing
+    vector<TaxEntry*> children;
+
+    string rank;
+    string name; // scientific name
+
+    TaxEntry() : taxID(0), parent(NULL) {}
+
+    // NOTE(review): leaves 'parent' uninitialized; parseNodesDump assigns it
+    // afterwards, but any read before that assignment is undefined behavior.
+    TaxEntry(TaxID taxID, string rank) : taxID(taxID), rank(rank) {}
+};
+
+// Per-taxon counters accumulated during tax screening. Naming suggests
+// "tax*" counts belong to exactly this taxon and "clade*" counts include all
+// descendants — confirm against the accumulation code in CommandTaxScreen.
+struct TaxCounts {
+  uint64_t cladeCount = 0;
+  uint64_t taxCount = 0;
+  uint64_t taxHashCount = 0;
+  uint64_t cladeHashCount = 0;
+  vector<TaxID> children;   // tax IDs of observed child taxa
+};
+
+// In-memory NCBI-style taxonomy database: maps tax IDs to TaxEntry nodes and
+// provides LCA queries, lineage strings and Kraken-style report output.
+// NOTE(review): uses unqualified 'unordered_map' although only std::vector and
+// std::string are imported above — compiles only if an including file provides
+// 'using namespace std' or equivalent; verify.
+class TaxDB {
+  public:
+    // Build from NCBI names.dmp / nodes.dmp files.
+    TaxDB(const string namesDumpFileName, const string nodesDumpFileName);
+    // Load a previously written binary/index file (defined elsewhere).
+    TaxDB(const string inFileName);
+    TaxDB();
+
+    void writeTaxIndex(std::ostream & outs) const;
+    void readTaxIndex(const string inFileName);
+
+    // Returns the LCA of a and b; 0 is treated as "unset", unknown IDs yield 1 (root).
+    TaxID getLowestCommonAncestor(TaxID a, TaxID b) const;
+    string getLineage(TaxID taxID) const;
+    string getMetaPhlAnLineage(TaxID taxID) const;
+    // Returns NULL (after logging) when taxID is unknown.
+    TaxEntry const * getEntry(TaxID taxID) const;
+
+    unordered_map<TaxID, TaxEntry> entries;
+
+    // Recursively prints a per-clade report; taxID == 0 prints the header and
+    // recurses from the root (tax ID 1).
+    void writeReport(FILE* FP, const unordered_map<TaxID, TaxCounts> & counts, 
+                     unsigned long totalCounts, 
+                     unsigned long totalHashCounts, 
+                     TaxID taxID = 0, int depth = 0);
+
+  private:
+    // Returns a map from each created entry to its parent's tax ID.
+    unordered_map<TaxEntry*, TaxID> parseNodesDump(const string nodesDumpFile);
+    void parseNamesDump(const string namesDumpFile);
+};
+
+// Looks up a taxonomy entry by ID. Returns NULL (after logging to stderr)
+// when the ID is unknown — callers must check before dereferencing.
+TaxEntry const * TaxDB::getEntry(TaxID taxID) const {
+  auto it = entries.find(taxID);
+  if (it == entries.end()) {
+    cerr << "Couldn't find tax entry with taxID " << taxID << endl;
+    return NULL;
+  } else {
+    // pointers into unordered_map values stay valid across rehashes
+    return &it->second;
+  }
+}
+
+
+// Builds the taxonomy from NCBI dump files: nodes.dmp defines the tree
+// structure, names.dmp supplies scientific names.
+TaxDB::TaxDB(const string namesDumpFileName, const string nodesDumpFileName) {
+  unordered_map<TaxEntry*, TaxID> parentMap = parseNodesDump(nodesDumpFileName);
+
+  // set parent links correctly
+  for (auto const & c : parentMap) {
+    // a node that is its own parent (taxID == parent taxID) is the root
+    if (c.first->taxID != c.second) {
+      auto p = entries.find(c.second);
+      if (p == entries.end()) {
+         cerr << "Could not find parent with tax ID " << c.second << " for tax ID " << c.first->taxID << endl;
+      } else {
+        c.first->parent = &p->second;
+      }
+    } else {
+      c.first->parent = NULL;
+    }
+  }
+  parseNamesDump(namesDumpFileName);
+  cerr << "   " << entries.size() << " distinct taxa\n";
+}
+
+// Parses nodes.dmp ("taxID\t|\tparentTaxID\t|\trank\t|..."), creating one
+// TaxEntry per line. Returns entry-pointer -> parent-tax-ID so the caller can
+// wire up parent links after all entries exist. Pointers into 'entries' are
+// safe to keep: unordered_map never invalidates element addresses on rehash.
+unordered_map<TaxEntry*,TaxID> TaxDB::parseNodesDump(const string nodesDumpFileName) {
+  std::ifstream nodesDumpFile(nodesDumpFileName);
+  if (!nodesDumpFile.is_open())
+    throw std::runtime_error("unable to open nodes file");
+
+  string line;
+  TaxID taxID;
+  TaxID parentTaxID;
+  string rank;
+  char delim;
+  unordered_map<TaxEntry*,TaxID> parentMap;
+
+  while (nodesDumpFile >> taxID >> delim >> parentTaxID >> delim) {
+    nodesDumpFile.ignore(1);
+    getline(nodesDumpFile, rank, '\t');
+    // TODO: Insert
+    //auto res = entries.insert(taxID, TaxEntry(taxID, rank));
+    auto res = entries.emplace(taxID, TaxEntry(taxID, rank));
+    parentMap.emplace(&res.first->second, parentTaxID);
+    // skip the rest of the line (assumes lines shorter than 2560 chars)
+    nodesDumpFile.ignore(2560, '\n');
+  }
+  return parentMap;
+}
+
+// Parses names.dmp and stores the "scientific name" for each existing entry.
+// Entries must have been created by parseNodesDump first; unknown IDs are
+// reported to stderr and skipped.
+void TaxDB::parseNamesDump(const string namesDumpFileName) {
+  std::ifstream namesDumpFile(namesDumpFileName);
+  if (!namesDumpFile.is_open())
+    throw std::runtime_error("unable to open names file");
+  string line;
+
+  TaxID taxID;
+  string name, type;
+  char delim;
+  while (namesDumpFile >> taxID) {
+    // field separators in *.dmp are "\t|\t"; the ignore() calls step over them
+    namesDumpFile.ignore(3);
+    getline(namesDumpFile, name, '\t');
+    namesDumpFile.ignore(3);
+    namesDumpFile.ignore(256, '|');
+    namesDumpFile.ignore(1);
+    getline(namesDumpFile, type, '\t');
+
+    if (type == "scientific name") {
+      auto entryIt = entries.find(taxID);
+      if (entryIt == entries.end()) {
+        cerr << "Entry for " << taxID << " does not exist - it should!" << '\n';
+      } else {
+        entryIt->second.name = name;
+      }
+    }
+    namesDumpFile.ignore(2560, '\n');
+  }
+}
+
+// Lowest common ancestor of taxa a and b. A tax ID of 0 means "unset" and
+// yields the other argument; unknown IDs fall back to 1 (the root), as does
+// any pair with no common ancestor found.
+TaxID TaxDB::getLowestCommonAncestor(TaxID a, TaxID b) const {
+  if (b == 0) { return a; }
+  if (a == 0) { return b; } 
+
+  // create a path from a to the root
+  unordered_set<TaxEntry const *> a_path;
+  std::unordered_map<TaxID, TaxEntry>::const_iterator ta = entries.find(a);
+  if (ta == entries.end()) {
+    cerr << "TaxID " << a << " not in database - ignoring it.\n";
+    return 1;
+  }
+
+  std::unordered_map<TaxID, TaxEntry>::const_iterator tb = entries.find(b);
+  if (tb == entries.end()) {
+    cerr << "TaxID " << b << " not in database - ignoring it.\n";
+    return 1;
+  }
+  TaxEntry const * pta = &(ta->second);
+  // NOTE(review): loop condition stops before inserting the root node itself
+  // into a_path, so a common ancestor at the root is reported via the final
+  // fallback 'return 1' rather than matched here — confirm this is intended.
+  while (pta != NULL && pta->taxID > 1 && pta->parent != NULL) {
+    if (pta->taxID == b) { return b; }
+    a_path.insert(pta);
+    pta = pta->parent;
+  }
+  TaxEntry const * ptb = &(tb->second);
+  // search for b in the path from a to the root
+  while (ptb->taxID > 0 && ptb->parent != NULL) {
+    if (a_path.count(ptb)) {
+      return ptb->taxID;
+    }
+    ptb = ptb->parent;
+  }
+  return 1;
+}
+
+// Recursively writes a Kraken-style tab-separated clade report. taxID == 0
+// prints the header plus an "unclassified" row, then recurses from the root
+// (tax ID 1); children are printed in descending clade-count order.
+// NOTE(review): several issues worth fixing upstream:
+//  - "%llu" paired with TaxID (uint64_t) is only portable via PRIu64;
+//  - the data row prints rank before taxID, but the header lists taxID
+//    before rank, and the "unclassified" row has fewer columns than the header;
+//  - uint64_t counters are narrowed into 'unsigned int' locals;
+//  - getEntry() can return NULL, and 'taxon' is dereferenced unchecked;
+//  - the sort comparator takes '(int a, int b)', narrowing TaxID.
+void TaxDB::writeReport(FILE* FP,
+			const unordered_map<TaxID, TaxCounts> & counts,
+			unsigned long totalCounts,
+			unsigned long totalHashCounts,
+			TaxID taxID, int depth) {
+
+	unordered_map<TaxID, TaxCounts>::const_iterator it = counts.find(taxID);
+	unsigned int cladeCount = it == counts.end()? 0 : it->second.cladeCount;
+	unsigned int cladeHashCount = it == counts.end()? 0 : it->second.cladeHashCount;
+	unsigned int taxCount = it == counts.end()? 0 : it->second.taxCount;
+	unsigned int taxHashCount = it == counts.end()? 0 : it->second.taxHashCount;
+	if (taxID == 0) {
+    // TODO: Write header?
+    // identity, shared-hashes, median-multiplicity, p-value, query-ID, query-comment
+    fprintf(FP, "%%\thashes\ttaxHashes\thashesDB\ttaxHashesDB\ttaxID\trank\tname\n");
+		if (cladeCount > 0) { // Should not happen
+			fprintf(FP, "%.4f\t%i\t%i\tno rank\t0\tunclassified\n",
+					100 * cladeCount / double(totalCounts),
+					cladeCount, taxCount);
+		}
+	  writeReport(FP, counts, totalCounts, totalHashCounts, 1, 0);
+	} else {
+		if (cladeCount == 0) {
+			return;
+		}
+		TaxEntry const * taxon = getEntry(taxID);
+		fprintf(FP, "%.4f\t%i\t%i\t%i\t%i\t%s\t%llu\t%s%s\n",
+				100*cladeCount/double(totalCounts), 
+        cladeCount, 
+        taxCount, 
+        cladeHashCount,
+        taxHashCount,
+				taxon->rank.c_str(), taxID, std::string(2*depth, ' ').c_str(), taxon->name.c_str());
+
+		// sort children by descending clade count before recursing
+		std::vector<TaxID> children = it->second.children;
+		std::sort(children.begin(), children.end(), [&](int a, int b) { return counts.at(a).cladeCount > counts.at(b).cladeCount; });
+		for (TaxID childTaxId : children) {
+			if (counts.count(childTaxId)) {
+				writeReport(FP, counts, totalCounts, totalHashCounts, childTaxId, depth + 1);
+			} else {
+				break;
+			}
+		}
+	}
+}
+
+/*
+string TaxDB::getLineage(TaxEntry tax) const {
+  string lineage;
+  while (true) {
+    // 131567 = Cellular organisms
+    if (taxID != 131567) {
+      if (lineage.size()) lineage.insert(0, "; ");
+      lineage.insert(0, getScientificName(taxID));
+      if (getRank(taxID) == "species") lineage.clear();
+    }
+    taxID = getParentTaxID(taxID);
+    if (taxID == 0) {
+      if (lineage.size()) lineage.append(".");
+      break;
+    }
+  }
+  return lineage;
+}
+
+string TaxDB::getMetaPhlAnLineage(TaxID taxID) const {
+  string rank = getRank(taxID);
+  if (rank == "superphylum") return string();
+  string lineage;
+  while (true) {
+    // 131567 = Cellular organisms
+    //if (taxID != 131567) {
+      string rank = getRank(taxID);
+      if (rank == "species") {
+  lineage.insert(0, "|s__");
+  lineage.insert(4, getScientificName(taxID));
+      } else if (rank == "genus") {
+  lineage.insert(0, "|g__");
+  lineage.insert(4, getScientificName(taxID));
+      } else if (rank == "family") {
+  lineage.insert(0, "|f__");
+  lineage.insert(4, getScientificName(taxID));
+      } else if (rank == "order") {
+  lineage.insert(0, "|o__");
+  lineage.insert(4, getScientificName(taxID));
+      } else if (rank == "class") {
+  lineage.insert(0, "|c__");
+  lineage.insert(4, getScientificName(taxID));
+      } else if (rank == "phylum") {
+  lineage.insert(0, "|p__");
+  lineage.insert(4, getScientificName(taxID));
+      } else if (rank == "superkingdom") {
+  lineage.insert(0, "|k__");
+  lineage.insert(4, getScientificName(taxID));
+      } else {
+  lineage.insert(0, "|-__");
+  lineage.insert(4, getScientificName(taxID));
+
+	 // }
+    }
+    taxID = getParentTaxID(taxID);
+    if (taxID == 0) {
+      break;
+    }
+  }
+  std::replace(lineage.begin(), lineage.end(), ' ', '_');
+  return lineage;
+}
+*/
+
+}
+
+#endif /* TAXD_DB_H_ */


=====================================
src/mash/version.h
=====================================
@@ -4,4 +4,4 @@
 //
 // See the LICENSE.txt file included with this software for license information.
 
-static const char * version = "2.2.2";
+static const char * version = "2.3";



View it on GitLab: https://salsa.debian.org/med-team/mash/-/commit/87a2ce0e244dbd3c613c96e166702ae0d09b8da4

-- 
View it on GitLab: https://salsa.debian.org/med-team/mash/-/commit/87a2ce0e244dbd3c613c96e166702ae0d09b8da4
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20211012/0d6df7c9/attachment-0001.htm>


More information about the debian-med-commit mailing list