[med-svn] [salmon] 01/01: Imported Upstream version 0.6.0+ds1

Michael Crusoe misterc-guest at moszumanska.debian.org
Sun Feb 7 12:00:30 UTC 2016


This is an automated email from the git hooks/post-receive script.

misterc-guest pushed a commit to branch upstream
in repository salmon.

commit 9a33d6a9cfc94dce059f06e7df382d075eb3d5cd
Author: Michael R. Crusoe <crusoe at ucdavis.edu>
Date:   Sat Jan 30 09:57:29 2016 -0800

    Imported Upstream version 0.6.0+ds1
---
 .gitignore                                    |    3 +
 CMakeLists.txt                                |  172 +-
 README.md                                     |    2 +-
 cmake/TestSalmon.cmake                        |   35 -
 cmake/TestSalmonFMD.cmake                     |   33 +
 cmake/TestSalmonQuasi.cmake                   |   24 +
 cmake/UnitTests.cmake                         |   10 +
 doc/source/conf.py                            |    5 +-
 doc/source/salmon.rst                         |  179 +-
 external/.gitignore                           |   28 -
 include/AlignmentGroup.hpp                    |    2 +-
 include/AlignmentLibrary.hpp                  |   70 +-
 include/BAMQueue.hpp                          |    9 +-
 include/BAMQueue.tpp                          |   23 +-
 include/BWAMemStaticFuncs.hpp                 |  120 +
 include/BWAUtils.hpp                          |   34 +
 include/BootstrapWriter.hpp                   |   15 +
 include/ClusterForest.hpp                     |    5 +-
 include/CollapsedEMOptimizer.hpp              |   11 +
 include/CollapsedGibbsSampler.hpp             |    5 +-
 include/EquivalenceClassBuilder.hpp           |   94 +-
 include/ForgettingMassCalculator.hpp          |    2 +-
 include/FragmentLengthDistribution.hpp        |    9 +
 include/FragmentStartPositionDistribution.hpp |   25 +-
 include/GZipWriter.hpp                        |   52 +
 include/IndexVersionInfo.hpp                  |   11 +
 include/KmerIntervalMap.hpp                   |  131 +
 include/LibraryFormat.hpp                     |   14 +-
 include/LightweightAlignmentDefs.hpp          | 1470 ++++
 include/MultinomialSampler.hpp                |  104 +-
 include/PCA.hpp                               |    8 +-
 include/PairSequenceParser.hpp                |   10 +-
 include/ReadExperiment.hpp                    |  337 +-
 include/ReadKmerDist.hpp                      |   76 +
 include/ReadLibrary.hpp                       |    4 +-
 include/ReadPair.hpp                          |   25 +-
 include/SalmonConfig.hpp                      |    7 +-
 include/SalmonIndex.hpp                       |  321 +
 include/SalmonIndexVersionInfo.hpp            |   80 +
 include/SalmonMath.hpp                        |   28 +
 include/SalmonOpts.hpp                        |   33 +-
 include/{SpinLock.hpp => SalmonSpinLock.hpp}  |    6 +-
 include/SalmonUtils.hpp                       |   44 +-
 include/Sampler.hpp                           |   33 +-
 include/TextBootstrapWriter.hpp               |  101 +
 include/Transcript.hpp                        |  146 +-
 include/TranscriptCluster.hpp                 |    2 +-
 include/UnpairedRead.hpp                      |   10 +
 include/UtilityFunctions.hpp                  |  148 +
 include/blockingconcurrentqueue.h             |  760 ++
 include/concurrentqueue.h                     | 1121 +--
 include/cuckoohash_config.h                   |   16 -
 include/cuckoohash_config.hh                  |   28 +
 include/cuckoohash_map.hh                     | 2190 +++---
 include/cuckoohash_util.h                     |   23 -
 include/cuckoohash_util.hh                    |   88 +
 include/default_hasher.hh                     |   29 +
 include/kseq.h                                |  235 -
 include/posix.h                               |  344 -
 include/tensemble/BaseForest.h                |  513 --
 include/tensemble/BaseGBM.h                   |  426 --
 include/tensemble/ClassificationCriterion.h   |  201 -
 include/tensemble/Criterion.h                 |  163 -
 include/tensemble/Estimator.h                 |   79 -
 include/tensemble/EvaluateMetric.h            |   83 -
 include/tensemble/FeatureData.h               |  227 -
 include/tensemble/GBMClassifier.h             |  400 --
 include/tensemble/GBMRegressor.h              |  242 -
 include/tensemble/LossFunction.h              |  350 -
 include/tensemble/RandomForestClassifier.h    |  269 -
 include/tensemble/RandomForestRegressor.h     |  279 -
 include/tensemble/RandomGenerator.h           |   33 -
 include/tensemble/ReadData.h                  |  163 -
 include/tensemble/Tree.h                      |  731 --
 include/tensemble/Tree.h.backup.h             |  391 -
 include/tensemble/TreeClassifier.h            |   74 -
 include/tensemble/TreeNode.h                  |   41 -
 include/tensemble/TreeRegressor.h             |   66 -
 include/tensemble/TypeDef.h                   |   39 -
 include/tensemble/cmdline.h                   |  160 -
 include/xxhash.h                              |  196 +
 scripts/cpld.bash                             |   35 -
 scripts/fetchRapMap.sh                        |   36 +
 src/BWAUtils.cpp                              |  146 +
 src/BiasCorrectionDriver.cpp                  |   52 -
 src/BuildSalmonIndex.cpp                      |  197 +-
 src/CMakeLists.txt                            |  187 +-
 src/CollapsedEMOptimizer.cpp                  |  693 +-
 src/CollapsedGibbsSampler.cpp                 |  229 +-
 src/ComputeBiasFeatures.cpp                   |  225 -
 src/FASTAParser.cpp                           |   38 +-
 src/FragmentLengthDistribution.cpp            |   23 +-
 src/FragmentStartPositionDistribution.cpp     |  128 +-
 src/GZipWriter.cpp                            |  253 +
 src/PerformBiasCorrection.cpp                 |  349 -
 src/PerformBiasCorrection_old.cpp             |  420 --
 src/Salmon.cpp                                |    2 +-
 src/SalmonQuantify.cpp                        | 3229 +++------
 src/SalmonQuantifyAlignments.cpp              |  608 +-
 src/SalmonUtils.cpp                           |  600 +-
 src/SequenceBiasModel.cpp                     |    2 +-
 src/TranscriptGroup.cpp                       |    7 +-
 src/cokus.cpp                                 |  196 -
 src/posix.cc                                  |  252 -
 src/xxhash.c                                  |  915 +++
 tests/KmerHistTests.cpp                       |  115 +
 tests/LibraryTypeTests.cpp                    |  166 +
 tests/UnitTests.cpp                           |   11 +
 tests/catch.hpp                               | 9416 +++++++++++++++++++++++++
 109 files changed, 20314 insertions(+), 12292 deletions(-)

diff --git a/.gitignore b/.gitignore
index b8bd026..1162b32 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,3 +26,6 @@
 *.exe
 *.out
 *.app
+
+# Emacs save files
+*~
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c95f755..598602b 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,29 +4,29 @@ enable_testing()
 
 project (Salmon)
 
-set(CPACK_PACKAGE_VERSION "0.4.2")
+set(CPACK_PACKAGE_VERSION "0.6.0")
 set(CPACK_PACKAGE_VERSION_MAJOR "0")
-set(CPACK_PACKAGE_VERSION_MINOR 4")
-set(CPACK_PACKAGE_VERSION_PATCH 2")
+set(CPACK_PACKAGE_VERSION_MINOR "6")
+set(CPACK_PACKAGE_VERSION_PATCH "0")
 set(CPACK_GENERATOR "TGZ")
 set(CPACK_SOURCE_GENERATOR "TGZ")
-set(CPACK_PACKAGE_VENDOR Stony Brook University")
-set(CPACK_PACKAGE_DESCRIPTION_SUMMARY Salmon - Wicked-fast RNA-seq isoform quantification using lightweight alignments")
-set(CPACK_PACKAGE_NAME 
+set(CPACK_PACKAGE_VENDOR "Stony Brook University")
+set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Salmon - Wicked-fast RNA-seq isoform quantification using lightweight mapping")
+set(CPACK_PACKAGE_NAME
   "${CMAKE_PROJECT_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
 set(CPACK_SOURCE_PACKAGE_FILE_NAME
   "${CMAKE_PROJECT_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}-Source")
 
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/")
 
-set (WARNING_IGNORE_FLAGS "-Wno-deprecated-register") 
+set (WARNING_IGNORE_FLAGS "-Wno-deprecated-register")
 set (BOOST_CXX_FLAGS "-Wno-deprecated-register -std=c++11")
 ## Prefer static to dynamic libraries
 SET(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
 
 ## Set the standard required compile flags
-# Nov 18th --- removed -DHAVE_CONFIG_H 
-set (CMAKE_CXX_FLAGS "-pthread -funroll-loops -fPIC -fomit-frame-pointer -Ofast -DHAVE_ANSI_TERM -DHAVE_SSTREAM -Wall -std=c++11 -Wreturn-type -Werror=return-type")
+# Nov 18th --- removed -DHAVE_CONFIG_H
+set (CMAKE_CXX_FLAGS "-pthread -funroll-loops -fPIC -fomit-frame-pointer -Ofast -DRAPMAP_SALMON_SUPPORT -DHAVE_ANSI_TERM -DHAVE_SSTREAM -Wall -Wno-reorder -Wno-unused-variable -std=c++11 -Wreturn-type -Werror=return-type")
 
 ##
 # OSX is strange (some might say, stupid in this regard).  Deal with it's quirkines here.
@@ -46,7 +46,7 @@ else()
 endif()
 
 set( BOOST_EXTRA_FLAGS "--layout=tagged" )
-## this get's set differently below if we 
+## this get's set differently below if we
 ## are on clang & apple
 set (NON_APPLECLANG_LIBS gomp rt)
 set (PTHREAD_LIB)
@@ -70,7 +70,7 @@ if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
     endif ()
 
     set (GCC TRUE)
-    
+
     # Put complete static linking on hold for the time-being
     # If we're not on OSX, make an attempt to compile everything statically
     #if (NOT APPLE)
@@ -79,9 +79,9 @@ if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
     set (PTHREAD_LIB "pthread")
     #endif()
 
-    # If we're on Linux (i.e. not OSX) and we're using 
+    # If we're on Linux (i.e. not OSX) and we're using
     # gcc, then set the -static-libstdc++ flag
-    if (NOT APPLE) 
+    if (NOT APPLE)
         set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libstdc++")
     endif()
 
@@ -93,7 +93,7 @@ if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
 # Tentatively, we support clang now
 elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
     set(CLANG TRUE)
-    # If we have libc++, then try and use it 
+    # If we have libc++, then try and use it
     include(CheckCXXCompilerFlag)
     check_cxx_compiler_flag(-stdlib=libc++ HAVE_LIBCPP)
     if (HAVE_LIBCPP)
@@ -104,14 +104,14 @@ elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
 	    set (BCXX_FLAGS "-stdlib=libc++ -DBOOST_HAS_INT128")
 	    set (BOOST_EXTRA_FLAGS toolset=clang cxxflags=${BCXX_FLAGS} linkflags="-stdlib=libc++")
         set (JELLYFISH_CXX_FLAGS "-stdlib=libc++")
-    # Otherwise, use libstdc++ (and make it static) 
+    # Otherwise, use libstdc++ (and make it static)
     else()
         set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libstdc++")
     endif()
     # There's currently a bug with clang-3.4 & Boost 1.55 -- this hack fixes it
     # but we should do something better (does this break things if CPU doesn't
     # have 128-bit support)?
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBOOST_HAS_INT128") 
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBOOST_HAS_INT128")
 
     if (APPLE)
         set (NON_APPLECLANG_LIBS "")
@@ -123,7 +123,7 @@ else ()
 endif ()
 
 ## TODO: Figure out how to detect this automatically
-# If the "assembler" is too old, tell TBB not to compile 
+# If the "assembler" is too old, tell TBB not to compile
 # with -mrtm
 if (NO_RTM)
     set (TBB_CXXFLAGS "-mno-rtm")
@@ -154,6 +154,16 @@ set (GAT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 # Have CMake tell us what it's doing
 set (CMAKE_VERBOSE_MAKEFILE true)
 
+###
+#
+#  Grab RapMap sources for quasi-mapping code --- DURING CONFIGURE TIME!
+#
+####
+if(NOT FETCHED_RAPMAP)
+  exec_program(${CMAKE_CURRENT_SOURCE_DIR}/scripts/fetchRapMap.sh)
+  set(FETCHED_RAPMAP TRUE CACHE BOOL "Has RapMap been fetched?" FORCE)
+endif()
+
 ##
 # Super-secret override
 ##
@@ -180,11 +190,11 @@ if (NOT LIBLZMA_FOUND)
     message ("=======================================")
 ExternalProject_Add(liblzma
     DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
-    URL http://tukaani.org/xz/xz-5.2.0.tar.gz 
-    SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/xz-5.2.0
+    URL http://tukaani.org/xz/xz-5.2.2.tar.gz
+    SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/xz-5.2.2
     INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
     BUILD_IN_SOURCE TRUE
-    CONFIGURE_COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/external/xz-5.2.0/configure --prefix=<INSTALL_DIR> CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER}
+    CONFIGURE_COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/external/xz-5.2.2/configure --prefix=<INSTALL_DIR> CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER}
     BUILD_COMMAND make
     INSTALL_COMMAND make install
 )
@@ -242,16 +252,16 @@ if (BOOST_RECONFIGURE)
     unset(Boost_FOUND CACHE)
     unset(Boost_INCLUDE_DIR CACHE)
     unset(Boost_INCLUDE_DIRS CACHE)
-    unset(Boost_LIBRARY_DIRS CACHE) 
+    unset(Boost_LIBRARY_DIRS CACHE)
     unset(Boost_LIBRARIES CACHE)
     unset(BOOST_ROOT CACHE)
     unset(CMAKE_PREFIX_PATH CACHE)
-    
+
     set(BOOST_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/external/install)
     set(CMAKE_PREFIX_PATH ${CMAKE_CURRENT_SOURCE_DIR}/external/install)
     set(Boost_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/external/install/include)
     set(Boost_LIBRARY_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/external/install/lib)
-    find_package(Boost 1.53.0 COMPONENTS filesystem system thread timer chrono program_options REQUIRED)
+    find_package(Boost 1.53.0 COMPONENTS iostreams filesystem system thread timer chrono program_options REQUIRED)
     set(FETCH_BOOST FALSE)
 endif()
 
@@ -262,7 +272,7 @@ endif()
 # option, go and grab it for them.
 ##
 if ((NOT Boost_FOUND) AND (NOT FETCH_BOOST))
-	message(FATAL_ERROR 
+	message(FATAL_ERROR
         "Salmon cannot be compiled without Boost.\n"
         "It is recommended to visit http://www.boost.org/ and install Boost according to those instructions.\n"
         "This build system can also download and install a local version of boost for you (this takes a lot of time).\n"
@@ -270,8 +280,8 @@ if ((NOT Boost_FOUND) AND (NOT FETCH_BOOST))
     )
 elseif(FETCH_BOOST)
     ## Let the rest of the build process know we're going to be fetching boost
-    set (BOOST_LIB_SUBSET --with-atomic --with-chrono --with-container --with-date_time --with-exception 
-                          --with-filesystem --with-graph --with-graph_parallel --with-math 
+    set (BOOST_LIB_SUBSET --with-iostreams --with-atomic --with-chrono --with-container --with-date_time --with-exception
+                          --with-filesystem --with-graph --with-graph_parallel --with-math
                           --with-program_options --with-system --with-thread
                           --with-timer)
     set (BOOST_WILL_RECONFIGURE TRUE)
@@ -280,20 +290,20 @@ elseif(FETCH_BOOST)
     message("==================================================================")
     ExternalProject_Add(libboost
         DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
-        DOWNLOAD_COMMAND curl -k -L http://fossies.org/linux/misc/boost_1_57_0.tar.gz -o boost_1_57_0.tar.gz && 
-                tar xzf boost_1_57_0.tar.gz
+        DOWNLOAD_COMMAND curl -k -L  http://downloads.sourceforge.net/project/boost/boost/1.59.0/boost_1_59_0.tar.gz -o boost_1_59_0.tar.gz &&
+                tar xzf boost_1_59_0.tar.gz
         #URL http://downloads.sourceforge.net/project/boost/boost/1.57.0/boost_1_57_0.tar.gz
-        SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/boost_1_57_0
+        SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/boost_1_59_0
         INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
         #PATCH_COMMAND patch -p2 < ${CMAKE_CURRENT_SOURCE_DIR}/external/boost156.patch
-	CONFIGURE_COMMAND CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} ${CMAKE_CURRENT_SOURCE_DIR}/external/boost_1_57_0/bootstrap.sh ${BOOST_CONFIGURE_TOOLSET} ${BOOST_BUILD_LIBS} --prefix=<INSTALL_DIR>
-	BUILD_COMMAND CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} ${CMAKE_CURRENT_SOURCE_DIR}/external/boost_1_57_0/b2 -d0 -j2 ${BOOST_LIB_SUBSET} toolset=${BOOST_TOOLSET} ${BOOST_EXTRA_FLAGS} cxxflags=${BOOST_CXX_FLAGS} install
+	CONFIGURE_COMMAND CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} ${CMAKE_CURRENT_SOURCE_DIR}/external/boost_1_59_0/bootstrap.sh ${BOOST_CONFIGURE_TOOLSET} ${BOOST_BUILD_LIBS} --prefix=<INSTALL_DIR>
+	BUILD_COMMAND CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} ${CMAKE_CURRENT_SOURCE_DIR}/external/boost_1_59_0/b2 -d0 -j2 ${BOOST_LIB_SUBSET} toolset=${BOOST_TOOLSET} ${BOOST_EXTRA_FLAGS} cxxflags=${BOOST_CXX_FLAGS} install
         BUILD_IN_SOURCE 1
         INSTALL_COMMAND ""
     )
 
     ##
-    # After we've installed boost, 
+    # After we've installed boost,
     ##
     SET( RECONFIG_FLAGS ${RECONFIG_FLAGS} -DBOOST_WILL_RECONFIGURE=FALSE -DBOOST_RECONFIGURE=TRUE -DFETCH_BOOST=FALSE)
     ExternalProject_Add_Step(libboost reconfigure
@@ -310,7 +320,7 @@ if (BOOST_WILL_RECONFIGURE)
     message("Setting Temporary Boost paths")
     set(Boost_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install/include)
     set(Boost_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/external/install/include)
-    set(Boost_LIBRARY_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/external/install/lib)        
+    set(Boost_LIBRARY_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/external/install/lib)
     set(Boost_FOUND TRUE)
 endif()
 
@@ -319,19 +329,34 @@ message("BOOST INCLUDE DIR = ${Boost_INCLUDE_DIR}")
 message("BOOST INCLUDE DIRS = ${Boost_INCLUDE_DIRS}")
 message("BOOST LIB DIR = ${Boost_LIBRARY_DIRS}")
 message("BOOST LIBRAREIS = ${Boost_LIBRARIES}")
-	
+
 set(EXTERNAL_LIBRARY_PATH $CMAKE_CURRENT_SOURCE_DIR/lib)
 
+message("Build system will build libdivsufsort")
+message("==================================================================")
+include(ExternalProject)
+ExternalProject_Add(libdivsufsort
+    DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
+    URL ${CMAKE_CURRENT_SOURCE_DIR}/external/libdivsufsort.zip
+    SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/libdivsufsort-master
+    INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
+    UPDATE_COMMAND sh -c "mkdir -p <SOURCE_DIR>/build"
+    BINARY_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/libdivsufsort-master/build
+    CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR> -DBUILD_DIVSUFSORT64=TRUE -DUSE_OPENMP=TRUE -DBUILD_SHARED_LIBS=FALSE
+)
+set(SUFFARRAY_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/external/install/include)
+
 message("Build system will fetch and build the Cereal serialization library")
 message("==================================================================")
 include(ExternalProject)
 ExternalProject_Add(libcereal
     DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
-    URL http://www.cs.cmu.edu/~robp/files/cereal-v1.0.0.tgz
-    SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/cereal-1.0.0
+    DOWNLOAD_COMMAND curl -k -L https://github.com/USCiLab/cereal/archive/v1.1.2.tar.gz -o cereal-v1.1.2.tar.gz &&
+		tar -xzvf cereal-v1.1.2.tar.gz
+	SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/cereal-1.1.2
     INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
     UPDATE_COMMAND sh -c "mkdir -p <SOURCE_DIR>/build"
-    BINARY_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/cereal-1.0.0/build
+    BINARY_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/cereal-1.1.2/build
     CONFIGURE_COMMAND ""
     BUILD_COMMAND ""
     INSTALL_COMMAND sh -c "mkdir -p <INSTALL_DIR>/include && cp -r <SOURCE_DIR>/include/cereal <INSTALL_DIR>/include"
@@ -343,8 +368,8 @@ include(ExternalProject)
 ExternalProject_Add(libbwa
     DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
     DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/bwa/archive/0.7.12.3.tar.gz -o bwa-master.tar.gz &&
-                     mkdir -p bwa-master && 
-                     tar -xzvf bwa-master.tar.gz --strip-components=1 -C bwa-master 
+                     mkdir -p bwa-master &&
+                     tar -xzvf bwa-master.tar.gz --strip-components=1 -C bwa-master
     SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/bwa-master
     INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
     CONFIGURE_COMMAND ""
@@ -357,14 +382,16 @@ message("Build system will fetch and build Jellyfish")
 message("==================================================================")
 ExternalProject_Add(libjellyfish
     DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
-    URL ftp://ftp.genome.umd.edu/pub/jellyfish/jellyfish-2.1.3.tar.gz
-    SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/jellyfish-2.1.3
+    DOWNLOAD_COMMAND curl -k -L https://github.com/gmarcais/Jellyfish/releases/download/v2.2.3/jellyfish-2.2.3.tar.gz -o jellyfish-2.2.3.tgz &&
+        rm -fr jellyfish-2.2.3 &&
+        tar -xzvf jellyfish-2.2.3.tgz
+   SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/jellyfish-2.2.3
     INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
-    CONFIGURE_COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/external/jellyfish-2.1.3/configure --prefix=<INSTALL_DIR> CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} CXXFLAGS=${JELLYFISH_CXX_FLAGS}
+    CONFIGURE_COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/external/jellyfish-2.2.3/configure --prefix=<INSTALL_DIR> CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} CXXFLAGS=${JELLYFISH_CXX_FLAGS}
     BUILD_COMMAND ${MAKE} CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} CXXFLAGS=${JELLYFISH_CXX_FLAGS}
     BUILD_IN_SOURCE 1
-    INSTALL_COMMAND make install && 
-                    cp config.h <INSTALL_DIR>/include/jellyfish-2.1.3/jellyfish/ &&
+    INSTALL_COMMAND make install &&
+                    cp config.h <INSTALL_DIR>/include/jellyfish-2.2.3/jellyfish/ &&
                     cp config.h <INSTALL_DIR>/include/
 )
 
@@ -391,10 +418,10 @@ message("==================================================================")
 set(TBB_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/tbb43_20140724oss)
 set(TBB_INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install)
 
-if ("${TBB_COMPILER}" STREQUAL "gcc") 
+if ("${TBB_COMPILER}" STREQUAL "gcc")
     ## Don't know why it's a problem yet, but if we're using
     ## GCC, get rid of the DO_ITT_NOTIFY flag
-    set(TBB_CXXFLAGS "${TBB_CXXFLAGS} -UDO_ITT_NOTIFY")  
+    set(TBB_CXXFLAGS "${TBB_CXXFLAGS} -UDO_ITT_NOTIFY")
 endif()
 
 ExternalProject_Add(libtbb
@@ -406,7 +433,7 @@ ExternalProject_Add(libtbb
     INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
     PATCH_COMMAND "${TBB_PATCH_STEP}"
     CONFIGURE_COMMAND ""
-    BUILD_COMMAND make CXXFLAGS=${TBB_CXXFLAGS} lambdas=1 compiler=${TBB_COMPILER} cfg=release tbb_build_prefix=LIBS 
+    BUILD_COMMAND make CXXFLAGS=${TBB_CXXFLAGS} lambdas=1 compiler=${TBB_COMPILER} cfg=release tbb_build_prefix=LIBS
     INSTALL_COMMAND sh -c "cp ${TBB_SOURCE_DIR}/build/LIBS_release/*.${SHARED_LIB_EXTENSION}* ${TBB_INSTALL_DIR}/lib && cp -r ${TBB_SOURCE_DIR}/include/* ${TBB_INSTALL_DIR}/include"
     BUILD_IN_SOURCE 1
 )
@@ -453,7 +480,10 @@ message("Build system will compile libgff")
 message("==================================================================")
 ExternalProject_Add(libgff
     DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
-    URL http://www.cs.cmu.edu/~robp/files/libgff.tgz
+    DOWNLOAD_COMMAND curl -k -L https://github.com/Kingsford-Group/libgff/archive/v1.0.tar.gz -o libgff.tgz &&
+    tar -xzvf libgff.tgz &&
+    rm -fr libgff &&
+    mv libgff-1.0 libgff
     SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/libgff
     UPDATE_COMMAND sh -c "mkdir -p <SOURCE_DIR>/build"
     INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
@@ -461,9 +491,17 @@ ExternalProject_Add(libgff
     CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${CMAKE_CURRENT_SOURCE_DIR}/external/install
 )
 
+# Because of the way that Apple has changed SIP
+# in El Capitan, some headers may be in a new location
+if (APPLE)
+    set(STADEN_INC "-I/usr/local/include")
+    set(STADEN_LIB "-L/usr/local/lib")
+endif()
+
+
 message("Build system will compile Staden IOLib")
 message("==================================================================")
-ExternalProject_Add(libstadenio 
+ExternalProject_Add(libstadenio
     DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
     DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/staden-io_lib/releases/download/v1.13.10/io_lib-1.13.10.tar.gz -o staden-io_lib-v1.13.10.tar.gz &&
                      mkdir -p staden-io_lib-1.13.10 &&
@@ -473,7 +511,8 @@ ExternalProject_Add(libstadenio
     SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/staden-io_lib
     INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
     CONFIGURE_COMMAND ./configure --enable-shared=no --without-libcurl --prefix=<INSTALL_DIR> LDFLAGS=${LIBSTADEN_LDFLAGS} CFLAGS=${LIBSTADEN_CFLAGS} CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER}
-    BUILD_COMMAND make CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER}
+    BUILD_COMMAND make CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} CFLAGS+=${STADEN_INC} CFLAGS+=${STADEN_LIB}
+
     BUILD_IN_SOURCE 1
     INSTALL_COMMAND make install
 )
@@ -482,10 +521,10 @@ message("Build system will fetch SPDLOG")
 message("==================================================================")
 ExternalProject_Add(libspdlog
     DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
-    DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/spdlog/archive/v1.5.tar.gz -o spdlog-v1.5.tar.gz &&
-                     tar -xzf spdlog-v1.5.tar.gz &&
+    DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/spdlog/archive/v1.6.tar.gz -o spdlog-v1.6.tar.gz &&
+                     tar -xzf spdlog-v1.6.tar.gz &&
                      rm -fr spdlog &&
-                     mv -f  spdlog-1.5 spdlog
+                     mv -f  spdlog-1.6 spdlog
     SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/spdlog
     INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
     CONFIGURE_COMMAND ""
@@ -521,9 +560,9 @@ if (NOT HAVE_FAST_MALLOC)
     message("==================================================================")
     ExternalProject_Add(libjemalloc
         DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
-        DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/jemalloc/archive/3.6.0.tar.gz -o jemalloc-3.6.0.tar.gz &&
-        tar -xzf jemalloc-3.6.0.tar.gz 
-        SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/jemalloc-3.6.0
+        DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/jemalloc/archive/4.0.4.tar.gz -o jemalloc-4.0.4.tar.gz &&
+        tar -xzf jemalloc-4.0.4.tar.gz
+        SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/jemalloc-4.0.4
         BUILD_IN_SOURCE TRUE
         INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
         CONFIGURE_COMMAND sh -c "CC=${CMAKE_C_COMPILER} ./autogen.sh --prefix=<INSTALL_DIR>"
@@ -534,13 +573,30 @@ if (NOT HAVE_FAST_MALLOC)
     set (HAVE_FAST_MALLOC TRUE)
 endif ()
 
+
+##
+## This dependency is for RapMap
+##
+message("Build system will fetch and build SparseHash")
+message("==================================================================")
+ExternalProject_Add(libsparsehash
+    DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
+    DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/sparsehash/archive/sparsehash-2.0.2.tar.gz -o sparsehash-2.0.2.tar.gz &&
+        tar -xzf sparsehash-2.0.2.tar.gz
+    SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/sparsehash-sparsehash-2.0.2
+    BUILD_IN_SOURCE TRUE
+    INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
+    CONFIGURE_COMMAND sh -c "CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} ./configure --prefix=<INSTALL_DIR>"
+    INSTALL_COMMAND make install
+)
+
 ###
 #
 # Done building external dependencies.
 #
 ###
 
-set (CPACK_SOURCE_IGNORE_FILES 
+set (CPACK_SOURCE_IGNORE_FILES
 "/src/PCA.cpp"
 "/src/PCAUtils.cpp"
 "/build/"
@@ -567,7 +623,7 @@ message("CPACK_SOURCE_IGNORE_FILES = ${CPACK_SOURCE_IGNORE_FILES}")
 
 # Recurse into Salmon source directory
 add_subdirectory ( src )
+#add_dependencies(salmon RapMap)
 
 # build a CPack driven installer package
 include (CPack)
-
diff --git a/README.md b/README.md
index 1e343ea..5dd8ce4 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 [![Build Status](https://travis-ci.org/COMBINE-lab/salmon.svg?branch=master)](https://travis-ci.org/COMBINE-lab/salmon)
-[![Documentation Status](https://readthedocs.org/projects/salmon/badge/?version=latest)](https://readthedocs.org/projects/salmon/?badge=latest)
+[![Documentation Status](https://readthedocs.org/projects/salmon/badge/?version=latest)](http://salmon.readthedocs.org/en/latest)
 
 What is Salmon?
 ===============
diff --git a/cmake/TestSalmon.cmake b/cmake/TestSalmon.cmake
deleted file mode 100644
index 6384868..0000000
--- a/cmake/TestSalmon.cmake
+++ /dev/null
@@ -1,35 +0,0 @@
-execute_process(COMMAND tar xzvf sample_data.tgz
-                WORKING_DIRECTORY ${TOPLEVEL_DIR}
-                RESULT_VARIABLE TAR_RESULT
-               )
-
-if (TAR_RESULT)
-    message(FATAL_ERROR "Error untarring sample_data.tgz")
-endif()
-
-set(SALMON_INDEX_CMD ${TOPLEVEL_DIR}/build/src/salmon index -t transcripts.fasta -i sample_salmon_index)
-execute_process(COMMAND ${SALMON_INDEX_CMD}
-                WORKING_DIRECTORY ${TOPLEVEL_DIR}/sample_data
-                RESULT_VARIABLE SALMON_INDEX_RESULT
-                )
-
-if (SALMON_INDEX_RESULT)
-    message(FATAL_ERROR "Error running ${SALMON_INDEX_COMMAND}")
-endif()
-
-set(SALMON_QUANT_COMMAND ${TOPLEVEL_DIR}/build/src/salmon quant -i sample_salmon_index -l IU -1 reads_1.fastq -2 reads_2.fastq -o sample_salmon_quant -n 1000000)
-execute_process(COMMAND ${SALMON_QUANT_COMMAND}
-	            WORKING_DIRECTORY ${TOPLEVEL_DIR}/sample_data
-                RESULT_VARIABLE SALMON_QUANT_RESULT
-                )
-if (SALMON_QUANT_RESULT)
-    message(FATAL_ERROR "Error running ${QUANT_RESULT}")
-endif()
-
-if (EXISTS ${TOPLEVEL_DIR}/sample_data/sample_salmon_quant/quant.sf)
-    message("Salmon (read) ran successfully")
-else()
-    message(FATAL_ERROR "Salmon (read) failed to produce output")
-endif()
-
-
diff --git a/cmake/TestSalmonFMD.cmake b/cmake/TestSalmonFMD.cmake
new file mode 100644
index 0000000..ff40e4a
--- /dev/null
+++ b/cmake/TestSalmonFMD.cmake
@@ -0,0 +1,33 @@
+execute_process(COMMAND tar xzvf sample_data.tgz
+                WORKING_DIRECTORY ${TOPLEVEL_DIR}
+                RESULT_VARIABLE TAR_RESULT
+               )
+
+if (TAR_RESULT)
+    message(FATAL_ERROR "Error untarring sample_data.tgz")
+endif()
+
+set(SALMON_FMD_INDEX_CMD ${CMAKE_BINARY_DIR}/salmon index -t transcripts.fasta -i sample_salmon_fmd_index --type fmd)
+execute_process(COMMAND ${SALMON_FMD_INDEX_CMD}
+                WORKING_DIRECTORY ${TOPLEVEL_DIR}/sample_data
+                RESULT_VARIABLE SALMON_FMD_INDEX_RESULT
+                )
+
+if (SALMON_FMD_INDEX_RESULT)
+    message(FATAL_ERROR "Error running ${SALMON_FMD_INDEX_COMMAND}")
+endif()
+
+set(SALMON_QUANT_COMMAND ${CMAKE_BINARY_DIR}/salmon quant -i sample_salmon_fmd_index -l IU -1 reads_1.fastq -2 reads_2.fastq -o sample_salmon_fmd_quant)
+execute_process(COMMAND ${SALMON_QUANT_COMMAND}
+	            WORKING_DIRECTORY ${TOPLEVEL_DIR}/sample_data
+                RESULT_VARIABLE SALMON_FMD_QUANT_RESULT
+                )
+if (SALMON_FMD_QUANT_RESULT)
+    message(FATAL_ERROR "Error running ${SALMON_FMD_QUANT_RESULT}")
+endif()
+
+if (EXISTS ${TOPLEVEL_DIR}/sample_data/sample_salmon_fmd_quant/quant.sf)
+    message("Salmon (read) ran successfully")
+else()
+    message(FATAL_ERROR "Salmon (read --- fmd-index) failed to produce output")
+endif()
diff --git a/cmake/TestSalmonQuasi.cmake b/cmake/TestSalmonQuasi.cmake
new file mode 100644
index 0000000..4b2f9d0
--- /dev/null
+++ b/cmake/TestSalmonQuasi.cmake
@@ -0,0 +1,24 @@
+set(SALMON_QUASI_INDEX_CMD ${CMAKE_BINARY_DIR}/salmon index -t transcripts.fasta -i sample_salmon_quasi_index --type quasi)
+execute_process(COMMAND ${SALMON_QUASI_INDEX_CMD}
+                WORKING_DIRECTORY ${TOPLEVEL_DIR}/sample_data
+                RESULT_VARIABLE SALMON_QUASI_INDEX_RESULT
+                )
+
+if (SALMON_QUASI_INDEX_RESULT)
+    message(FATAL_ERROR "Error running ${SALMON_QUASI_INDEX_COMMAND}")
+endif()
+
+set(SALMON_QUANT_COMMAND ${CMAKE_BINARY_DIR}/salmon quant -i sample_salmon_quasi_index -l IU -1 reads_1.fastq -2 reads_2.fastq -o sample_salmon_quasi_quant)
+execute_process(COMMAND ${SALMON_QUANT_COMMAND}
+	            WORKING_DIRECTORY ${TOPLEVEL_DIR}/sample_data
+                RESULT_VARIABLE SALMON_QUASI_QUANT_RESULT
+                )
+if (SALMON_QUASI_QUANT_RESULT)
+    message(FATAL_ERROR "Error running ${SALMON_QUASI_QUANT_RESULT}")
+endif()
+
+if (EXISTS ${TOPLEVEL_DIR}/sample_data/sample_salmon_quasi_quant/quant.sf)
+    message("Salmon (read) ran successfully")
+else()
+    message(FATAL_ERROR "Salmon (read --- quasi-index) failed to produce output")
+endif()
diff --git a/cmake/UnitTests.cmake b/cmake/UnitTests.cmake
new file mode 100644
index 0000000..857f604
--- /dev/null
+++ b/cmake/UnitTests.cmake
@@ -0,0 +1,10 @@
+set(TEST_COMMAND ${TOPLEVEL_DIR}/tests/unitTests)
+execute_process(COMMAND ${TEST_COMMAND}
+	            WORKING_DIRECTORY ${TOPLEVEL_DIR}/tests
+                RESULT_VARIABLE UNIT_TEST_RESULT
+                )
+if (UNIT_TEST_RESULT)
+    message(FATAL_ERROR "Error running ${UNIT_TEST_RESULT}")
+endif()
+
+
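The unit tests driven by this script live in tests/UnitTests.cpp and use the bundled single-header Catch framework (tests/catch.hpp in the file list above). The test sources themselves are not shown in this diff; purely as a hypothetical sketch of how a Catch-based test binary is usually structured (the test name and values below are invented, not taken from the Salmon tests):

    // Hypothetical sketch of a Catch-style test; not the actual tests/UnitTests.cpp.
    #define CATCH_CONFIG_MAIN   // ask Catch to generate main() for the unitTests binary
    #include "catch.hpp"

    #include <numeric>
    #include <vector>

    TEST_CASE("counts sum as expected", "[example]") {
        std::vector<int> counts{1, 2, 3, 4};
        REQUIRE(std::accumulate(counts.begin(), counts.end(), 0) == 10);
    }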
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 2df870c..8909cba 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -31,7 +31,6 @@ import os
 extensions = [
     'sphinx.ext.autodoc',
     'sphinx.ext.todo',
-    'sphinx.ext.mathjax',
     'sphinx.ext.ifconfig',
 ]
 
@@ -56,9 +55,9 @@ copyright = u'2015, Rob Patro, Carl Kingsford and Steve Mount'
 # built documents.
 #
 # The short X.Y version.
-version = '0.3.2'
+version = '0.5.0'
 # The full version, including alpha/beta/rc tags.
-release = '0.3.2'
+release = '0.5.0'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/doc/source/salmon.rst b/doc/source/salmon.rst
index 2c6e64d..34dce1d 100644
--- a/doc/source/salmon.rst
+++ b/doc/source/salmon.rst
@@ -74,8 +74,8 @@ set of alignments.
     the mapping cache (``--disableMappingCache``), and potentially increase the
     parallelizability of lightweight-alignment-based Salmon.
 
-Lightweight-alignment-based mode
---------------------------------
+Lightweight-alignment-based mode (including quasimapping)
+---------------------------------------------------------
 
 One of the novel and innovative features of Salmon is its ability to accurately
 quantify transcripts using *lightweight* alignments.  Lightweight alignments
@@ -84,6 +84,14 @@ performing a base-to-base alignment of the read to the transcript.  Lightweight
 alignments are typically much faster to compute than traditional (or full)
 alignments, and can sometimes provide superior accuracy by being more robust 
 to errors in the read or genomic variation from the reference sequence.
+
+Salmon currently supports two different methods for lightweight alignment:
+SMEM-based mapping and quasi-mapping.  SMEM-based mapping is the original
+lightweight-alignment method used by Salmon, and quasi-mapping is a newer and
+considerably faster alternative.  Both methods are currently exposed via the
+same ``quant`` command, but they require different indices, so SMEM-based
+mapping cannot be used with a quasi-mapping index and vice versa.
+
 If you want to use Salmon in lightweight alignment-based mode, then you first
 have to build an Salmon index for your transcriptome.  Assume that
 ``transcripts.fa`` contains the set of transcripts you wish to quantify. First,
@@ -91,22 +99,62 @@ you run the Salmon indexer:
 
 ::
     
-    > ./bin/salmon index -t transcripts.fa -i transcripts_index
+    > ./bin/salmon index -t transcripts.fa -i transcripts_index --type quasi -k 31 
+    
+This will build the quasi-mapping-based index, using an auxiliary k-mer hash
+over k-mers of length 31.  While quasi-mapping will make use of arbitrarily
+long matches between the query and reference, the `k` size selected here will 
+act as the *minimum* acceptable length for a valid match.  Thus, a smaller 
+value of `k` may slightly improve sensitivity.  We find that a `k` of 31 seems
+to work well for reads of 75bp or longer, but you might consider a smaller 
+`k` if you plan to deal with shorter reads. Note that there is also a 
+`k` parameter that can be passed to the ``quant`` command.  However, this has
+no effect if one is using a quasi-mapping index, as the `k` value provided
+during the index building phase overrides any `k` provided during
+quantification in this case.
+
+::
+    
+    > ./bin/salmon index -t transcripts.fa -i transcripts_index --type fmd
+
+This will build the SMEM-based mapping index.  Note that no value of `k` 
+is given here.  However, the SMEM-based mapping index makes use of a parameter 
+`k` that is passed in during the ``quant`` phase (the default value is `19`). 
 
 Then, you can quantify any set of reads (say, paired-end reads in files
-`reads1.fa` and `reads2.fa`) directly against this index using the Salmon
+`reads1.fq` and `reads2.fq`) directly against this index using the Salmon
 ``quant`` command as follows:
 
 ::
 
-    > ./bin/salmon quant -i transcripts_index -l <LIBTYPE> -1 reads1.fa -2 reads2.fa -o transcripts_quant
+    > ./bin/salmon quant -i transcripts_index -l <LIBTYPE> -1 reads1.fq -2 reads2.fq -o transcripts_quant
+
+If you are using single-end reads, then you pass them to Salmon with 
+the ``-r`` flag, as follows:
+
+::
+
+    > ./bin/salmon quant -i transcripts_index -l <LIBTYPE> -r reads.fq -o transcripts_quant
 
+
+This same ``quant`` command will work with either index (quasi-mapping or
+SMEM-based), and Salmon will automatically determine the type of index being 
+read and perform the appropriate lightweight mapping accordingly.
+
+.. note:: Order of command-line parameters
+
+    The library type ``-l`` should be specified on the command line **before** the 
+    read files (i.e. the parameters to ``-1`` and ``-2``, or ``-r``).  This is because
+    the value of the library type flag determines how the reads should
+    be interpreted.
+    
 You can, of course, pass a number of options to control things such as the
 number of threads used or the different cutoffs used for counting reads.
 Just as with the alignment-based mode, after Salmon has finished running, there
 will be a directory called ``salmon_quant``, that contains a file called
 ``quant.sf`` containing the quantification results.
 
+
 Alignment-based mode
 --------------------
 
@@ -157,6 +205,64 @@ mode, and a description of each, run ``salmon quant --help-alignment``.
     header sections must be identical).
 
 
+Description of important options
+--------------------------------
+
+Salmon exposes a number of useful optional command-line parameters to the user.
+The particularly important ones are explained here, but you can always run
+``salmon quant -h`` to see them all.
+
+""""""""""""""""""""""""""
+``-p`` / ``--numThreads``
+""""""""""""""""""""""""""
+
+The number of threads that will be used for quasi-mapping, quantification, and
+bootstrapping / posterior sampling (if enabled).  Salmon is designed to work
+well with many threads, so, if you have a sufficient number of processors, larger
+values here can speed up the run substantially.
+
+
+""""""""""""""
+``--useVBOpt``
+""""""""""""""
+
+Use the variational Bayesian EM algorithm rather than the "standard" EM algorithm
+to optimize abundance estimates.  The details of the VBEM algorithm can be found
+in [2]_, and the details of the variant over fragment equivalence classes that
+we use can be found in [3]_.  While both the standard EM and the VBEM produce
+accurate abundance estimates, those produced by the VBEM seem, generally, to be
+a bit more accurate.  Further, the VBEM tends to converge after fewer iterations,
+so it may result in a shorter runtime, especially if you are computing many
+bootstrap samples. 
+
+"""""""""""""""""""
+``--numBootstraps``
+"""""""""""""""""""
+
+Salmon has the ability to optionally compute bootstrapped abundance estimates.
+This is done by resampling (with replacement) from the counts assigned to
+the fragment equivalence classes, and then re-running the optimization procedure,
+either the EM or VBEM, for each such sample.  The values of these different
+bootstraps allows us to assess technical variance in the main abundance estimates
+we produce.  Such estimates can be useful for downstream (e.g. differential
+expression) tools that can make use of such uncertainty estimates.  This option
+takes a positive integer that dictates the number of bootstrap samples to compute.
+The more samples computed, the better the estimates of varaiance, but the
+more computation (and time) required.
+
+"""""""""""""""""""""
+``--numGibbsSamples``
+"""""""""""""""""""""
+
+Just as with the bootstrap procedure above, this option produces samples that allow
+us to estimate the variance in abundance estimates.  However, in this case the
+samples are generated using posterior Gibbs sampling over the fragment equivalence
+classes rather than bootstrapping.  We are currently analyzing these different approaches
+to assess the potential trade-offs in time / accuracy.  The ``--numBootstraps`` and
+``--numGibbsSamples`` options are mutually exclusive (i.e. in a given run, you may
+set at most one of these options to a positive integer).
+
+
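To make the bootstrap description above concrete: resampling with replacement from the equivalence-class counts amounts to drawing the same total number of fragments again, weighted by the observed class counts, and then re-running the EM or VBEM on the resampled counts. The following is only an illustrative sketch of that resampling step, not Salmon's implementation:

    // Illustrative only: multinomial resampling of equivalence-class counts.
    #include <cstdint>
    #include <random>
    #include <vector>

    std::vector<uint64_t> bootstrapCounts(const std::vector<uint64_t>& counts,
                                          std::mt19937& gen) {
        uint64_t total = 0;                       // total fragments across all classes
        for (auto c : counts) { total += c; }

        // Draw 'total' fragments with replacement, weighted by the class counts.
        std::discrete_distribution<size_t> pick(counts.begin(), counts.end());
        std::vector<uint64_t> resampled(counts.size(), 0);
        for (uint64_t i = 0; i < total; ++i) { ++resampled[pick(gen)]; }

        // The EM / VBEM would then be re-run on 'resampled'.
        return resampled;
    }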
 What's this ``LIBTYPE``?
 ------------------------
 
@@ -233,9 +339,9 @@ with a ``#`` is a comment line, and can be safely ignored.  Salmon records the f
 and options passed to it in comments at the top of its output file.  The last comment 
 line gives the names of each of the data columns. The columns appear in the following order: 
 
-+------+--------+-----+------+----------+
-| Name | Length | TPM | FPKM | NumReads |
-+------+--------+-----+------+----------+
++------+--------+-----+----------+
+| Name | Length | TPM | NumReads |
++------+--------+-----+----------+
 
 Each subsequent row described a single quantification record.  The columns have
 the following interpretation.
@@ -250,19 +356,10 @@ the following interpretation.
   This is salmon's estimate of the relative abundance of this transcript in units of Transcripts Per Million (TPM).
   TPM is the recommended relative abundance measure to use for downstream analysis. 
 
-* **FPKM** ---
-  This is salmon's estimate of the relative abundance of this transcript in units of Fragments Per Kilobase per Million
-  mapped reads (FPKM).  This relative abundance measure is proportional, within-sample, to the TPM measure.  However, 
-  the TPM should generally be preferred to FPKM.  This column is provided mostly for compatibility with tools that expect
-  FPKM as input.
-
 * **NumReads** --- 
   This is salmon's estimate of the number of reads mapping to each transcript that was quantified.  It is an "estimate" 
   insofar as it is the expected number of reads that have originated from each transcript given the structure of the uniquely 
-  mapping and multi-mapping reads and the relative abundance estimates for each transcript.  You can round these values 
-  to the nearest integer and use them directly as input to count-based methods like 
-  `Deseq2 <http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html>`_ and 
-  `EdgeR <http://master.bioconductor.org/packages/release/bioc/html/edgeR.html>`_, among others.
+  mapping and multi-mapping reads and the relative abundance estimates for each transcript.
 
 Misc
 ----
@@ -280,43 +377,9 @@ command to decompress the reads "on-the-fly":
 and the gzipped files will be decompressed via separate processes and the raw
 reads will be fed into salmon.
 
-.. note:: The Mapping Cache 
-
-    Salmon requires a specific number of observations (fragments) to
-    be observed before it will report its quantification results.  If it 
-    doesn't see enough fragments when reading through the read files the 
-    first time, it will process the information again (don't worry; it's not 
-    double counting. The results from the first pass essentially become 
-    a "prior" for assigning the proper read counts in subsequent passes).
-
-    The first time the file is processed, the set of potential mappings for
-    each fragment is written to a temporary file in an efficient binary format
-    --- this file is called the mapping cache.  As soon as the required number
-    of obvservations have been seen, salmon stops writing to the mapping cache
-    (ensuring that the file size will not grow too large).  However, for
-    experiments with fewer than the required number of observations, the
-    mapping cache is a significant optimization over reading through the raw
-    set of reads multiple times.  First, the work of determining the potential
-    mapping locations for a read is only performed once, during the inital pass
-    through the file.  Second, since the mapping cache is implemented as a
-    regular file on disk, the information contained within a file can be
-    processed multiple times, even if the file itself is being produced via
-    e.g. process substitution as in the example above.
-    
-    You can control the required number of observations and thus, indirectly,
-    the maximum size of the mapping cache file, via the ``-n`` argument.
-    Note that the cache itself is considered a "temporary" file, and it is
-    removed from disk by salmon before the program terminates.  If you are
-    certain that your read library is large enough that you will observe the
-    required number of fragments in the first pass, or if you have some other 
-    reason to avoid creating the temporary mapping cache, it can disabled with
-    the ``--disableMappingCache`` flag.
-
-**Finally**, the purpose of making this beta executable (as well as the Salmon
-code) available is for people to use it and provide feedback.  A pre-print and
-manuscript are in the works, but the earlier we get feedback, thoughts,
-suggestions and ideas, the better!  So, if you have something useful to report
-or just some interesting ideas or suggestions, please contact us
-(`rob.patro at cs.stonybrook.edu` and/or `carlk at cs.cmu.edu`).  Also, please use
-the same e-mail addresses to contact us with any *detailed* bug-reports (though
-bug-support for these early beta versions may be slow).
+**Finally**, the purpose of making this software available is for people to use
+it and provide feedback.  The `pre-print describing this method is on bioRxiv <http://biorxiv.org/content/early/2015/10/03/021592>`_.
+If you have something useful to report or just some interesting ideas or
+suggestions, please contact us (`rob.patro at cs.stonybrook.edu` and/or
+`carlk at cs.cmu.edu`).  If you encounter any bugs, please file a *detailed*
+bug report at the `Salmon GitHub repository <https://github.com/COMBINE-lab/salmon>`_. 
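Because the table change above drops the FPKM column, downstream scripts that pick quant.sf columns by position need the new four-column layout (Name, Length, TPM, NumReads). A minimal sketch of reading that layout while skipping the leading ``#`` comment lines described above; whitespace-separated parsing is assumed here:

    // Sketch: read the four-column quant.sf layout (Name, Length, TPM, NumReads).
    #include <fstream>
    #include <iostream>
    #include <sstream>
    #include <string>

    int main(int argc, char* argv[]) {
        if (argc < 2) { std::cerr << "usage: parse_quant quant.sf\n"; return 1; }
        std::ifstream in(argv[1]);
        std::string line;
        while (std::getline(in, line)) {
            if (line.empty() || line[0] == '#') { continue; }   // comment lines
            std::istringstream fields(line);
            std::string name;
            double length, tpm, numReads;
            if (fields >> name >> length >> tpm >> numReads) {
                std::cout << name << "\tTPM=" << tpm << "\n";
            }
        }
        return 0;
    }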
diff --git a/external/.gitignore b/external/.gitignore
deleted file mode 100644
index b8bd026..0000000
--- a/external/.gitignore
+++ /dev/null
@@ -1,28 +0,0 @@
-# Compiled Object files
-*.slo
-*.lo
-*.o
-*.obj
-
-# Precompiled Headers
-*.gch
-*.pch
-
-# Compiled Dynamic libraries
-*.so
-*.dylib
-*.dll
-
-# Fortran module files
-*.mod
-
-# Compiled Static libraries
-*.lai
-*.la
-*.a
-*.lib
-
-# Executables
-*.exe
-*.out
-*.app
diff --git a/include/AlignmentGroup.hpp b/include/AlignmentGroup.hpp
index e9b1e94..84fd324 100644
--- a/include/AlignmentGroup.hpp
+++ b/include/AlignmentGroup.hpp
@@ -22,7 +22,7 @@ class AlignmentGroup {
     public:
         AlignmentGroup() : read_(nullptr), isUniquelyMapped_(true) { alignments_.reserve(10); }
         AlignmentGroup(AlignmentGroup& other) = delete;
-        AlignmentGroup(AlignmentGroup&& other) = delete;
+        AlignmentGroup(AlignmentGroup&& other) = default;
         AlignmentGroup& operator=(AlignmentGroup& other) = delete;
         AlignmentGroup& operator=(AlignmentGroup&& other) = delete;
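The only change to AlignmentGroup is that its move constructor goes from deleted to defaulted while copying stays forbidden, so a group can be handed off by move but never duplicated. A small stand-alone illustration of that distinction (MyGroup is a hypothetical stand-in, not the real class):

    // Hypothetical stand-in: deleted copy, defaulted move.
    #include <utility>
    #include <vector>

    struct MyGroup {
        std::vector<int> alignments;
        MyGroup() = default;
        MyGroup(const MyGroup&) = delete;    // copying is still forbidden
        MyGroup(MyGroup&&) = default;        // moving is now allowed
    };

    int main() {
        MyGroup g;
        g.alignments.push_back(42);
        MyGroup h(std::move(g));             // OK: uses the defaulted move constructor
        // MyGroup i(h);                     // would not compile: copy is deleted
        return static_cast<int>(h.alignments.size());
    }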
 
diff --git a/include/AlignmentLibrary.hpp b/include/AlignmentLibrary.hpp
index 9387906..28f094c 100644
--- a/include/AlignmentLibrary.hpp
+++ b/include/AlignmentLibrary.hpp
@@ -23,6 +23,8 @@ extern "C" {
 #include "FASTAParser.hpp"
 #include "concurrentqueue.h"
 #include "EquivalenceClassBuilder.hpp"
+#include "SpinLock.hpp" // RapMap's with try_lock
+#include "ReadKmerDist.hpp"
 
 // Boost includes
 #include <boost/filesystem.hpp>
@@ -58,7 +60,8 @@ class AlignmentLibrary {
     	fragStartDists_(5),
         seqBiasModel_(1.0),
     	eqBuilder_(salmonOpts.jointLog),
-        quantificationPasses_(0) {
+        quantificationPasses_(0),
+        expectedBias_(constExprPow(4, readBias_.getK()), 1.0) {
             namespace bfs = boost::filesystem;
 
             // Make sure the alignment file exists.
@@ -161,7 +164,35 @@ class AlignmentLibrary {
         return eqBuilder_;
     }
 
+    void updateTranscriptLengthsAtomic(std::atomic<bool>& done) {
+        if (sl_.try_lock()) {
+            if (!done) {
+                auto& fld = *(flDist_.get());
+                std::vector<double> logPMF;
+                size_t minVal;
+                size_t maxVal;
+                double logFLDMean = fld.mean();
+                fld.dumpPMF(logPMF, minVal, maxVal);
+                double sum = salmon::math::LOG_0;
+                for (auto v : logPMF) {
+                    sum = salmon::math::logAdd(sum, v);
+                }
+                for (auto& v : logPMF) {
+                    v -= sum;
+                }
+                // Update the effective length of *every* transcript
+                for( auto& t : transcripts_ ) {
+                    t.updateEffectiveLength(logPMF, logFLDMean, minVal, maxVal);
+                }
+                // then declare that we are done
+                done = true;
+                sl_.unlock();
+            }
+        }
+    }
+
     std::vector<Transcript>& transcripts() { return transcripts_; }
+    const std::vector<Transcript>& transcripts() const { return transcripts_; }
 
     inline bool getAlignmentGroup(AlignmentGroup<FragT>*& ag) { return bq->getAlignmentGroup(ag); }
 
@@ -173,8 +204,8 @@ class AlignmentLibrary {
 	    return fragStartDists_;
     }
 
-    inline FragmentLengthDistribution& fragmentLengthDistribution() {
-        return *flDist_.get();
+    inline FragmentLengthDistribution* fragmentLengthDistribution() const {
+        return flDist_.get();
     }
 
     inline AlignmentModel& alignmentModel() {
@@ -197,9 +228,13 @@ class AlignmentLibrary {
 
     inline BAMQueue<FragT>& getAlignmentGroupQueue() { return *bq.get(); }
 
-    inline size_t upperBoundHits() { return bq->numMappedReads(); }
-    inline size_t numMappedReads() { return bq->numMappedReads(); }
-    inline size_t numUniquelyMappedReads() { return bq->numUniquelyMappedReads(); }
+    inline size_t upperBoundHits() { return bq->numMappedFragments(); }
+    inline size_t numObservedFragments() const { return bq->numObservedFragments(); }
+    inline size_t numMappedFragments() const { return bq->numMappedFragments(); }
+    inline size_t numUniquelyMappedFragments() { return bq->numUniquelyMappedFragments(); }
+    inline double effectiveMappingRate() const {
+        return static_cast<double>(numMappedFragments()) / numObservedFragments();
+    }
 
     //const boost::filesystem::path& alignmentFile() { return alignmentFile_; }
 
@@ -226,6 +261,22 @@ class AlignmentLibrary {
 
     inline LibraryFormat format() { return libFmt_; }
 
+    void setExpectedBias(const std::vector<double>& expectedBiasIn) {
+        expectedBias_ = expectedBiasIn;
+    }
+
+    std::vector<double>& expectedBias() {
+        return expectedBias_;
+    }
+
+    const std::vector<double>& expectedBias() const {
+        return expectedBias_;
+    }
+
+    ReadKmerDist<6, std::atomic<uint32_t>>& readBias() { return readBias_; }
+    const ReadKmerDist<6, std::atomic<uint32_t>>& readBias() const { return readBias_; }
+
+
     private:
     /**
      * The file from which the alignments will be read.
@@ -283,8 +334,13 @@ class AlignmentLibrary {
      *  made through the alignment file.
      */
     size_t quantificationPasses_;
-
+    SpinLock sl_;
     EquivalenceClassBuilder eqBuilder_;
+
+    // Since multiple threads can touch this dist, we
+    // need atomic counters.
+    ReadKmerDist<6, std::atomic<uint32_t>> readBias_;
+    std::vector<double> expectedBias_;
 };
 
 #endif // ALIGNMENT_LIBRARY_HPP
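For readers skimming updateTranscriptLengthsAtomic above: the loop over logPMF is a log-space normalization, i.e. it accumulates the PMF's total mass with logAdd and subtracts it so the probabilities sum to one. A self-contained sketch of just that step follows; the logAdd shown here is an assumed stand-in for salmon::math::logAdd, written only for illustration:

    // Sketch: normalize a log-space PMF so the probabilities sum to one.
    #include <algorithm>
    #include <cmath>
    #include <limits>
    #include <vector>

    // Numerically stable log(exp(a) + exp(b)); stand-in for salmon::math::logAdd.
    double logAdd(double a, double b) {
        const double logZero = -std::numeric_limits<double>::infinity();
        if (a == logZero) { return b; }
        if (b == logZero) { return a; }
        double hi = std::max(a, b);
        return hi + std::log1p(std::exp(std::min(a, b) - hi));
    }

    void normalizeLogPMF(std::vector<double>& logPMF) {
        double logSum = -std::numeric_limits<double>::infinity();   // log(0)
        for (double v : logPMF) { logSum = logAdd(logSum, v); }
        for (double& v : logPMF) { v -= logSum; }                   // divide in log space
    }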
diff --git a/include/BAMQueue.hpp b/include/BAMQueue.hpp
index 67122aa..ece9f59 100644
--- a/include/BAMQueue.hpp
+++ b/include/BAMQueue.hpp
@@ -72,9 +72,10 @@ public:
   inline bool getAlignmentGroup(AlignmentGroup<FragT*>*& group);
 
   // Return the number of reads processed so far by the queue
-  size_t numObservedReads();
-  size_t numMappedReads();
-  size_t numUniquelyMappedReads();
+  size_t numObservedAlignments();
+  size_t numObservedFragments();
+  size_t numMappedFragments();
+  size_t numUniquelyMappedFragments();
 
   void reset();
 
@@ -111,7 +112,7 @@ private:
   SAM_hdr* hdr_ = nullptr;
 
   //htsFile* fp_ = nullptr;
-  size_t totalReads_;
+  size_t totalAlignments_;
   size_t numUnaligned_;
   size_t numMappedReads_;
   size_t numUniquelyMappedReads_;
diff --git a/include/BAMQueue.tpp b/include/BAMQueue.tpp
index 53aac9f..602d633 100644
--- a/include/BAMQueue.tpp
+++ b/include/BAMQueue.tpp
@@ -7,7 +7,7 @@ template <typename FragT>
 BAMQueue<FragT>::BAMQueue(std::vector<boost::filesystem::path>& fnames, LibraryFormat& libFmt,
                           uint32_t numParseThreads, uint32_t cacheSize):
     files_(std::vector<AlignmentFile>()),
-    libFmt_(libFmt), totalReads_(0),
+    libFmt_(libFmt), totalAlignments_(0),
     numUnaligned_(0), numMappedReads_(0), 
     numUniquelyMappedReads_(0),
     //fragmentQueue_(2000000),
@@ -105,7 +105,7 @@ void BAMQueue<FragT>::reset() {
   scram_set_option(file.fp, CRAM_OPT_NTHREADS, file.numParseThreads);
 
   fmt::print(stderr, "] . . . done\n");
-  totalReads_ = 0;
+  totalAlignments_ = 0;
   numUnaligned_ = 0;
   numMappedReads_ = 0;
   numUniquelyMappedReads_ = 0;
@@ -342,6 +342,8 @@ inline bool BAMQueue<FragT>::getFrag_(ReadPair& rpair, FilterT filt) {
     bool didRead1{false};
     bool didRead2{false};
     rpair.orphanStatus = salmon::utils::OrphanStatus::LeftOrphan;
+
+    // Until we get a valid pair of reads
     while (!haveValidPair) {
         // Consume a single read
         didRead1 = (scram_get_seq(fp_, &rpair.read1) >= 0);
@@ -368,10 +370,10 @@ inline bool BAMQueue<FragT>::getFrag_(ReadPair& rpair, FilterT filt) {
                     break;
                     // === end of UnmappedOrphan case
                 case AlignmentType::MappedOrphan:
-                    isFwd = !(bam_flag(rpair.read1) & BAM_FREVERSE);
+                    isFwd = !(bam_strand(rpair.read1));
                     startPos = bam_pos(rpair.read1); 
                     rpair.libFmt = salmon::utils::hitType(startPos, isFwd);
-                    rpair.orphanStatus = (!isFwd) ?
+                    rpair.orphanStatus = (bam_flag(rpair.read1) & BAM_FREAD1) ?
                         salmon::utils::OrphanStatus::LeftOrphan :
                         salmon::utils::OrphanStatus::RightOrphan;
                     rpair.logProb = salmon::math::LOG_0;
@@ -519,7 +521,7 @@ inline bool BAMQueue<FragT>::getFrag_(ReadPair& rpair, FilterT filt) {
                 std::exit(1);
                 break;
         }
-        ++totalReads_;
+        ++totalAlignments_;
     }
     rpair.logProb = salmon::math::LOG_0;
     return true;
@@ -559,7 +561,7 @@ inline bool BAMQueue<FragT>::getFrag_(UnpairedRead& sread, FilterT filt) {
             }
             ++numUnaligned_; 
         }
-        ++totalReads_;
+        ++totalAlignments_;
     }
 
     sread.logProb = salmon::math::LOG_0;
@@ -567,15 +569,18 @@ inline bool BAMQueue<FragT>::getFrag_(UnpairedRead& sread, FilterT filt) {
 }
 
 template <typename FragT>
-size_t BAMQueue<FragT>::numObservedReads(){ return totalReads_; }
+size_t BAMQueue<FragT>::numObservedAlignments(){ return totalAlignments_; }
+
+template <typename FragT>
+size_t BAMQueue<FragT>::numObservedFragments(){ return numMappedReads_ + numUnaligned_; }
 
 template <typename FragT>
-size_t BAMQueue<FragT>::numMappedReads(){ 
+size_t BAMQueue<FragT>::numMappedFragments(){ 
     return numMappedReads_;
 }
 
 template <typename FragT>
-size_t BAMQueue<FragT>::numUniquelyMappedReads(){ 
+size_t BAMQueue<FragT>::numUniquelyMappedFragments(){ 
     return numUniquelyMappedReads_;
 }
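The renames in this hunk also pin down the bookkeeping used elsewhere in the commit: a fragment counts as observed whether or not it mapped, and the effective mapping rate reported by AlignmentLibrary is mapped fragments over observed fragments. In miniature, with invented numbers:

    // Invented numbers, just to make the counter relationship concrete.
    #include <cstdio>

    int main() {
        unsigned long numMapped    = 900000;
        unsigned long numUnaligned = 100000;
        unsigned long numObserved  = numMapped + numUnaligned;              // 1000000
        double mappingRate = static_cast<double>(numMapped) / numObserved;  // 0.9
        std::printf("effective mapping rate = %.2f\n", mappingRate);
        return 0;
    }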
 
diff --git a/include/BWAMemStaticFuncs.hpp b/include/BWAMemStaticFuncs.hpp
new file mode 100644
index 0000000..2f5209d
--- /dev/null
+++ b/include/BWAMemStaticFuncs.hpp
@@ -0,0 +1,120 @@
+#ifndef BWAMEM_STATIC_FUNCS_HPP
+#define BWAMEM_STATIC_FUNCS_HPP
+
+extern unsigned char nst_nt4_table[256];
+char const* bwa_pg = "cha";
+
+/******* STUFF THAT IS STATIC IN BWAMEM THAT WE NEED HERE --- Just re-define it *************/
+#define intv_lt(a, b) ((a).info < (b).info)
+KSORT_INIT(mem_intv, bwtintv_t, intv_lt)
+
+typedef struct {
+    bwtintv_v mem, mem1, *tmpv[2];
+} smem_aux_t;
+
+static smem_aux_t *smem_aux_init()
+{
+    smem_aux_t *a;
+    a = static_cast<smem_aux_t*>(calloc(1, sizeof(smem_aux_t)));
+    a->tmpv[0] = static_cast<bwtintv_v*>(calloc(1, sizeof(bwtintv_v)));
+    a->tmpv[1] = static_cast<bwtintv_v*>(calloc(1, sizeof(bwtintv_v)));
+    return a;
+}
+
+static void smem_aux_destroy(smem_aux_t *a)
+{
+    free(a->tmpv[0]->a); free(a->tmpv[0]);
+    free(a->tmpv[1]->a); free(a->tmpv[1]);
+    free(a->mem.a); free(a->mem1.a);
+    free(a);
+}
+
+static void mem_collect_intv(const SalmonOpts& sopt, const mem_opt_t *opt, SalmonIndex* sidx, int len, const uint8_t *seq, smem_aux_t *a)
+{
+    const bwt_t* bwt = sidx->bwaIndex()->bwt;
+    int i, k, x = 0, old_n;
+    int start_width = (opt->flag & MEM_F_SELF_OVLP)? 2 : 1;
+    int split_len = (int)(opt->min_seed_len * opt->split_factor + .499);
+    a->mem.n = 0;
+
+    // first pass: find all SMEMs
+    if (sidx->hasAuxKmerIndex()) {
+        KmerIntervalMap& auxIdx = sidx->auxIndex();
+        uint32_t klen = auxIdx.k();
+        while (x < len) {
+            if (seq[x] < 4) {
+                // Make sure there are at least k bases left
+                if (len - x < klen) { x = len; continue; }
+                // search for this key in the auxiliary index
+                KmerKey kmer(const_cast<uint8_t*>(&(seq[x])), klen);
+                auto it = auxIdx.find(kmer);
+                // if we can't find it, move to the next key
+                if (it == auxIdx.end()) { ++x; continue; }
+                // otherwise, start the search using the initial interval @it->second from the hash
+                int xb = x;
+                x = bwautils::bwt_smem1_with_kmer(bwt, len, seq, x, start_width, it->second, &a->mem1, a->tmpv);
+                for (i = 0; i < a->mem1.n; ++i) {
+                    bwtintv_t *p = &a->mem1.a[i];
+                    int slen = (uint32_t)p->info - (p->info>>32); // seed length
+                    if (slen >= opt->min_seed_len)
+                        kv_push(bwtintv_t, a->mem, *p);
+                }
+            } else ++x;
+        }
+    } else {
+        while (x < len) {
+            if (seq[x] < 4) {
+                x = bwt_smem1(bwt, len, seq, x, start_width, &a->mem1, a->tmpv);
+                for (i = 0; i < a->mem1.n; ++i) {
+                    bwtintv_t *p = &a->mem1.a[i];
+                    int slen = (uint32_t)p->info - (p->info>>32); // seed length
+                    if (slen >= opt->min_seed_len)
+                        kv_push(bwtintv_t, a->mem, *p);
+                }
+            } else ++x;
+        }
+    }
+
+    // For sensitive / extra-sensitive mode only
+    if (sopt.sensitive or sopt.extraSeedPass) {
+        // second pass: find MEMs inside a long SMEM
+        old_n = a->mem.n;
+        for (k = 0; k < old_n; ++k) {
+            bwtintv_t *p = &a->mem.a[k];
+            int start = p->info>>32, end = (int32_t)p->info;
+            if (end - start < split_len || p->x[2] > opt->split_width) continue;
+
+            //int idx = (start + end) >> 1;
+            bwt_smem1(bwt, len, seq, (start + end)>>1, p->x[2]+1, &a->mem1, a->tmpv);
+            for (i = 0; i < a->mem1.n; ++i)
+                if ((uint32_t)a->mem1.a[i].info - (a->mem1.a[i].info>>32) >= opt->min_seed_len)
+                    kv_push(bwtintv_t, a->mem, a->mem1.a[i]);
+        }
+    }
+
+    // For extra-sensitive mode only
+    // third pass: LAST-like
+    if (sopt.extraSeedPass and opt->max_mem_intv > 0) {
+        x = 0;
+        while (x < len) {
+            if (seq[x] < 4) {
+                if (1) {
+                    bwtintv_t m;
+                    x = bwt_seed_strategy1(bwt, len, seq, x, opt->min_seed_len, opt->max_mem_intv, &m);
+                    if (m.x[2] > 0) kv_push(bwtintv_t, a->mem, m);
+                } else { // for now, we never come to this block which is slower
+                    x = bwt_smem1a(bwt, len, seq, x, start_width, opt->max_mem_intv, &a->mem1, a->tmpv);
+                    for (i = 0; i < a->mem1.n; ++i)
+                        kv_push(bwtintv_t, a->mem, a->mem1.a[i]);
+                }
+            } else ++x;
+        }
+    }
+    // sort
+    // ks_introsort(mem_intv, a->mem.n, a->mem.a);
+}
+
+
+/******* END OF STUFF THAT IS STATIC IN BWAMEM THAT WE NEED HERE --- Just re-define it *************/
+
+#endif // BWAMEM_STATIC_FUNCS_HPP
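
A usage sketch for the re-defined helpers above, assuming sopt (SalmonOpts), memOpt (mem_opt_t*) and sidx (SalmonIndex*) are already configured and seq holds a read encoded via nst_nt4_table:

    smem_aux_t* aux = smem_aux_init();
    mem_collect_intv(sopt, memOpt, sidx, readLen, seq, aux);
    for (size_t i = 0; i < aux->mem.n; ++i) {
        bwtintv_t* p = &aux->mem.a[i];
        int qstart = p->info >> 32;                   // MEM start in the read
        int qend   = static_cast<uint32_t>(p->info);  // one past the MEM end
        // p->x[2] is the number of occurrences of this MEM in the index
    }
    smem_aux_destroy(aux);
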
diff --git a/include/BWAUtils.hpp b/include/BWAUtils.hpp
new file mode 100644
index 0000000..c98b24a
--- /dev/null
+++ b/include/BWAUtils.hpp
@@ -0,0 +1,34 @@
+#ifndef __BWA_UTILS_HPP__
+#define __BWA_UTILS_HPP__
+
+extern "C" {
+#include "bwa.h"
+#include "bwamem.h"
+#include "kvec.h"
+#include "utils.h"
+}
+
+namespace bwautils {
+
+    // Function modified from bwt_smem1a:
+    // https://github.com/lh3/bwa/blob/eb428d7d31ced059ad39af2701a22ebe6d175657/bwt.c#L289
+    /**
+     * Search for the k-mer of length @len starting at @q. 
+     * Return true if an interval is found for the k-mer and false 
+     * otherwise. The appropriate bwt interval will be placed 
+     * in @resInterval upon success. 
+     *
+     */
+    bool getIntervalForKmer(const bwt_t* bwt, // the bwt index
+                            int len, // k-mer length
+                            const uint8_t *q, // query
+                            bwtintv_t& resInterval);
+
+    // NOTE: $max_intv is not currently used in BWA-MEM
+    // NOTE: Modified from the original functions to take an initial interval for the search query
+    int bwt_smem1a_with_kmer(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, uint64_t max_intv, bwtintv_t initial_interval, bwtintv_v *mem, bwtintv_v *tmpvec[2]);
+    
+    int bwt_smem1_with_kmer(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, bwtintv_t initial_interval, bwtintv_v *mem, bwtintv_v *tmpvec[2]);
+}
+
+#endif // __BWA_UTILS_HPP__
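
A sketch of the k-mer-seeded search these declarations enable (mirroring the auxiliary-index branch of mem_collect_intv), assuming bwt, a 2-bit encoded read, its length, a start position x, the k-mer length k and two scratch bwtintv_v* tmpv[2] are available:

    bwtintv_t ival;
    if (bwautils::getIntervalForKmer(bwt, k, read + x, ival)) {
        bwtintv_v mems; kv_init(mems);
        x = bwautils::bwt_smem1_with_kmer(bwt, readLen, read, x,
                                          /*min_intv=*/1, ival, &mems, tmpv);
        // mems now holds the super-maximal exact matches covering position x
    }
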
diff --git a/include/BootstrapWriter.hpp b/include/BootstrapWriter.hpp
new file mode 100644
index 0000000..b5a38e1
--- /dev/null
+++ b/include/BootstrapWriter.hpp
@@ -0,0 +1,15 @@
+#ifndef __BOOTSTRAP_WRITER_HPP__
+#define __BOOTSTRAP_WRITER_HPP__
+
+#include <vector>
+#include <mutex>
+#include <string>
+
+class Transcript;
+
+class BootstrapWriter {
+    public:
+        virtual ~BootstrapWriter() {}
+        virtual bool writeHeader(std::string& comments, std::vector<Transcript>& transcripts) = 0;
+        virtual bool writeBootstrap(std::vector<double>& abund) = 0;
+};
+
+#endif // __BOOTSTRAP_WRITER_HPP__
+
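
A minimal sketch of a concrete writer for this interface (StreamBootstrapWriter is hypothetical; it just prints one replicate per line):

    class StreamBootstrapWriter : public BootstrapWriter {
        public:
            bool writeHeader(std::string& comments,
                             std::vector<Transcript>& transcripts) override {
                std::cerr << comments << " (" << transcripts.size() << " targets)\n";
                return true;
            }
            bool writeBootstrap(std::vector<double>& abund) override {
                for (auto a : abund) { std::cout << a << '\t'; }
                std::cout << '\n';
                return true;
            }
    };
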
diff --git a/include/ClusterForest.hpp b/include/ClusterForest.hpp
index 825d343..2e605bc 100644
--- a/include/ClusterForest.hpp
+++ b/include/ClusterForest.hpp
@@ -6,7 +6,7 @@
 
 #include "Transcript.hpp"
 #include "TranscriptCluster.hpp"
-#include "SpinLock.hpp"
+#include "SalmonSpinLock.hpp"
 
 #include <unordered_set>
 #include <vector>
@@ -68,8 +68,8 @@ public:
         std::lock_guard<std::mutex> lock(clusterMutex_);
 #endif
 
-        size_t firstCluster, otherCluster;
         auto firstTranscriptID = (*start)->transcriptID();
+        decltype(firstTranscriptID) firstCluster, otherCluster;
         ++start;
 
         for (auto it = start; it != finish; ++it) {
@@ -155,4 +155,3 @@ private:
 };
 
 #endif // __CLUSTER_FOREST_HPP__
-
diff --git a/include/CollapsedEMOptimizer.hpp b/include/CollapsedEMOptimizer.hpp
index d4aa3f6..a799bdb 100644
--- a/include/CollapsedEMOptimizer.hpp
+++ b/include/CollapsedEMOptimizer.hpp
@@ -2,6 +2,7 @@
 #define COLLAPSED_EM_OPTIMIZER_HPP
 
 #include <unordered_map>
+#include <functional>
 
 #include "tbb/atomic.h"
 #include "tbb/task_scheduler_init.h"
@@ -12,9 +13,12 @@
 #include "cuckoohash_map.hh"
 #include "Eigen/Dense"
 
+class BootstrapWriter;
+
 class CollapsedEMOptimizer {
     public:
         using VecType = std::vector<tbb::atomic<double>>;
+        using SerialVecType = std::vector<double>;
         CollapsedEMOptimizer();
 
         template <typename ExpT>
@@ -23,6 +27,13 @@ class CollapsedEMOptimizer {
                       double tolerance = 0.01,
                       uint32_t maxIter = 1000);
 
+        template <typename ExpT>
+        bool gatherBootstraps(
+                ExpT& readExp,
+                SalmonOpts& sopt,
+                std::function<bool(const std::vector<double>&)>& writeBootstrap,
+                double relDiffTolerance,
+                uint32_t maxIter);
 };
 
 #endif // COLLAPSED_EM_OPTIMIZER_HPP
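
A sketch of driving the new bootstrap entry point, assuming readExp and sopt already exist and gzw is some writer exposing writeBootstrap(const std::vector<double>&) (the GZipWriter added below fits this shape):

    std::function<bool(const std::vector<double>&)> writeBootstrap =
        [&gzw](const std::vector<double>& abund) -> bool {
            return gzw.writeBootstrap(abund);
        };
    CollapsedEMOptimizer optimizer;
    optimizer.gatherBootstraps(readExp, sopt, writeBootstrap,
                               /*relDiffTolerance=*/0.01, /*maxIter=*/1000);
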
diff --git a/include/CollapsedGibbsSampler.hpp b/include/CollapsedGibbsSampler.hpp
index c90f185..aa617cb 100644
--- a/include/CollapsedGibbsSampler.hpp
+++ b/include/CollapsedGibbsSampler.hpp
@@ -2,6 +2,7 @@
 #define COLLAPSED_GIBBS_SAMPLER_HPP
 
 #include <unordered_map>
+#include <functional>
 
 #include "tbb/atomic.h"
 #include "tbb/task_scheduler_init.h"
@@ -11,6 +12,8 @@
 #include "cuckoohash_map.hh"
 #include "Eigen/Dense"
 
+class BootstrapWriter;
+
 class CollapsedGibbsSampler {
     public:
         using VecType = std::vector<double>;
@@ -19,8 +22,8 @@ class CollapsedGibbsSampler {
         template <typename ExpT>
         bool sample(ExpT& readExp,
                       SalmonOpts& sopt,
+                      std::function<bool(const std::vector<int>&)>& writeBootstrap,
                       uint32_t numSamples = 500);
-
 };
 
 #endif // COLLAPSED_GIBBS_SAMPLER_HPP
diff --git a/include/EquivalenceClassBuilder.hpp b/include/EquivalenceClassBuilder.hpp
index f91b836..1f53ed3 100644
--- a/include/EquivalenceClassBuilder.hpp
+++ b/include/EquivalenceClassBuilder.hpp
@@ -12,41 +12,53 @@
 
 #include "cuckoohash_map.hh"
 #include "concurrentqueue.h"
+#include "SalmonUtils.hpp"
 #include "TranscriptGroup.hpp"
 
 
 struct TGValue {
     TGValue(const TGValue& o) {
         weights = o.weights;
+	posWeights = o.posWeights;
+	combinedWeights = o.combinedWeights;
         count.store(o.count.load());
     }
 
-    TGValue(std::vector<double>& weightIn, uint64_t countIn) :
-        weights(weightIn) { count.store(countIn); }
+    TGValue(std::vector<double>& weightIn, 
+	    std::vector<double>& posWeightsIn, 
+	    uint64_t countIn) :
+        weights(weightIn.begin(), weightIn.end()),
+	posWeights(posWeightsIn.begin(), posWeightsIn.end()) { 
+	  count.store(countIn); 
+	}
 
     // const is a lie
     void normalizeAux() const {
-        double sumOfAux{0.0};
-        for (size_t i = 0; i < weights.size(); ++i) {
-            sumOfAux += weights[i];
-        }
-        double norm = 1.0 / sumOfAux;
-        for (size_t i = 0; i < weights.size(); ++i) {
-            weights[i] *= norm;
-        }
-        /* LOG SPACE
-        double sumOfAux = salmon::math::LOG_0;
-        for (size_t i = 0; i < weights.size(); ++i) {
-            sumOfAux = salmon::math::logAdd(sumOfAux, weights[i]);
-        }
-        for (size_t i = 0; i < weights.size(); ++i) {
-            weights[i] = std::exp(weights[i] - sumOfAux);
-        }
-        */
+      double sumOfAux{0.0};
+      for (size_t i = 0; i < weights.size(); ++i) {
+	sumOfAux += weights[i];
+      }
+      double norm = 1.0 / sumOfAux;
+      for (size_t i = 0; i < weights.size(); ++i) {
+	weights[i].store(weights[i].load() * norm);
+
+      }
+
+      // If we have positional weights, normalize them.
+      if (posWeights.size() > 0) {
+	double posNorm = 1.0 / count.load();
+	for (size_t i = 0; i < posWeights.size(); ++i) {
+	  posWeights[i].store(posWeights[i].load() * posNorm);
+	}
+      }
     }
 
-    // forget synchronizing this for the time being
-    mutable std::vector<double> weights;
+    mutable std::vector<tbb::atomic<double>> weights;
+    mutable std::vector<tbb::atomic<double>> posWeights;
+
+    // The combined auxiliary and position weights.  These
+    // are filled in by the inference algorithm.
+    mutable std::vector<double> combinedWeights;
     std::atomic<uint64_t> count{0};
 };
 
@@ -63,34 +75,44 @@ class EquivalenceClassBuilder {
 
         bool finish() {
             active_ = false;
-            for (auto kv = countMap_.begin(); !kv.is_end(); ++kv) {
-                kv->second.normalizeAux();
-                countVec_.push_back(*kv);
+            size_t totalCount{0};
+            auto lt = countMap_.lock_table();
+            for (auto& kv : lt) {
+                kv.second.normalizeAux();
+                totalCount += kv.second.count;
+                countVec_.push_back(kv);
             }
 
     	    logger_->info("Computed {} rich equivalence classes "
 			  "for further processing", countVec_.size());
+            logger_->info("Counted {} total reads in the equivalence classes ",
+                    totalCount);
             return true;
         }
 
         inline void addGroup(TranscriptGroup&& g,
-                             std::vector<double>& weights) {
+                             std::vector<double>& weights,
+			     std::vector<double>& posWeights) {
 
-            auto upfn = [&weights](TGValue& x) -> TGValue& {
+            auto upfn = [&weights, &posWeights](TGValue& x) -> void {
                 // update the count
                 x.count++;
                 // update the weights
-                for (size_t i = 0; i < x.weights.size(); ++i) {
-                    // Possibly atomicized in the future
-                    weights[i] += x.weights[i];
-                    /* LOG SPACE
-                    x.weights[i] =
-                        salmon::math::logAdd(x.weights[i], weights[i]);
-                    */
-                }
-                return x;
+
+		// If we have positional weights
+		if (weights.size() == posWeights.size()) {
+		  for (size_t i = 0; i < x.weights.size(); ++i) {
+		    salmon::utils::incLoop(x.weights[i], weights[i]);
+		    salmon::utils::incLoop(x.posWeights[i], posWeights[i]);
+		  }
+		} else {
+	        // With no positional weights
+		  for (size_t i = 0; i < x.weights.size(); ++i) {
+		    salmon::utils::incLoop(x.weights[i], weights[i]);
+		  }
+		}
             };
-            TGValue v(weights, 1);
+            TGValue v(weights, posWeights, 1);
             countMap_.upsert(g, upfn, v);
         }
 
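
A sketch of feeding the extended addGroup, assuming txpIDs, auxWeights and posWeights were collected while scoring one fragment (posWeights may simply be left empty when positional weighting is not used):

    TranscriptGroup tg(txpIDs);            // txpIDs: std::vector<uint32_t>
    eqBuilder.addGroup(std::move(tg), auxWeights, posWeights);
    // ... after all fragments have been processed ...
    eqBuilder.finish();   // normalizes the weights and logs the class count
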
diff --git a/include/ForgettingMassCalculator.hpp b/include/ForgettingMassCalculator.hpp
index e6a90f7..2e886c6 100644
--- a/include/ForgettingMassCalculator.hpp
+++ b/include/ForgettingMassCalculator.hpp
@@ -1,7 +1,7 @@
 #ifndef __FORGETTING_MASS_CALCULATOR__
 #define __FORGETTING_MASS_CALCULATOR__
 
-#include "SpinLock.hpp"
+#include "SalmonSpinLock.hpp"
 #include "SalmonMath.hpp"
 #include "spdlog/spdlog.h"
 
diff --git a/include/FragmentLengthDistribution.hpp b/include/FragmentLengthDistribution.hpp
index ee73aed..5aa1800 100644
--- a/include/FragmentLengthDistribution.hpp
+++ b/include/FragmentLengthDistribution.hpp
@@ -127,6 +127,15 @@ public:
    * @return (Logged) cmf of bins.
    */
   std::vector<double> cmf() const;
+
+
+  /**
+   * A member function that fills in a vector containing the (logged) probability
+   * mass function *for the bins*, and the min and max values
+   * @return (Logged) pmf of bins.
+   */
+  void dumpPMF(std::vector<double>& pmfOut, size_t& minV, size_t& maxV) const;
+
   /**
    * An accessor for the (logged) observation mass (including pseudo-counts).
    * @return Total observation mass.
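
A sketch of the new PMF dump, assuming fld is the FragmentLengthDistribution accumulated during quantification:

    std::vector<double> logPMF;
    size_t minV{0}, maxV{0};
    fld.dumpPMF(logPMF, minV, maxV);
    // logPMF holds the (logged) per-bin mass; minV and maxV delimit the
    // populated range of fragment lengths.
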
diff --git a/include/FragmentStartPositionDistribution.hpp b/include/FragmentStartPositionDistribution.hpp
index 753c60c..4c15827 100644
--- a/include/FragmentStartPositionDistribution.hpp
+++ b/include/FragmentStartPositionDistribution.hpp
@@ -44,7 +44,9 @@ class FragmentStartPositionDistribution {
 
   // Mutex for this distribution
   std::mutex fspdMut_;
-  bool isUpdated_;
+  std::atomic<bool> isUpdated_;
+  std::atomic<bool> allowUpdates_;
+  std::atomic<uint32_t> performingUpdate_;
 
 public:
   /**
@@ -69,6 +71,27 @@ public:
    * @param txpLen The length of the transcript
    */
   double operator()(int32_t hitPos, uint32_t txpLen, double effLen);
+
+
+   /**
+   * A member function that computes the probability that a hit
+   * starts at the specified position within the given transcript length.
+   * The overall log probability is given by logNum - logDenom. The function
+   * returns true if the probability is non-zero and false otherwise.
+   * @param hitPos The position where the fragment begins
+   * @param txpLen The length of the transcript
+   * @param logEffLen the log of the effective length of the transcript
+   * @param logNum the log of the numerator
+   * @param logDenom the log of the denominator
+   * @return true if the probability is non-zero, false otherwise.
+   */
+  bool logNumDenomMass(
+        int32_t hitPos,
+        uint32_t txpLen,
+        double logEffLen,
+	double& logNum,
+	double& logDenom);
+
   // Evaluate the CDF between two points
   double evalCDF(int32_t hitPos, uint32_t txpLen);
   // Update the distribution (compute the CDF) and
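
A sketch of the decomposed start-position probability, assuming fspd is a FragmentStartPositionDistribution and hitPos, txpLen and logEffLen come from the current alignment:

    double logNum{salmon::math::LOG_0};
    double logDenom{salmon::math::LOG_0};
    if (fspd.logNumDenomMass(hitPos, txpLen, logEffLen, logNum, logDenom)) {
        double logProb = logNum - logDenom;   // overall (logged) probability
    }
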
diff --git a/include/GZipWriter.hpp b/include/GZipWriter.hpp
new file mode 100644
index 0000000..4708823
--- /dev/null
+++ b/include/GZipWriter.hpp
@@ -0,0 +1,52 @@
+#ifndef __GZIP_WRITER_HPP__
+#define __GZIP_WRITER_HPP__
+
+#include <memory>
+#include <mutex>
+#include <atomic>
+
+#include "spdlog/spdlog.h"
+
+#include <boost/iostreams/filtering_stream.hpp>
+#include <boost/iostreams/device/file.hpp>
+#include <boost/iostreams/filter/gzip.hpp>
+
+#include "SalmonSpinLock.hpp"
+#include "SalmonOpts.hpp"
+#include "ReadExperiment.hpp"
+
+class GZipWriter {
+  public:
+    GZipWriter(const boost::filesystem::path path, std::shared_ptr<spdlog::logger> logger);
+
+    ~GZipWriter();
+
+    template <typename ExpT>
+    bool writeMeta(
+	const SalmonOpts& opts,
+	const ExpT& experiment,
+    const std::string& tstring  = "now"  // the start time of the run
+	);
+
+    template <typename ExpT>
+    bool writeAbundances(
+      const SalmonOpts& sopt,
+      ExpT& readExp);
+
+    template <typename T>
+    bool writeBootstrap(const std::vector<T>& abund);
+
+   private:
+     boost::filesystem::path path_;
+     boost::filesystem::path bsPath_;
+     std::shared_ptr<spdlog::logger> logger_;
+     std::unique_ptr<boost::iostreams::filtering_ostream> bsStream_{nullptr};
+// only one writer thread at a time
+#if defined __APPLE__
+        spin_lock writeMutex_;
+#else
+        std::mutex writeMutex_;
+#endif
+        std::atomic<uint32_t> numBootstrapsWritten_{0};
+};
+
+#endif //__GZIP_WRITER_HPP__
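
A usage sketch for the new writer, assuming sopt, experiment, jointLog and an output directory auxDir already exist:

    GZipWriter gzw(auxDir, jointLog);
    gzw.writeMeta(sopt, experiment);
    gzw.writeAbundances(sopt, experiment);
    gzw.writeBootstrap(bootstrapAbundances);   // call once per replicate
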
diff --git a/include/IndexVersionInfo.hpp b/include/IndexVersionInfo.hpp
new file mode 100644
index 0000000..9f47be6
--- /dev/null
+++ b/include/IndexVersionInfo.hpp
@@ -0,0 +1,11 @@
+#ifndef __INDEX_VERSION_INFO_HPP__ 
+#define __INDEX_VERSION_INFO_HPP__ 
+
+#include "cereal/archives/json.hpp"
+#include "cereal/types/vector.hpp"
+
+class IndexVersionInfo {
+
+};
+
+#endif //__INDEX_VERSION_INFO_HPP__ 
diff --git a/include/KmerIntervalMap.hpp b/include/KmerIntervalMap.hpp
new file mode 100644
index 0000000..21ae719
--- /dev/null
+++ b/include/KmerIntervalMap.hpp
@@ -0,0 +1,131 @@
+#ifndef __KMER_INTERVAL_MAP_HPP__
+#define __KMER_INTERVAL_MAP_HPP__
+
+extern "C" {
+#include "bwt.h"
+}
+
+#include <fstream>
+#include <unordered_map>
+
+#include <boost/filesystem.hpp>
+
+#include <jellyfish/mer_dna.hpp>
+
+#include "cereal/archives/binary.hpp"
+#include "cereal/types/unordered_map.hpp"
+
+#include "xxhash.h"
+
+using JFMer = jellyfish::mer_dna_ns::mer_base_static<uint64_t, 1>;
+
+// What will be the keys in our k-mer hash map
+struct KmerKey {
+    KmerKey() {
+        mer_.polyT();
+    }
+
+    KmerKey(uint8_t* seq, uint32_t len) : mer_(len) {
+        mer_.polyT();
+        for (size_t i = 0; i < len; ++i) {
+            mer_.shift_left(seq[i]);
+        }
+    }
+
+    bool operator==(const KmerKey& ok) const {
+        return mer_ == ok.mer_;
+    }
+
+    // Is there a smarter way to do save / load here?
+    template <typename Archive>
+    void save(Archive& archive) const {
+        auto key = mer_.get_bits(0, 2*mer_.k());
+        archive(key);
+    }
+
+    template <typename Archive>
+    void load(Archive& archive) {
+        mer_.polyT();
+        uint64_t bits;
+        archive(bits);
+        mer_.set_bits(0, 2*mer_.k(), bits);
+    }
+
+    JFMer mer_;
+};
+
+template <typename Archive>
+void load(Archive& archive, bwtintv_t& interval) {
+    archive( interval.x[0], interval.x[1], interval.x[2], interval.info );
+}
+
+template <typename Archive>
+void save(Archive& archive, const bwtintv_t& interval) {
+    archive( interval.x[0], interval.x[1], interval.x[2], interval.info );
+}
+
+/**
+ *  This class provides an efficient hash-map from 
+ *  k-mers to BWT intervals.
+ */
+class KmerIntervalMap {
+    public:
+    // How we hash the keys
+    struct KmerHasher {
+        std::size_t operator()(const KmerKey& k) const {
+            void* data = static_cast<void*>(const_cast<KmerKey&>(k).mer_.data__());
+           return XXH64(data, sizeof(uint64_t), 0);
+        }
+    };
+ 
+    private:
+        std::unordered_map<KmerKey, bwtintv_t, KmerHasher> map_;
+
+    public:
+    void setK(unsigned int k) { JFMer::k(k); }
+    uint32_t k() { return JFMer::k(); }
+
+    bool hasKmer(KmerKey& k) {
+        return map_.find(k) != map_.end();
+    }
+
+    decltype(map_)::iterator find(const KmerKey& k) {
+        return map_.find(k);
+    }
+    decltype(map_)::iterator find(KmerKey&& k) {
+        return map_.find(k);
+    }
+
+    decltype(map_)::iterator end() {
+        return map_.end();
+    }
+
+
+    bwtintv_t& operator[](const KmerKey& k) {
+        return map_[k];
+    }
+    bwtintv_t& operator[](KmerKey&& k) {
+        return map_[k];
+    }
+    
+    decltype(map_)::size_type size() { return map_.size(); }
+
+    void save(boost::filesystem::path indexPath) {
+        std::ofstream ofs(indexPath.string(), std::ios::binary);
+        {
+            cereal::BinaryOutputArchive oa(ofs);
+            oa(map_);
+        }
+        ofs.close();
+    }
+
+    void load(boost::filesystem::path indexPath) {
+        std::ifstream ifs(indexPath.string(), std::ios::binary);
+        {
+            cereal::BinaryInputArchive ia(ifs);
+            ia(map_);
+        }
+        ifs.close();
+    }
+
+};
+
+#endif // __KMER_INTERVAL_MAP_HPP__
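
A sketch of building and persisting the auxiliary k-mer index, assuming seq points at an encoded k-mer, ival is its BWT interval and indexDir is the index output directory (the file name here is hypothetical):

    KmerIntervalMap auxIdx;
    auxIdx.setK(15);
    auxIdx[KmerKey(seq, auxIdx.k())] = ival;
    auxIdx.save(indexDir / "kintervals.bin");
    // ... and at quantification time ...
    KmerIntervalMap loaded;
    loaded.setK(15);
    loaded.load(indexDir / "kintervals.bin");
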
diff --git a/include/LibraryFormat.hpp b/include/LibraryFormat.hpp
index 24bc1e9..3db1139 100644
--- a/include/LibraryFormat.hpp
+++ b/include/LibraryFormat.hpp
@@ -35,9 +35,9 @@ public:
     }
 
     inline static LibraryFormat formatFromID(uint8_t id) {
-        ReadType rt;
-        ReadOrientation ro;
-        ReadStrandedness rs;
+        ReadType rt = ReadType::SINGLE_END;
+        ReadOrientation ro = ReadOrientation::NONE;
+        ReadStrandedness rs = ReadStrandedness::U;
 
         switch (id & 0x01) {
             case 0:
@@ -96,6 +96,14 @@ public:
         id |= (0x7 &static_cast<uint8_t>(strandedness)) << 3;
         return id;
     }
+
 };
 
+inline bool operator==(const LibraryFormat& lhs,
+        const LibraryFormat& rhs) {
+    return ((lhs.type == rhs.type) and
+            (lhs.orientation == rhs.orientation) and
+            (lhs.strandedness == rhs.strandedness));
+}
+
 #endif // LIBRARY_FORMAT_HPP
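
A sketch of the round trip the new operator== makes easy to check, assuming the enum values named elsewhere in this header and the usual three-argument constructor:

    LibraryFormat fmt(ReadType::SINGLE_END, ReadOrientation::NONE,
                      ReadStrandedness::U);
    assert(LibraryFormat::formatFromID(fmt.formatID()) == fmt);
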
diff --git a/include/LightweightAlignmentDefs.hpp b/include/LightweightAlignmentDefs.hpp
new file mode 100644
index 0000000..637ba20
--- /dev/null
+++ b/include/LightweightAlignmentDefs.hpp
@@ -0,0 +1,1470 @@
+#ifndef LIGHTWEIGHT_ALIGNMENT_DEFS_HPP
+#define LIGHTWEIGHT_ALIGNMENT_DEFS_HPP
+
+
+#include "BWAMemStaticFuncs.hpp"
+#include "RapMapUtils.hpp"
+
+class SMEMAlignment {
+    public:
+        SMEMAlignment() :
+            pos(0),
+            fwd(false),
+            mateIsFwd(false),
+            transcriptID_(std::numeric_limits<TranscriptID>::max()),
+            format_(LibraryFormat::formatFromID(0)),
+            score_(0.0),
+            fragLength_(0),
+            logProb(salmon::math::LOG_0),
+            logBias(salmon::math::LOG_0){}
+
+        SMEMAlignment(TranscriptID transcriptIDIn, LibraryFormat format,
+                  double scoreIn = 0.0,
+                  int32_t hitPosIn = 0,
+                  uint32_t fragLengthIn= 0,
+                  double logProbIn = salmon::math::LOG_0) :
+            pos(hitPosIn), fwd(false), mateIsFwd(false), transcriptID_(transcriptIDIn),
+            format_(format), score_(scoreIn),
+            fragLength_(fragLengthIn), logProb(logProbIn) {}
+
+        SMEMAlignment(const SMEMAlignment& o) = default;
+        SMEMAlignment(SMEMAlignment&& o) = default;
+        SMEMAlignment& operator=(SMEMAlignment& o) = default;
+        SMEMAlignment& operator=(SMEMAlignment&& o) = default;
+
+
+        inline TranscriptID transcriptID() const { return transcriptID_; }
+        inline uint32_t fragLength() const { return fragLength_; }
+        inline LibraryFormat libFormat() const { return format_; }
+        inline double score() const { return score_; }
+        inline int32_t hitPos() const { return pos; }
+        // inline double coverage() {  return static_cast<double>(kmerCount) / fragLength_; };
+        uint32_t kmerCount;
+        double logProb;
+        double logBias;
+        template <typename Archive>
+        void save(Archive& archive) const {
+            archive(transcriptID_, format_.formatID(), score_, pos, fragLength_);
+        }
+
+        template <typename Archive>
+        void load(Archive& archive) {
+            uint8_t formatID;
+            archive(transcriptID_, formatID, score_, pos, fragLength_);
+            format_ = LibraryFormat::formatFromID(formatID);
+        }
+
+        rapmap::utils::MateStatus mateStatus;
+        int32_t pos;
+        bool fwd;
+        bool mateIsFwd;
+    private:
+        TranscriptID transcriptID_;
+        LibraryFormat format_;
+        double score_;
+        uint32_t fragLength_;
+};
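+
+// Note on serialization: save()/load() above persist only the transcript id,
+// the library-format id, the score, the hit position and the fragment length;
+// the remaining fields (mateStatus, fwd, mateIsFwd, logProb, logBias,
+// kmerCount) are not restored from the archive.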
+
+uint32_t basesCovered(std::vector<uint32_t>& kmerHits) {
+    std::sort(kmerHits.begin(), kmerHits.end());
+    uint32_t covered{0};
+    uint32_t lastHit{0};
+    uint32_t kl{20};
+    for (auto h : kmerHits) {
+        covered += std::min(h - lastHit, kl);
+        lastHit = h;
+    }
+    return covered;
+}
+
+uint32_t basesCovered(std::vector<uint32_t>& posLeft, std::vector<uint32_t>& posRight) {
+    return basesCovered(posLeft) + basesCovered(posRight);
+}
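+
+// Worked example for the coverage heuristic above: with kl = 20 and sorted
+// hit positions {0, 5, 30}, covered = min(0,20) + min(5,20) + min(25,20) = 25,
+// so overlapping seed hits are only credited for the new bases they span.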
+
+class KmerVote {
+    public:
+        KmerVote(int32_t vp, uint32_t rp, uint32_t vl) : votePos(vp), readPos(rp), voteLen(vl) {}
+        int32_t votePos{0};
+        uint32_t readPos{0};
+        uint32_t voteLen{0};
+        /*
+        std::string str(){
+            return "<" + votePos  + ", "  + readPos  + ", "  + voteLen + ">";
+        }
+        */
+};
+class MatchFragment {
+    public:
+        MatchFragment(uint32_t refStart_, uint32_t queryStart_, uint32_t length_) :
+            refStart(refStart_), queryStart(queryStart_), length(length_) {}
+
+        uint32_t refStart, queryStart, length;
+        uint32_t weight;
+        double score;
+};
+
+bool precedes(const MatchFragment& a, const MatchFragment& b) {
+    return (a.refStart + a.length) < b.refStart and
+           (a.queryStart + a.length) < b.queryStart;
+}
+
+
+class TranscriptHitList {
+    public:
+        int32_t bestHitPos{0};
+        uint32_t bestHitCount{0};
+        double bestHitScore{0.0};
+
+        std::vector<KmerVote> votes;
+        std::vector<KmerVote> rcVotes;
+
+        uint32_t targetID;
+        uint32_t fwdCov{0};
+        uint32_t revCov{0};
+
+        bool isForward_{true};
+
+        void addFragMatch(uint32_t tpos, uint32_t readPos, uint32_t voteLen) {
+            int32_t votePos = static_cast<int32_t>(tpos) - static_cast<int32_t>(readPos);
+            votes.emplace_back(votePos, readPos, voteLen);
+            fwdCov += voteLen;
+        }
+
+        void addFragMatchRC(uint32_t tpos, uint32_t readPos, uint32_t voteLen, uint32_t readLen) {
+            //int32_t votePos = static_cast<int32_t>(tpos) - (readPos) + voteLen;
+            int32_t votePos = static_cast<int32_t>(tpos) - (readLen - readPos);
+            rcVotes.emplace_back(votePos, readPos, voteLen);
+            revCov += voteLen;
+        }
+
+        uint32_t totalNumHits() { return std::max(votes.size(), rcVotes.size()); }
+
+        bool computeBestLocFast_(std::vector<KmerVote>& sVotes, Transcript& transcript,
+                                 std::string& read, bool isRC,
+                                 int32_t& maxClusterPos, uint32_t& maxClusterCount, double& maxClusterScore) {
+            bool updatedMaxScore{true};
+            if (sVotes.size() == 0) { return updatedMaxScore; }
+            uint32_t readLen = read.length();
+            uint32_t votePos = sVotes.front().votePos;
+
+            uint32_t cov = isRC ? revCov : fwdCov;
+            if (cov > maxClusterCount) {
+                maxClusterCount = cov;
+                maxClusterPos = votePos;
+                maxClusterScore = maxClusterCount / static_cast<double>(readLen);
+                updatedMaxScore = true;
+            }
+            return updatedMaxScore;
+
+        }
+
+        bool computeBestLoc_(std::vector<KmerVote>& sVotes, Transcript& transcript,
+                             std::string& read, bool isRC,
+                             int32_t& maxClusterPos, uint32_t& maxClusterCount, double& maxClusterScore) {
+            // Did we update the highest-scoring cluster? This will be set to
+            // true iff we have a cluster of a higher score than the score
+            // currently given in maxClusterCount.
+            bool updatedMaxScore{false};
+
+            if (sVotes.size() == 0) { return updatedMaxScore; }
+
+            struct VoteInfo {
+                uint32_t coverage = 0;
+                int32_t rightmostBase = 0;
+            };
+
+            uint32_t readLen = read.length();
+
+            boost::container::flat_map<uint32_t, VoteInfo> hitMap;
+            int32_t currClust{static_cast<int32_t>(sVotes.front().votePos)};
+
+            for (size_t j = 0; j < sVotes.size(); ++j) {
+
+                int32_t votePos = sVotes[j].votePos;
+                uint32_t readPos = sVotes[j].readPos;
+                uint32_t voteLen = sVotes[j].voteLen;
+
+                if (votePos >= currClust) {
+                    if (votePos - currClust > 10) {
+                        currClust = votePos;
+                    }
+                    auto& hmEntry = hitMap[currClust];
+
+                    hmEntry.coverage += std::min(voteLen, (votePos + readPos + voteLen) - hmEntry.rightmostBase);
+                    hmEntry.rightmostBase = votePos + readPos + voteLen;
+                } else if (votePos < currClust) {
+                    std::cerr << "Should not have votePos = " << votePos << " <  currClust = " << currClust << "\n";
+                    std::exit(1);
+                }
+
+                if (hitMap[currClust].coverage > maxClusterCount) {
+                    maxClusterCount = hitMap[currClust].coverage;
+                    maxClusterPos = currClust;
+                    maxClusterScore = maxClusterCount / static_cast<double>(readLen);
+                    updatedMaxScore = true;
+                }
+
+            }
+            return updatedMaxScore;
+        }
+
+        bool computeBestLoc2_(std::vector<KmerVote>& sVotes, uint32_t tlen,
+                              int32_t& maxClusterPos, uint32_t& maxClusterCount, double& maxClusterScore) {
+
+            bool updatedMaxScore{false};
+
+            if (sVotes.size() == 0) { return updatedMaxScore; }
+
+            double weights[] = { 1.0, 0.983471453822, 0.935506985032,
+                0.860707976425, 0.765928338365, 0.6592406302, 0.548811636094,
+                0.441902209585, 0.344153786865, 0.259240260646,
+                0.188875602838};
+
+            uint32_t maxGap = 4;
+            uint32_t leftmost = (sVotes.front().votePos > maxGap) ? (sVotes.front().votePos - maxGap) : 0;
+            uint32_t rightmost = std::min(sVotes.back().votePos + maxGap, tlen);
+
+            uint32_t span = (rightmost - leftmost);
+            std::vector<double> probAln(span, 0.0);
+            double kwidth = 1.0 / (2.0 * maxGap);
+
+            size_t nvotes = sVotes.size();
+            for (size_t j = 0; j < nvotes; ++j) {
+                uint32_t votePos = sVotes[j].votePos;
+                uint32_t voteLen = sVotes[j].voteLen;
+
+                auto x = j + 1;
+                while (x < nvotes and sVotes[x].votePos == votePos) {
+                    voteLen += sVotes[x].voteLen;
+                    j += 1;
+                    x += 1;
+                }
+
+
+                uint32_t dist{0};
+                size_t start = (votePos >= maxGap) ? (votePos - maxGap - leftmost) : (votePos - leftmost);
+                size_t mid = votePos - leftmost;
+                size_t end = std::min(votePos + maxGap - leftmost, rightmost - leftmost);
+                for (size_t k = start; k < end; k += 1) {
+                    dist = (mid > k) ? mid - k : k - mid;
+                    probAln[k] += weights[dist] * voteLen;
+                    if (probAln[k] > maxClusterScore) {
+                        maxClusterScore = probAln[k];
+                        maxClusterPos = k + leftmost;
+                        updatedMaxScore = true;
+                    }
+                }
+            }
+
+            return updatedMaxScore;
+        }
+
+
+        inline uint32_t numSampledHits_(Transcript& transcript, std::string& readIn,
+                                        int32_t votePos, int32_t posInRead, int32_t voteLen, bool isRC, uint32_t numTries) {
+
+
+            // The read starts at this position in the transcript (may be negative!)
+            int32_t readStart = votePos;
+            // The (uncorrected) length of the read
+            int32_t readLen = readIn.length();
+            // Pointer to the sequence of the read
+            const char* read = readIn.c_str();
+            // Don't mess around with unsigned arithmetic here
+            int32_t tlen = transcript.RefLength;
+
+            // If the read starts before the first base of the transcript,
+            // trim off the initial overhang  and correct the other variables
+            if (readStart < 0) {
+                if (isRC) {
+                    uint32_t correction = -readStart;
+                    //std::cerr << "readLen = " << readLen << ", posInRead = " << posInRead << ", voteLen = " << voteLen << ", correction = " << correction << "\n";
+                    //std::cerr << "tlen = " << tlen << ", votePos = " << votePos << "\n";
+                    read += correction;
+                    readLen -= correction;
+                    posInRead -= correction;
+                    readStart = 0;
+                } else {
+                    uint32_t correction = -readStart;
+                    read += correction;
+                    readLen -= correction;
+                    posInRead -= correction;
+                    readStart = 0;
+                }
+            }
+            // If the read hangs off the end of the transcript,
+            // shorten its effective length.
+            if (readStart + readLen >= tlen) {
+                if (isRC) {
+                    uint32_t correction = (readStart + readLen) - transcript.RefLength + 1;
+                    //std::cerr << "Trimming RC hit: correction = " << correction << "\n";
+                    //std::cerr << "untrimmed read : "  << read << "\n";
+                    read += correction;
+                    readLen -= correction;
+                    if (voteLen > readLen) { voteLen = readLen; }
+                    posInRead = 0;
+                } else {
+                    readLen = tlen - (readStart + 1);
+                    voteLen = std::max(voteLen, readLen - (posInRead + voteLen));
+                }
+            }
+            // Finally, clip any reverse complement reads starting at 0
+            if (isRC) {
+
+                if (voteLen > readStart) {
+                    readLen -= (readLen - (posInRead + voteLen));
+                }
+
+            }
+
+            // If the read is too short, it's not useful
+            if (readLen <= 15) { return 0; }
+            // The step between sample centers (given the number of samples we're going to take)
+            double step = (readLen - 1) / static_cast<double>(numTries-1);
+            // The strand of the transcript from which we'll extract sequence
+            auto dir = (isRC) ? salmon::stringtools::strand::reverse :
+                                salmon::stringtools::strand::forward;
+
+            bool superVerbose{false};
+
+            if (superVerbose) {
+                std::stringstream ss;
+                ss << "Supposed hit " << (isRC ? "RC" : "") << "\n";
+                ss << "info: votePos = " << votePos << ", posInRead = " << posInRead
+                    << ", voteLen = " << voteLen << ", readLen = " << readLen
+                    << ", tran len = " << tlen << ", step = " << step << "\n";
+                if (readStart + readLen > tlen ) {
+                    ss << "ERROR!!!\n";
+                    std::cerr << "[[" << ss.str() << "]]";
+                    std::exit(1);
+                }
+                ss << "Transcript name = " << transcript.RefName << "\n";
+                ss << "T : ";
+                try {
+                    for ( size_t j = 0; j < readLen; ++j) {
+                        if (isRC) {
+                            if (j == posInRead) {
+                                char red[] = "\x1b[30m";
+                                red[3] = '0' + static_cast<char>(fmt::RED);
+                                ss << red;
+                            }
+
+                            if (j == posInRead + voteLen) {
+                                const char RESET_COLOR[] = "\x1b[0m";
+                                ss << RESET_COLOR;
+                            }
+                            ss << transcript.charBaseAt(readStart+readLen-j,dir);
+                        } else {
+                            if (j == posInRead ) {
+                                char red[] = "\x1b[30m";
+                                red[3] = '0' + static_cast<char>(fmt::RED);
+                                ss << red;
+                            }
+
+                            if (j == posInRead + voteLen) {
+                                const char RESET_COLOR[] = "\x1b[0m";
+                                ss << RESET_COLOR;
+                            }
+
+                            ss << transcript.charBaseAt(readStart+j);
+                        }
+                    }
+                    ss << "\n";
+                    char red[] = "\x1b[30m";
+                    red[3] = '0' + static_cast<char>(fmt::RED);
+                    const char RESET_COLOR[] = "\x1b[0m";
+
+                    ss << "R : " << std::string(read, posInRead) << red << std::string(read + posInRead, voteLen) << RESET_COLOR;
+                    if (readLen > posInRead + voteLen) { ss << std::string(read + posInRead + voteLen); }
+                    ss << "\n\n";
+                } catch (std::exception& e) {
+                    std::cerr << "EXCEPTION !!!!!! " << e.what() << "\n";
+                }
+                std::cerr << ss.str() << "\n";
+                ss.clear();
+            }
+
+            // The index of the current sample within the read
+            int32_t readIndex = 0;
+
+            // The number of loci in the subvotes and their
+            // offset patterns
+            size_t lpos = 3;
+            int leftPattern[] = {-4, -2, 0};
+            int rightPattern[] = {0, 2, 4};
+            int centerPattern[] = {-4, 0, 4};
+
+            // The number of subvote hits we've had
+            uint32_t numHits = 0;
+            // Take the samples
+            for (size_t i  = 0; i < numTries; ++i) {
+                // The sample will be centered around this point
+                readIndex = static_cast<uint32_t>(std::round(readStart + i * step)) - readStart;
+
+                // The number of successful sub-votes we have
+                uint32_t subHit = 0;
+                // Select the center sub-vote pattern, unless we're near the end of a read
+                int* pattern = &centerPattern[0];
+                if (readIndex + pattern[0] < 0) {
+                    pattern = &rightPattern[0];
+                } else if (readIndex + pattern[lpos-1] >= readLen) {
+                    pattern = &leftPattern[0];
+                }
+
+                // collect the subvotes
+                for (size_t j = 0; j < lpos; ++j) {
+                    // the pattern offset
+                    int offset = pattern[j];
+                    // and sample position it implies within the read
+                    int readPos = readIndex + offset;
+
+                    if (readStart + readPos >= tlen) {
+                        std::cerr  << "offset = " << offset << ", readPos = " << readPos << ", readStart = " << readStart << ", readStart + readPos = " << readStart + readPos << ", tlen = " << transcript.RefLength << "\n";
+                    }
+
+                    subHit += (isRC) ?
+                        (transcript.charBaseAt(readStart + readLen - readPos, dir) == salmon::stringtools::charCanon[read[readPos]]) :
+                        (transcript.charBaseAt(readStart + readPos               ) == salmon::stringtools::charCanon[read[readPos]]);
+                }
+                // if the entire subvote was successful, this is a hit
+                numHits += (subHit == lpos);
+            }
+            // return the number of hits we had
+            return numHits;
+        }
+
+
+
+        bool computeBestLoc3_(std::vector<KmerVote>& sVotes, Transcript& transcript,
+                              std::string& read, bool isRC,
+                              int32_t& maxClusterPos, uint32_t& maxClusterCount, double& maxClusterScore) {
+
+            bool updatedMaxScore{false};
+
+            if (sVotes.size() == 0) { return updatedMaxScore; }
+
+            struct LocHitCount {
+                int32_t loc;
+                uint32_t nhits;
+            };
+
+            uint32_t numSamp = 15;
+            std::vector<LocHitCount> hitCounts;
+            size_t nvotes = sVotes.size();
+            int32_t prevPos = -std::numeric_limits<int32_t>::max();
+            for (size_t j = 0; j < nvotes; ++j) {
+                int32_t votePos = sVotes[j].votePos;
+                int32_t posInRead = sVotes[j].readPos;
+                int32_t voteLen = sVotes[j].voteLen;
+                if (prevPos == votePos) { continue; }
+                auto numHits = numSampledHits_(transcript, read, votePos, posInRead, voteLen, isRC, numSamp);
+                hitCounts.push_back({votePos, numHits});
+                prevPos = votePos;
+            }
+
+            uint32_t maxGap = 8;
+            uint32_t hitIdx = 0;
+            uint32_t accumHits = 0;
+            int32_t hitLoc = hitCounts[hitIdx].loc;
+            while (hitIdx < hitCounts.size()) {
+                uint32_t idx2 = hitIdx;
+                while (idx2 < hitCounts.size() and std::abs(hitCounts[idx2].loc - hitLoc) <= maxGap) {
+                    accumHits += hitCounts[idx2].nhits;
+                    ++idx2;
+                }
+
+                double score = static_cast<double>(accumHits) / numSamp;
+                if (score > maxClusterScore) {
+                    maxClusterCount = accumHits;
+                    maxClusterScore = score;
+                    maxClusterPos = hitCounts[hitIdx].loc;
+                    updatedMaxScore = true;
+                }
+                accumHits = 0;
+                ++hitIdx;
+                // guard against reading one past the end on the final iteration
+                if (hitIdx < hitCounts.size()) { hitLoc = hitCounts[hitIdx].loc; }
+            }
+
+            return updatedMaxScore;
+        }
+
+
+        bool computeBestChain(Transcript& transcript, std::string& read) {
+            std::sort(votes.begin(), votes.end(),
+                    [](const KmerVote& v1, const KmerVote& v2) -> bool {
+                        if (v1.votePos == v2.votePos) {
+                            return v1.readPos < v2.readPos;
+                        }
+                        return v1.votePos < v2.votePos;
+                    });
+
+            std::sort(rcVotes.begin(), rcVotes.end(),
+                    [](const KmerVote& v1, const KmerVote& v2) -> bool {
+                        if (v1.votePos == v2.votePos) {
+                            return v1.readPos < v2.readPos;
+                        }
+                        return v1.votePos < v2.votePos;
+                    });
+
+            int32_t maxClusterPos{0};
+            uint32_t maxClusterCount{0};
+            double maxClusterScore{0.0};
+
+            // we don't need the return value from the first call
+            static_cast<void>(computeBestLoc_(votes, transcript, read, false, maxClusterPos, maxClusterCount, maxClusterScore));
+            bool revIsBest = computeBestLoc_(rcVotes, transcript, read, true, maxClusterPos, maxClusterCount, maxClusterScore);
+            isForward_ = not revIsBest;
+
+            bestHitPos = maxClusterPos;
+            bestHitCount = maxClusterCount;
+            bestHitScore = maxClusterScore;
+            return true;
+        }
+
+        bool isForward() { return isForward_; }
+
+};
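+
+// A usage sketch for the voting scheme above, assuming one TranscriptHitList
+// per candidate transcript:
+//
+//   TranscriptHitList hl;
+//   hl.targetID = refID;
+//   hl.addFragMatch(tpos, readPos, seedLen);             // forward seed
+//   hl.addFragMatchRC(tpos, readPos, seedLen, readLen);  // reverse seed
+//   hl.computeBestChain(transcript, readSeq);
+//   // hl.bestHitPos, hl.bestHitScore and hl.isForward() now describe
+//   // the best-scoring cluster of votes.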
+
+
+template <typename AlnT>
+void processMiniBatch(
+        ReadExperiment& readExp,
+        ForgettingMassCalculator& fmCalc,
+        uint64_t firstTimestepOfRound,
+        ReadLibrary& readLib,
+        const SalmonOpts& salmonOpts,
+        AlnGroupVecRange<AlnT> batchHits,
+        std::vector<Transcript>& transcripts,
+        ClusterForest& clusterForest,
+        FragmentLengthDistribution& fragLengthDist,
+        std::atomic<uint64_t>& numAssignedFragments,
+        std::default_random_engine& randEng,
+        bool initialRound,
+        std::atomic<bool>& burnedIn
+        );
+
+template <typename CoverageCalculator>
+inline void collectHitsForRead(SalmonIndex* sidx, const bwtintv_v* a, smem_aux_t* auxHits,
+                        mem_opt_t* memOptions, const SalmonOpts& salmonOpts, const uint8_t* read, uint32_t readLen,
+                        std::vector<CoverageCalculator>& hits) {
+                        //std::unordered_map<uint64_t, CoverageCalculator>& hits) {
+
+    bwaidx_t* idx = sidx->bwaIndex();
+    mem_collect_intv(salmonOpts, memOptions, sidx, readLen, read, auxHits);
+
+    // For each MEM
+    int firstSeedLen{-1};
+    for (int i = 0; i < auxHits->mem.n; ++i ) {
+        // A pointer to the interval of the MEM's occurrences
+        bwtintv_t* p = &auxHits->mem.a[i];
+        // The start and end positions in the query string (i.e. read) of the MEM
+        int qstart = p->info>>32;
+        uint32_t qend = static_cast<uint32_t>(p->info);
+        int step, count, slen = (qend - qstart); // seed length
+
+        /*
+        if (firstSeedLen > -1) {
+            if (slen < firstSeedLen) { return; }
+        } else {
+            firstSeedLen = slen;
+        }
+        */
+
+        int64_t k;
+        step = p->x[2] > memOptions->max_occ? p->x[2] / memOptions->max_occ : 1;
+        // For every occurrence of the MEM
+        for (k = count = 0; k < p->x[2] && count < memOptions->max_occ; k += step, ++count) {
+            bwtint_t pos;
+            bwtint_t startPos, endPos;
+            int len, isRev, isRevStart, isRevEnd, refID, refIDStart, refIDEnd;
+            int queryStart = qstart;
+            len = slen;
+            uint32_t rlen = readLen;
+
+            // Get the position in the reference index of this MEM occurrence
+            int64_t refStart = bwt_sa(idx->bwt, p->x[0] + k);
+
+            pos = startPos = bns_depos(idx->bns, refStart, &isRevStart);
+            endPos = bns_depos(idx->bns, refStart + slen - 1, &isRevEnd);
+            // If we span the forward/reverse boundary, discard the hit
+            if (isRevStart != isRevEnd) {
+                continue;
+            }
+            // Otherwise, isRevStart = isRevEnd so just assign isRev = isRevStart
+            isRev = isRevStart;
+
+            // If the hit is reversed --- swap the start and end
+            if (isRev) {
+                if (endPos > startPos) {
+                    salmonOpts.jointLog->warn("Hit is supposedly reversed, "
+                                              "but startPos = {} < endPos = {}",
+                                              startPos, endPos);
+                }
+                auto temp = startPos;
+                startPos = endPos;
+                endPos = temp;
+            }
+            // Get the ID of the reference sequence in which it occurs
+            refID = refIDStart = bns_pos2rid(idx->bns, startPos);
+            refIDEnd = bns_pos2rid(idx->bns, endPos);
+
+            if (refID < 0) { continue; } // bridging multiple reference sequences or the forward-reverse boundary;
+
+            auto tlen = idx->bns->anns[refID].len;
+
+            // The reference sequence-relative (e.g. transcript-relative) position of the MEM
+            long hitLoc = static_cast<long>(isRev ? endPos : startPos) - idx->bns->anns[refID].offset;
+
+            if ((refIDStart != refIDEnd)) {
+                // If a seed spans two transcripts
+
+                // If we're not considering splitting such seeds, then
+                // just discard this seed and continue.
+                if (not salmonOpts.splitSpanningSeeds) { continue; }
+
+                //std::cerr << "Seed spans two transcripts! --- attempting to split: \n";
+                if (!isRev) {
+                    // If it's going forward, we have a situation like this
+                    // packed transcripts: t1 ===========|t2|==========>
+                    // hit:                          |==========>
+
+                    // length of hit in t1
+                    auto len1 = tlen - hitLoc;
+                    // length of hit in t2
+                    auto len2 = slen - len1;
+                    if (std::max(len1, len2) < memOptions->min_seed_len) { continue; }
+
+                    /** Keeping this here for now in case I need to debug splitting seeds again
+                    std::cerr << "\t hit is in the forward direction: ";
+                    std::cerr << "t1 part has length " << len1 << ", t2 part has length " << len2 << "\n";
+                    */
+
+                    // If the part in t1 is larger then just cut off the rest
+                    if (len1 >= len2) {
+                        slen = len1;
+                        int32_t votePos = static_cast<int32_t>(hitLoc) - queryStart;
+                        //std::cerr << "\t\t t1 (of length " << tlen << ") has larger hit --- new hit length = " << len1 << "; starts at pos " << queryStart << " in the read (votePos will be " << votePos << ")\n";
+                    } else {
+                        // Otherwise, make the hit be in t2.
+                        // Because the hit spans the boundary where t2 begins,
+                        // the new seed begins matching at position 0 of
+                        // transcript t2
+                        hitLoc = 0;
+                        slen = len2;
+                        // The seed originally started at position q, now it starts  len1 characters to the  right of that
+                        queryStart += len1;
+                        refID = refIDEnd;
+                        int32_t votePos = static_cast<int32_t>(hitLoc) - queryStart;
+                        tlen = idx->bns->anns[refID].len;
+                        //std::cerr << "\t\t t2 (of length " << tlen << ") has larger hit --- new hit length = " << len2 << "; starts at pos " << queryStart << " in the read (votePos will be " << votePos << ")\n";
+                    }
+                } else {
+
+                    // If it's going in the reverse direction, we have a situation like this
+                    // packed transcripts: t1 <===========|t2|<==========
+                    // hit:                          X======Y>======Z>
+                    // Which means we have
+                    // packed transcripts: t1 <===========|t2|<==========
+                    // hit:                          <Z=====Y<======X
+                    // length of hit in t1
+
+                    auto len2 = endPos - idx->bns->anns[refIDEnd].offset;
+                    auto len1 = slen - len2;
+                    if (std::max(len1, len2) < memOptions->min_seed_len) { continue; }
+
+                    /** Keeping this here for now in case I need to debug splitting seeds again
+                    std::cerr << "\t hit is in the reverse direction: ";
+                    std::cerr << "\n\n";
+                    std::cerr << "startPos = " << startPos << ", endPos = " << endPos << ", offset[refIDStart] = "
+                              <<  idx->bns->anns[refIDStart].offset << ", offset[refIDEnd] = " << idx->bns->anns[refIDEnd].offset << "\n";
+                    std::cerr << "\n\n";
+                    std::cerr << "t1 part has length " << len1 << ", t2 part has length " << len2 << "\n\n";
+                    */
+
+                    if (len1 >= len2) {
+                        slen = len1;
+                        hitLoc = tlen - len2;
+                        queryStart += len2;
+                        rlen -= len2;
+                        int32_t votePos = static_cast<int32_t>(hitLoc) - (rlen - queryStart);
+                        //std::cerr << "\t\t t1 (hitLoc: " << hitLoc << ") (of length " << tlen << ") has larger hit --- new hit length = " << len1 << "; starts at pos " << queryStart << " in the read (votePos will be " << votePos << ")\n";
+                    } else {
+                        slen = len2;
+                        refID = bns_pos2rid(idx->bns, endPos);
+                        tlen = idx->bns->anns[refID].len;
+                        hitLoc = len2;
+                        rlen = hitLoc + queryStart;
+                        int32_t votePos = static_cast<int32_t>(hitLoc) - (rlen - queryStart);
+                        //std::cerr << "\t\t t2 (of length " << tlen << ") (hitLoc: " << hitLoc << ") has larger hit --- new hit length = " << len2 << "; starts at pos " << queryStart << " in the read (votePos will be " << votePos << ")\n";
+                    }
+                }
+
+            }
+
+            auto hitIt = std::find_if(hits.begin(), hits.end(), [refID](CoverageCalculator& c) -> bool { return c.targetID == refID; });
+            if (isRev) {
+                if (hitIt == hits.end()) {
+                    CoverageCalculator hit;
+                    hit.targetID = refID;
+                    hit.addFragMatchRC(hitLoc, queryStart, slen, rlen);
+                    hits.emplace_back(hit);
+                } else {
+                    hitIt->addFragMatchRC(hitLoc, queryStart , slen, rlen);
+                    //hits[refID].addFragMatchRC(hitLoc, queryStart , slen, rlen);
+                }
+            } else {
+                if (hitIt == hits.end()) {
+                    CoverageCalculator hit;
+                    hit.targetID = refID;
+                    hit.addFragMatch(hitLoc, queryStart, slen);
+                    hits.emplace_back(hit);
+                } else {
+                    hitIt->addFragMatch(hitLoc, queryStart , slen);
+                    //hits[refID].addFragMatch(hitLoc, queryStart, slen);
+                }
+            }
+        } // for k
+    }
+}
+
+inline bool consistentNames(header_sequence_qual& r) {
+    return true;
+}
+
+bool consistentNames(std::pair<header_sequence_qual, header_sequence_qual>& rp) {
+        auto l1 = rp.first.header.length();
+        auto l2 = rp.second.header.length();
+        char* sptr = static_cast<char*>(memchr(&rp.first.header[0], ' ', l1));
+
+        bool compat = false;
+        // If we didn't find a space in the name of read1
+        if (sptr == NULL) {
+            if (l1 > 1) {
+                compat = (l1 == l2);
+                compat = compat and (memcmp(&rp.first.header[0], &rp.second.header[0], l1-1) == 0);
+                compat = compat and ((rp.first.header[l1-1] == '1' and rp.second.header[l2-1] == '2')
+                                or   (rp.first.header[l1-1] == rp.second.header[l2-1]));
+            } else {
+                compat = (l1 == l2);
+                compat = compat and (rp.first.header[0] == rp.second.header[0]);
+            }
+        } else {
+            size_t offset = sptr - (&rp.first.header[0]);
+
+            // If read2 matches read1 up to and including the space
+            if (offset + 1 < l2) {
+                compat = memcmp(&rp.first.header[0], &rp.second.header[0], offset) == 0;
+                // and after the space, read1 and read2 have an identical character or
+                // read1 has a '1' and read2 has a '2', then this is a consistent pair.
+                compat = compat and ((rp.first.header[offset+1] == rp.second.header[offset+1])
+                                or   (rp.first.header[offset+1] == '1' and rp.second.header[offset+1] == '2'));
+            } else {
+                compat = false;
+            }
+        }
+        return compat;
+}
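
For illustration only (not part of the patch), here is a minimal standalone sketch of the same naming rule applied to plain std::string headers; headersConsistent() below is a hypothetical stand-in for consistentNames(), and the example headers are made up:

    #include <cstring>
    #include <iostream>
    #include <string>

    // Hypothetical stand-in that mirrors consistentNames() on plain strings.
    bool headersConsistent(const std::string& h1, const std::string& h2) {
        const auto l1 = h1.length();
        const auto l2 = h2.length();
        const void* sptr = std::memchr(h1.data(), ' ', l1);

        // No space in the first header: names must match, allowing a trailing 1/2.
        if (sptr == nullptr) {
            if (l1 != l2 or l1 == 0) { return false; }
            if (l1 == 1) { return h1[0] == h2[0]; }
            return std::memcmp(h1.data(), h2.data(), l1 - 1) == 0
               and (h1[l1 - 1] == h2[l2 - 1]
                    or (h1[l1 - 1] == '1' and h2[l2 - 1] == '2'));
        }

        // Space found: headers must agree before it, then allow 1 vs 2 right after it.
        const size_t offset = static_cast<const char*>(sptr) - h1.data();
        if (offset + 1 >= l2) { return false; }
        return std::memcmp(h1.data(), h2.data(), offset) == 0
           and (h1[offset + 1] == h2[offset + 1]
                or (h1[offset + 1] == '1' and h2[offset + 1] == '2'));
    }

    int main() {
        std::cout << headersConsistent("frag42/1", "frag42/2") << "\n";          // 1
        std::cout << headersConsistent("frag42 1:N:0", "frag42 2:N:0") << "\n";  // 1
        std::cout << headersConsistent("frag42/1", "frag99/2") << "\n";          // 0
        return 0;
    }
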
+
+/**
+ *  Returns true if the @hit lies within @cutoff bases of either end of
+ *  transcript @txp, and false otherwise.
+ */
+template <typename CoverageCalculator>
+inline bool nearEndOfTranscript(
+            CoverageCalculator& hit,
+            Transcript& txp,
+            int32_t cutoff=std::numeric_limits<int32_t>::max()) {
+	// check if the hit appears close to either end of the given transcript
+    bool isForward = hit.isForward();
+	int32_t hitPos = static_cast<int32_t>(hit.bestHitPos);
+    return (hitPos <= cutoff or std::abs(static_cast<int32_t>(txp.RefLength) - hitPos) <= cutoff);
+}
+
+template <typename CoverageCalculator>
+inline void getHitsForFragment(std::pair<header_sequence_qual, header_sequence_qual>& frag,
+                        SalmonIndex* sidx,
+                        smem_i *itr,
+                        const bwtintv_v *a,
+                        smem_aux_t* auxHits,
+                        mem_opt_t* memOptions,
+                        ReadExperiment& readExp,
+                        const SalmonOpts& salmonOpts,
+                        double coverageThresh,
+                        uint64_t& upperBoundHits,
+                        AlignmentGroup<SMEMAlignment>& hitList,
+                        uint64_t& hitListCount,
+                        std::vector<Transcript>& transcripts) {
+
+    //std::unordered_map<uint64_t, CoverageCalculator> leftHits;
+    //std::unordered_map<uint64_t, CoverageCalculator> rightHits;
+
+    std::vector<CoverageCalculator> leftHits;
+    std::vector<CoverageCalculator> rightHits;
+
+
+    uint32_t leftReadLength{0};
+    uint32_t rightReadLength{0};
+
+    auto& eqBuilder = readExp.equivalenceClassBuilder();
+    bool allowOrphans{salmonOpts.allowOrphans};
+
+    /**
+    * As soon as we can decide on an acceptable way to validate read names,
+    * we'll inform the user and quit if we see something inconsistent.  However,
+    * we first need a reasonable way to verify potential naming formats from
+    * many different sources.
+    */
+    /*
+    if (!consistentNames(frag)) {
+        fmt::MemoryWriter errstream;
+
+        errstream << "Inconsistent paired-end reads!\n";
+        errstream << "mate1 : " << frag.first.header << "\n";
+        errstream << "mate2 : " << frag.second.header << "\n";
+        errstream << "Paired-end reads should appear consistently in their respective files.\n";
+        errstream << "Please fix the paired-end input before quantifying with salmon; exiting.\n";
+
+        std::cerr << errstream.str();
+        std::exit(-1);
+    }
+    */
+
+    //---------- End 1 ----------------------//
+    {
+        std::string readStr   = frag.first.seq;
+        uint32_t readLen      = readStr.size();
+
+        leftReadLength = readLen;
+
+        for (int p = 0; p < readLen; ++p) {
+            readStr[p] = nst_nt4_table[static_cast<int>(readStr[p])];
+        }
+
+        collectHitsForRead(sidx, a, auxHits,
+                            memOptions,
+                            salmonOpts,
+                            reinterpret_cast<const uint8_t*>(readStr.c_str()),
+                            readLen,
+                            leftHits);
+    }
+
+    //---------- End 2 ----------------------//
+    {
+        std::string readStr   = frag.second.seq;
+        uint32_t readLen      = readStr.size();
+
+        rightReadLength = readLen;
+
+        for (int p = 0; p < readLen; ++p) {
+            readStr[p] = nst_nt4_table[static_cast<int>(readStr[p])];
+        }
+
+        collectHitsForRead(sidx, a, auxHits,
+                            memOptions,
+                            salmonOpts,
+                            reinterpret_cast<const uint8_t*>(readStr.c_str()),
+                            readLen,
+                            rightHits);
+     } // end right
+
+    size_t numTrivialHits = (leftHits.size() + rightHits.size() > 0) ? 1 : 0;
+    upperBoundHits += (leftHits.size() + rightHits.size() > 0) ? 1 : 0;
+    size_t readHits{0};
+    auto& alnList = hitList.alignments();
+    hitList.isUniquelyMapped() = true;
+    alnList.clear();
+    // nothing more to do
+    if (numTrivialHits == 0) { return; }
+
+
+    double cutoffLeft{ coverageThresh };//* leftReadLength};
+    double cutoffRight{ coverageThresh };//* rightReadLength};
+
+    uint64_t leftHitCount{0};
+
+    // Fraction of the optimal coverage that a lightweight alignment
+    // must obtain in order to be retained.
+    float fOpt{0.95};
+
+    // First, see if there are transcripts where both ends of the
+    // fragments map
+    auto& minHitList = (leftHits.size() < rightHits.size()) ? leftHits : rightHits;
+    auto& maxHitList = (leftHits.size() < rightHits.size()) ? rightHits : leftHits;
+
+    struct JointHitPtr {
+        uint32_t transcriptID;
+        size_t leftIndex;
+        size_t rightIndex;
+    };
+
+    std::vector<JointHitPtr> jointHits; // haha (variable name)!
+    jointHits.reserve(minHitList.size());
+
+    // vector-based code
+    // Sort the left and right hits
+    std::sort(leftHits.begin(), leftHits.end(),
+              [](const CoverageCalculator& c1, const CoverageCalculator& c2) -> bool {
+                return c1.targetID < c2.targetID;
+               });
+    std::sort(rightHits.begin(), rightHits.end(),
+              [](const CoverageCalculator& c1, const CoverageCalculator& c2) -> bool {
+                return c1.targetID < c2.targetID;
+               });
+    // Take the intersection of these two hit lists
+    // Adopted from : http://en.cppreference.com/w/cpp/algorithm/set_intersection
+    {
+        auto leftIt = leftHits.begin();
+        auto leftEnd = leftHits.end();
+        auto rightIt = rightHits.begin();
+        auto rightEnd = rightHits.end();
+        while (leftIt != leftEnd && rightIt != rightEnd) {
+            if (leftIt->targetID < rightIt->targetID) {
+                ++leftIt;
+            } else {
+                if (!(rightIt->targetID < leftIt->targetID)) {
+                    jointHits.push_back({leftIt->targetID,
+                                         static_cast<size_t>(std::distance(leftHits.begin(), leftIt)),
+                                         static_cast<size_t>(std::distance(rightHits.begin(), rightIt))});
+                    ++leftIt;
+                }
+                ++rightIt;
+            }
+        }
+    }
+    // End vector-based code
+
+    /* map based code
+    {
+        auto notFound = maxHitList.end();
+        for (auto& kv : minHitList) {
+            uint64_t refID = kv.first;
+            if (maxHitList.find(refID) != notFound) {
+                jointHits.emplace_back(refID);
+            }
+        }
+    }
+    */
+
+    // Check if the fragment generated orphaned
+    // lightweight alignments.
+    bool isOrphan = (jointHits.size() == 0);
+
+    uint32_t firstTranscriptID = std::numeric_limits<uint32_t>::max();
+    double bestScore = -std::numeric_limits<double>::max();
+    bool sortedByTranscript = true;
+    int32_t lastTranscriptId = std::numeric_limits<int32_t>::min();
+
+    if (BOOST_UNLIKELY(isOrphan and allowOrphans)) {
+        //std::vector<CoverageCalculator> allHits;
+        //allHits.reserve(totalHits);
+        bool foundValidHit{false};
+
+        // search for a hit on the left
+        for (auto& tHitList : leftHits) {
+            auto transcriptID = tHitList.targetID;
+            auto& covChain = tHitList;
+            Transcript& t = transcripts[transcriptID];
+            if (!t.hasAnchorFragment()) { continue; }
+
+            covChain.computeBestChain(t, frag.first.seq);
+            double score = covChain.bestHitScore;
+
+    	    // make sure orphaned fragment is near the end of the transcript
+	    	// if (!nearEndOfTranscript(covChain, t, 1000)) { continue; }
+
+            if (score >= fOpt * bestScore and score >= cutoffLeft) {
+                foundValidHit = true;
+
+        		if (score > bestScore) { bestScore = score; }
+                bool isForward = covChain.isForward();
+                int32_t hitPos = covChain.bestHitPos;
+                auto fmt = salmon::utils::hitType(hitPos, isForward);
+
+                if (leftHitCount == 0) {
+                    firstTranscriptID = transcriptID;
+                } else if (hitList.isUniquelyMapped() and transcriptID != firstTranscriptID) {
+                    hitList.isUniquelyMapped() = false;
+                }
+
+                if (transcriptID  < lastTranscriptId) {
+                    sortedByTranscript = false;
+                }
+
+                alnList.emplace_back(transcriptID, fmt, score, hitPos);
+                alnList.back().fwd = isForward;
+                alnList.back().mateStatus = rapmap::utils::MateStatus::PAIRED_END_LEFT;
+                readHits += score;
+                ++hitListCount;
+                ++leftHitCount;
+            }
+        }
+
+        // search for a hit on the right
+        for (auto& tHitList : rightHits) {
+            // Prior
+            // auto transcriptID = tHitList.first;
+            auto transcriptID = tHitList.targetID;
+            auto& covChain = tHitList;
+            Transcript& t = transcripts[transcriptID];
+            if (!t.hasAnchorFragment()) { continue; }
+
+            covChain.computeBestChain(t, frag.second.seq);
+            double score = covChain.bestHitScore;
+
+            // make sure orphaned fragment is near the end of the transcript
+            // if (!nearEndOfTranscript(covChain, t, 1000)) { continue; }
+
+            if (score >= fOpt * bestScore and score >= cutoffRight) {
+                if (score > bestScore) { bestScore = score; }
+                foundValidHit = true;
+                bool isForward = covChain.isForward();
+                int32_t hitPos = covChain.bestHitPos;
+                auto fmt = salmon::utils::hitType(hitPos, isForward);
+                if (leftHitCount == 0) {
+                    firstTranscriptID = transcriptID;
+                } else if (hitList.isUniquelyMapped() and transcriptID != firstTranscriptID) {
+                    hitList.isUniquelyMapped() = false;
+                }
+
+                alnList.emplace_back(transcriptID, fmt, score, hitPos);
+                alnList.back().fwd = isForward;
+                alnList.back().mateStatus = rapmap::utils::MateStatus::PAIRED_END_RIGHT;
+                readHits += score;
+                ++hitListCount;
+                ++leftHitCount;
+            }
+        }
+
+        if (alnList.size() > 0) {
+            auto newEnd = std::stable_partition(alnList.begin(), alnList.end(),
+                           [bestScore, fOpt](SMEMAlignment& aln) -> bool {
+                                return aln.score() >= fOpt * bestScore;
+                           });
+            alnList.resize(std::distance(alnList.begin(), newEnd));
+            if (!sortedByTranscript) {
+                std::sort(alnList.begin(), alnList.end(),
+                          [](const SMEMAlignment& x, const SMEMAlignment& y) -> bool {
+                           return x.transcriptID() < y.transcriptID();
+                          });
+            }
+        } else {
+            return;
+            /*
+            // If we didn't have any *significant* hits --- add any *trivial* orphan hits
+            size_t totalHits = leftHits.size() + rightHits.size();
+            std::vector<uint32_t> txpIDs;
+            txpIDs.reserve(totalHits);
+            std::vector<double> auxProbs;
+            auxProbs.reserve(totalHits);
+
+            size_t txpIDsHash{0};
+            std::vector<CoverageCalculator> allHits;
+            allHits.reserve(totalHits);
+            std::merge(leftHits.begin(), leftHits.end(),
+                       rightHits.begin(), rightHits.end(),
+                       std::back_inserter(allHits),
+                       [](CoverageCalculator& c1, CoverageCalculator& c2) -> bool {
+                        return c1.targetID < c2.targetID;
+                       });
+            double totProb{0.0};
+            for (auto& h : allHits) {
+                boost::hash_combine(txpIDsHash, h.targetID);
+                txpIDs.push_back(h.targetID);
+                double refLen =  std::max(1.0, static_cast<double>(transcripts[h.targetID].RefLength));
+                double startProb = 1.0 / refLen;
+                auxProbs.push_back(startProb);
+                totProb += startProb;
+            }
+            if (totProb > 0.0) {
+                double norm = 1.0 / totProb;
+                for (auto& p : auxProbs) { p *= norm; }
+
+                TranscriptGroup tg(txpIDs, txpIDsHash);
+                eqBuilder.addGroup(std::move(tg), auxProbs);
+            } else {
+                salmonOpts.jointLog->warn("Unexpected empty hit group [orphaned]");
+            }
+            */
+        }
+    } else { // Not an orphan
+        for (auto jhp : jointHits) {
+            auto& jointHitPtr = jhp;
+            auto transcriptID = jhp.transcriptID;
+            Transcript& t = transcripts[transcriptID];
+            auto& leftHitList = leftHits[jhp.leftIndex];
+            leftHitList.computeBestChain(t, frag.first.seq);
+            if (leftHitList.bestHitScore >= cutoffLeft) {
+                auto& rightHitList = rightHits[jhp.rightIndex];
+
+                rightHitList.computeBestChain(t, frag.second.seq);
+                if (rightHitList.bestHitScore < cutoffRight) { continue; }
+
+                auto end1Start = leftHitList.bestHitPos;
+                auto end2Start = rightHitList.bestHitPos;
+
+                double score = (leftHitList.bestHitScore + rightHitList.bestHitScore) * 0.5;
+                if (score < fOpt * bestScore) { continue; }
+
+                if (score > bestScore) {
+                    bestScore = score;
+                }
+
+                uint32_t fragLength = std::abs(static_cast<int32_t>(end1Start) -
+                                               static_cast<int32_t>(end2Start)) + rightReadLength;
+
+                bool end1IsForward = leftHitList.isForward();
+                bool end2IsForward = rightHitList.isForward();
+
+                uint32_t end1Pos = (end1IsForward) ? leftHitList.bestHitPos : leftHitList.bestHitPos + leftReadLength;
+                uint32_t end2Pos = (end2IsForward) ? rightHitList.bestHitPos : rightHitList.bestHitPos + rightReadLength;
+        		bool canDovetail = false;
+                auto fmt = salmon::utils::hitType(end1Pos, end1IsForward, leftReadLength, end2Pos, end2IsForward, rightReadLength, canDovetail);
+
+                if (readHits == 0) {
+                    firstTranscriptID = transcriptID;
+                } else if (hitList.isUniquelyMapped() and transcriptID != firstTranscriptID) {
+                     hitList.isUniquelyMapped() = false;
+                }
+
+                int32_t minHitPos = std::min(end1Pos, end2Pos);
+                if (transcriptID  < lastTranscriptId) {
+                    sortedByTranscript = false;
+                }
+                // ANCHOR TEST
+                t.setAnchorFragment();
+                alnList.emplace_back(transcriptID, fmt, score, minHitPos, fragLength);
+                alnList.back().fwd = end1IsForward;
+                alnList.back().mateIsFwd = end2IsForward;
+                alnList.back().mateStatus = rapmap::utils::MateStatus::PAIRED_END_PAIRED;
+                ++readHits;
+                ++hitListCount;
+            }
+        } // end for jointHits
+        if (alnList.size() > 0) {
+            auto newEnd = std::stable_partition(alnList.begin(), alnList.end(),
+                           [bestScore, fOpt](SMEMAlignment& aln) -> bool {
+                                return aln.score() >= fOpt * bestScore;
+                           });
+            alnList.resize(std::distance(alnList.begin(), newEnd));
+            if (!sortedByTranscript) {
+                std::sort(alnList.begin(), alnList.end(),
+                          [](const SMEMAlignment& x, const SMEMAlignment& y) -> bool {
+                           return x.transcriptID() < y.transcriptID();
+                          });
+            }
+        } else {
+            // If we didn't have any *significant* hits --- add any *trivial* joint hits
+            return;
+            /*
+            std::vector<uint32_t> txpIDs;
+            txpIDs.reserve(jointHits.size());
+            std::vector<double> auxProbs;
+            auxProbs.reserve(jointHits.size());
+
+            size_t txpIDsHash{0};
+            double totProb{0.0};
+            for (auto& h : jointHits) {
+                boost::hash_combine(txpIDsHash, h.transcriptID);
+                txpIDs.push_back(h.transcriptID);
+                double refLen =  std::max(1.0, static_cast<double>(transcripts[h.transcriptID].RefLength));
+                double startProb = 1.0 / refLen;
+                auxProbs.push_back(startProb);
+                totProb += startProb;
+            }
+            if (totProb > 0.0) {
+            double norm = 1.0 / totProb;
+            for (auto& p : auxProbs) { p *= norm; }
+
+            TranscriptGroup tg(txpIDs, txpIDsHash);
+            eqBuilder.addGroup(std::move(tg), auxProbs);
+            } else {
+                salmonOpts.jointLog->warn("Unexpected empty hit group [paired]");
+            }
+            */
+        }
+
+    } // end else
+}
+
+/**
+  *   Get hits for single-end fragment
+  *
+  *
+  */
+template <typename CoverageCalculator>
+inline void getHitsForFragment(jellyfish::header_sequence_qual& frag,
+                        SalmonIndex* sidx,
+                        smem_i *itr,
+                        const bwtintv_v *a,
+                        smem_aux_t* auxHits,
+                        mem_opt_t* memOptions,
+                        ReadExperiment& readExp,
+                        const SalmonOpts& salmonOpts,
+                        double coverageThresh,
+                        uint64_t& upperBoundHits,
+                        AlignmentGroup<SMEMAlignment>& hitList,
+                        uint64_t& hitListCount,
+                        std::vector<Transcript>& transcripts) {
+
+    uint64_t leftHitCount{0};
+
+    //std::unordered_map<uint64_t, CoverageCalculator> hits;
+    std::vector<CoverageCalculator> hits;
+
+    auto& eqBuilder = readExp.equivalenceClassBuilder();
+
+    uint32_t readLength{0};
+
+    //---------- get hits ----------------------//
+    {
+        std::string readStr   = frag.seq;
+        uint32_t readLen      = frag.seq.size();
+
+        readLength = readLen;
+
+        for (int p = 0; p < readLen; ++p) {
+            readStr[p] = nst_nt4_table[static_cast<int>(readStr[p])];
+        }
+
+        char* readPtr = const_cast<char*>(readStr.c_str());
+
+        collectHitsForRead(sidx, a, auxHits,
+                            memOptions,
+                            salmonOpts,
+                            reinterpret_cast<const uint8_t*>(readStr.c_str()),
+                            readLen,
+                            hits);
+
+    }
+
+    upperBoundHits += (hits.size() > 0) ? 1 : 0;
+
+    int32_t lastTranscriptId = std::numeric_limits<int32_t>::min();
+    bool sortedByTranscript{true};
+    double fOpt{0.95};
+    double bestScore = -std::numeric_limits<double>::max();
+
+    size_t readHits{0};
+    auto& alnList = hitList.alignments();
+    hitList.isUniquelyMapped() = true;
+    alnList.clear();
+
+    uint32_t firstTranscriptID = std::numeric_limits<uint32_t>::max();
+    double cutoff{ coverageThresh };//* readLength};
+    for (auto& tHitList : hits) {
+        // Prior
+        // auto hitID = tHitList.first;
+        // auto& covVec = tHitList.second;
+        auto hitID = tHitList.targetID;
+        auto& covVec = tHitList;
+
+        // Coverage score
+        Transcript& t = transcripts[hitID];
+        covVec.computeBestChain(t, frag.seq);
+        double score = covVec.bestHitScore;
+        if (score >= fOpt * bestScore and covVec.bestHitScore >= cutoff) {
+
+            bool isForward = covVec.isForward();
+            if (score < fOpt * bestScore) { continue; }
+
+        	if (score > bestScore) { bestScore = score; }
+
+            auto hitPos = covVec.bestHitPos;
+            auto fmt = salmon::utils::hitType(hitPos, isForward);
+
+            if (leftHitCount == 0) {
+                firstTranscriptID = hitID;
+            } else if (hitList.isUniquelyMapped() and hitID != firstTranscriptID) {
+                hitList.isUniquelyMapped() = false;
+            }
+
+            auto transcriptID = hitID;
+
+            if (transcriptID  < lastTranscriptId) {
+                sortedByTranscript = false;
+            }
+
+            alnList.emplace_back(transcriptID, fmt, score, hitPos);
+            alnList.back().fwd = isForward;
+            alnList.back().mateStatus = rapmap::utils::MateStatus::SINGLE_END;
+            readHits += score;
+            ++hitListCount;
+            ++leftHitCount;
+        }
+    }
+    if (alnList.size() > 0) {
+        auto newEnd = std::stable_partition(alnList.begin(), alnList.end(),
+                [bestScore, fOpt](SMEMAlignment& aln) -> bool {
+                return aln.score() >= fOpt * bestScore;
+                });
+        alnList.resize(std::distance(alnList.begin(), newEnd));
+        if (!sortedByTranscript) {
+            std::sort(alnList.begin(), alnList.end(),
+                    [](const SMEMAlignment& x, const SMEMAlignment& y) -> bool {
+                     return x.transcriptID() < y.transcriptID();
+                    });
+        }
+    }
+    else {
+        // If we didn't have any *significant* hits --- add any *trivial* joint hits
+        return;
+        /*
+        std::vector<uint32_t> txpIDs;
+        txpIDs.reserve(hits.size());
+        double uniProb = 1.0 / hits.size();
+        std::vector<double> auxProbs(hits.size(), uniProb);
+
+        size_t txpIDsHash{0};
+        for (auto& h : hits) {
+            boost::hash_combine(txpIDsHash, h.targetID);
+            txpIDs.push_back(h.targetID);
+        }
+
+        TranscriptGroup tg(txpIDs, txpIDsHash);
+        eqBuilder.addGroup(std::move(tg), auxProbs);
+        */
+    }
+
+
+}
+
+// To use the parser in the following, we get "jobs" until none is
+// available. A job behaves like a pointer to the type
+// jellyfish::sequence_list (see whole_sequence_parser.hpp).
+template <typename ParserT, typename CoverageCalculator>
+void processReadsMEM(ParserT* parser,
+               ReadExperiment& readExp,
+               ReadLibrary& rl,
+               AlnGroupVec<QuasiAlignment>& structureVec,
+               std::atomic<uint64_t>& numObservedFragments,
+               std::atomic<uint64_t>& numAssignedFragments,
+               std::atomic<uint64_t>& validHits,
+               std::atomic<uint64_t>& upperBoundHits,
+               SalmonIndex* sidx,
+               std::vector<Transcript>& transcripts,
+               ForgettingMassCalculator& fmCalc,
+               ClusterForest& clusterForest,
+               FragmentLengthDistribution& fragLengthDist,
+               mem_opt_t* memOptions,
+               const SalmonOpts& salmonOpts,
+               double coverageThresh,
+	           std::mutex& iomutex,
+               bool initialRound,
+               std::atomic<bool>& burnedIn,
+               volatile bool& writeToCache) {
+    	// ERROR
+	salmonOpts.jointLog->error("Quasimapping cannot be used with the FMD index --- please report this bug on GitHub");
+	std::exit(1);
+}
+
+template <typename ParserT, typename CoverageCalculator>
+void processReadsMEM(ParserT* parser,
+               ReadExperiment& readExp,
+               ReadLibrary& rl,
+               AlnGroupVec<SMEMAlignment>& structureVec,
+               std::atomic<uint64_t>& numObservedFragments,
+               std::atomic<uint64_t>& numAssignedFragments,
+               std::atomic<uint64_t>& validHits,
+               std::atomic<uint64_t>& upperBoundHits,
+               SalmonIndex* sidx,
+               std::vector<Transcript>& transcripts,
+               ForgettingMassCalculator& fmCalc,
+               ClusterForest& clusterForest,
+               FragmentLengthDistribution& fragLengthDist,
+               mem_opt_t* memOptions,
+               const SalmonOpts& salmonOpts,
+               double coverageThresh,
+	           std::mutex& iomutex,
+               bool initialRound,
+               std::atomic<bool>& burnedIn,
+               volatile bool& writeToCache) {
+  uint64_t count_fwd = 0, count_bwd = 0;
+  // Seed with a real random value, if available
+  std::random_device rd;
+
+  // Create a random uniform distribution
+  std::default_random_engine eng(rd());
+
+  uint64_t prevObservedFrags{1};
+  uint64_t leftHitCount{0};
+  uint64_t hitListCount{0};
+
+  // Super-MEM iterator
+  smem_i *itr = smem_itr_init(sidx->bwaIndex()->bwt);
+  const bwtintv_v *a = nullptr;
+  smem_aux_t* auxHits = smem_aux_init();
+
+  auto expectedLibType = rl.format();
+
+  uint64_t firstTimestepOfRound = fmCalc.getCurrentTimestep();
+
+  size_t locRead{0};
+  uint64_t localUpperBoundHits{0};
+  size_t rangeSize{0};
+
+  while(true) {
+    typename ParserT::job j(*parser); // Get a job from the parser: a bunch of reads (at most max_read_group)
+    if(j.is_empty()) break;           // If got nothing, quit
+
+    rangeSize = j->nb_filled;
+    if (rangeSize > structureVec.size()) {
+        salmonOpts.jointLog->error("rangeSize = {}, but structureVec.size() = {} --- this shouldn't happen.\n"
+                                   "Please report this bug on GitHub", rangeSize, structureVec.size());
+        std::exit(1);
+    }
+
+    for(size_t i = 0; i < j->nb_filled; ++i) { // For all the reads in this batch
+        localUpperBoundHits = 0;
+
+        auto& hitList = structureVec[i];
+        getHitsForFragment<CoverageCalculator>(j->data[i], sidx, itr, a,
+                                               auxHits,
+                                               memOptions,
+                                               readExp,
+                                               salmonOpts,
+                                               coverageThresh,
+                                               localUpperBoundHits,
+                                               hitList, hitListCount,
+                                               transcripts);
+        if (initialRound) {
+            upperBoundHits += localUpperBoundHits;
+        }
+
+        // If the read mapped to > maxReadOccs places, discard it
+        if (hitList.size() > salmonOpts.maxReadOccs ) { hitList.alignments().clear(); }
+        validHits += hitList.size();
+        locRead++;
+        ++numObservedFragments;
+        if (numObservedFragments % 50000 == 0) {
+    	    iomutex.lock();
+            const char RESET_COLOR[] = "\x1b[0m";
+            char green[] = "\x1b[30m";
+            green[3] = '0' + static_cast<char>(fmt::GREEN);
+            char red[] = "\x1b[30m";
+            red[3] = '0' + static_cast<char>(fmt::RED);
+            if (initialRound) {
+                fmt::print(stderr, "\033[A\r\r{}processed{} {} {}fragments{}\n", green, red, numObservedFragments, green, RESET_COLOR);
+                fmt::print(stderr, "hits: {}; hits per frag:  {}",
+                           validHits,
+                           validHits / static_cast<float>(prevObservedFrags));
+            } else {
+                fmt::print(stderr, "\r\r{}processed{} {} {}fragments{}", green, red, numObservedFragments, green, RESET_COLOR);
+            }
+    	    iomutex.unlock();
+        }
+
+
+    } // end for i < j->nb_filled
+
+    prevObservedFrags = numObservedFragments;
+    AlnGroupVecRange<SMEMAlignment> hitLists = boost::make_iterator_range(structureVec.begin(), structureVec.begin() + rangeSize);
+    processMiniBatch<SMEMAlignment>(readExp, fmCalc,firstTimestepOfRound, rl, salmonOpts, hitLists, transcripts, clusterForest,
+                     fragLengthDist, numAssignedFragments, eng, initialRound, burnedIn);
+  }
+  smem_aux_destroy(auxHits);
+  smem_itr_destroy(itr);
+}
+
+
+
+#endif // LIGHTWEIGHT_ALIGNMENT_DEFS_HPP
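
The paired-end and single-end paths above share one retention rule: after chain scoring, only lightweight alignments whose score is within fOpt (0.95) of the best score are kept, and the survivors are re-sorted by transcript ID if the order was broken. A condensed, self-contained sketch of that rule follows; Aln and its values are illustrative stand-ins, not salmon's SMEMAlignment type:

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Illustrative stand-in for SMEMAlignment: a transcript ID and a score.
    struct Aln { uint32_t tid; double score; };

    int main() {
        std::vector<Aln> alns{{7, 10.0}, {3, 9.7}, {12, 4.0}, {3, 9.4}};
        const double fOpt = 0.95;   // fraction of the best score required to survive

        double best = 0.0;
        for (const auto& a : alns) { best = std::max(best, a.score); }

        // Keep only alignments scoring within fOpt of the best one.
        auto newEnd = std::stable_partition(alns.begin(), alns.end(),
            [best, fOpt](const Aln& a) { return a.score >= fOpt * best; });
        alns.resize(std::distance(alns.begin(), newEnd));

        // The code above re-sorts only when the order was broken; here we
        // simply sort the survivors by transcript ID unconditionally.
        std::sort(alns.begin(), alns.end(),
                  [](const Aln& x, const Aln& y) { return x.tid < y.tid; });

        for (const auto& a : alns) {
            std::cout << "tid=" << a.tid << " score=" << a.score << "\n";
        }
        // Prints: tid=3 score=9.7, then tid=7 score=10
        return 0;
    }
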
diff --git a/include/MultinomialSampler.hpp b/include/MultinomialSampler.hpp
index 9b3d91c..dc3a72c 100644
--- a/include/MultinomialSampler.hpp
+++ b/include/MultinomialSampler.hpp
@@ -3,50 +3,70 @@
 
 #include <random>
 #include <vector>
+#include <algorithm>
 
 class MultinomialSampler {
- public:
-     MultinomialSampler(std::random_device& rd) :
-         gen_(rd()), u01_(0.0, 1.0) {}
-
-     void operator()(
-             std::vector<int>::iterator sampleBegin,
-             uint32_t n,
-             uint32_t k,
-             std::vector<double>::iterator probsBegin,
-	     bool clearCounts = true) {
-         int i, j;
-         double u, sum;
-         std::vector<double> z(k+1, 0.0);
-
-	 if (clearCounts) {
-		 for (uint32_t i = 0; i < k; i++) {
-		     *(sampleBegin + i) = 0;
-		 }
-	 }
-
-         z[0] = 0;
-         for (i = 1; i <= k; i++) {
-             sum = 0;
-             for (j = 0; j < i; j++) sum+= *(probsBegin + j);
-             z[i] = sum;
-         }
-
-         for (j = 0; j < n; j++) {
-             u = u01_(gen_);
-
-             for (i = 0; i < k; i++) {
-                 if ((z[i] < u) && (u <= z[i+1])) {
-                     (*(sampleBegin + i))++;
-                 }
-             }
-         }
-     }
-
-
-private:
-   std::mt19937 gen_;
-   std::uniform_real_distribution<> u01_;
+    public:
+        MultinomialSampler(std::random_device& rd) :
+            gen_(rd()), u01_(0.0, 1.0) {}
+
+        void operator()(
+                std::vector<uint64_t>::iterator sampleBegin,
+                uint32_t n,
+                uint32_t k,
+                std::vector<double>::iterator probsBegin,
+                bool clearCounts = true) {
+            uint32_t i, j;
+            double u, sum;
+            std::vector<double> z(k+1, 0.0);
+
+            if (clearCounts) {
+                for (i = 0; i < k; i++) {
+                    *(sampleBegin + i) = 0;
+                }
+            }
+
+            z[0] = 0;
+            for (i = 1; i <= k; i++) {
+                sum = 0;
+                for (j = 0; j < i; j++) sum+= *(probsBegin + j);
+                z[i] = sum;
+            }
+
+            // If k is small (<= 100), linear search is usually faster
+            if (k <= 100) {
+                for (j = 0; j < n; j++) {
+                    u = u01_(gen_);
+
+                    for (i = 0; i < k; i++) {
+                        if ((z[i] < u) && (u <= z[i+1])) {
+                            (*(sampleBegin + i))++;
+                            break;
+                        }
+                    }
+                }
+            } else { // k is large enough to warrant binary search
+                for (j = 0; j < n; j++) {
+                    u = u01_(gen_);
+
+                    // Find the offset of the element to increment
+                    auto it = std::lower_bound(z.begin(), z.end()-1, u);
+                    size_t offset = static_cast<size_t>(
+                            std::distance(z.begin(), it));
+
+                    if (*it > u and offset > 0) {
+                        offset -= 1;
+                    }
+
+                    (*(sampleBegin + offset))++;
+                }
+            }
+        }
+
+
+    private:
+        std::mt19937 gen_;
+        std::uniform_real_distribution<> u01_;
 };
 
 #endif //_MULTINOMIAL_SAMPLER_HPP_
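
A small usage sketch for the sampler updated above. The iterator-based call matches the operator() signature shown in this hunk; the include path and the draw counts are assumptions for illustration:

    #include <cstdint>
    #include <iostream>
    #include <random>
    #include <vector>

    #include "MultinomialSampler.hpp"  // assumed include path within this source tree

    int main() {
        std::random_device rd;
        MultinomialSampler sampler(rd);

        std::vector<double> probs{0.2, 0.3, 0.5};        // must sum to 1
        std::vector<uint64_t> counts(probs.size(), 0);   // one bin per category

        // Distribute 10,000 draws over three bins; expect roughly 2000/3000/5000.
        // With k <= 100 the linear-search branch above is taken.
        sampler(counts.begin(), 10000, static_cast<uint32_t>(probs.size()),
                probs.begin());

        for (size_t i = 0; i < counts.size(); ++i) {
            std::cout << "bin " << i << ": " << counts[i] << "\n";
        }
        return 0;
    }

For k > 100 the sampler binary-searches the cumulative vector z instead, which turns each draw from O(k) into O(log k).
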
diff --git a/include/PCA.hpp b/include/PCA.hpp
index 7d7f42d..9bdef3f 100644
--- a/include/PCA.hpp
+++ b/include/PCA.hpp
@@ -27,8 +27,9 @@ class PCA {
             Eigen::VectorXd means(dat_.cols());
             means = dat_.colwise().mean();
             Eigen::VectorXd stdDev(dat_.cols());
-            for (size_t i = 0; i < dat_.cols(); ++i) {
-                for (size_t j = 0; j < dat_.rows(); ++j) {
+            decltype(dat_.cols()) i, j;
+            for (i = 0; i < dat_.cols(); ++i) {
+                for (j = 0; j < dat_.rows(); ++j) {
                     dat_(j,i) -= means(i);
                 }
                 auto x = dat_.col(i).dot(dat_.col(i));
@@ -45,7 +46,8 @@ class PCA {
             if (droppedCols.size() > 0) {
                 Eigen::MatrixXd tmpDat(dat_.rows(), dat_.cols() - droppedCols.size());
                 size_t curCol{0};
-                for (size_t i = 0; i < dat_.cols(); ++i) {
+                decltype(dat_.cols()) i;
+                for (i = 0; i < dat_.cols(); ++i) {
                     if (droppedCols.find(i) == droppedCols.end()) {
                         tmpDat.col(curCol) = dat_.col(i);
                         ++curCol;
diff --git a/include/PairSequenceParser.hpp b/include/PairSequenceParser.hpp
index e0d1f23..6b0f4dd 100644
--- a/include/PairSequenceParser.hpp
+++ b/include/PairSequenceParser.hpp
@@ -130,9 +130,9 @@ protected:
     if(type1 == DONE_TYPE || type2 == DONE_TYPE)
       return open_next_files(st);
     if(type1 != type2)
-      eraise(std::runtime_error) << "Paired files are of different format";
+      throw std::runtime_error("Paired files are of different format");
     if(type1 == ERROR_TYPE || type2 == ERROR_TYPE)
-      eraise(std::runtime_error) << "Unsupported format";
+      throw std::runtime_error("Unsupported format");
     st.type = type1;
   }
 
@@ -165,7 +165,7 @@ protected:
       hsq.seq.append(tmp);             // two lines avoiding copying
     }
     if(!is.good())
-      eraise(std::runtime_error) << "Truncated fastq file";
+      throw std::runtime_error("Truncated fastq file");
     is.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
     hsq.qual.clear();
     while(hsq.qual.size() < hsq.seq.size() && is.good()) {
@@ -173,9 +173,9 @@ protected:
       hsq.qual.append(tmp);
     }
     if(hsq.qual.size() != hsq.seq.size())
-      eraise(std::runtime_error) << "Invalid fastq file: wrong number of quals";
+      throw std::runtime_error("Invalid fastq file: wrong number of quals");
     if(is.peek() != EOF && is.peek() != '@')
-      eraise(std::runtime_error) << "Invalid fastq file: header missing";
+      throw std::runtime_error("Invalid fastq file: header missing");
 
   }
 
diff --git a/include/ReadExperiment.hpp b/include/ReadExperiment.hpp
index 43d5f28..21bc7c6 100644
--- a/include/ReadExperiment.hpp
+++ b/include/ReadExperiment.hpp
@@ -16,7 +16,11 @@ extern "C" {
 #include "FragmentStartPositionDistribution.hpp"
 #include "SequenceBiasModel.hpp"
 #include "SalmonOpts.hpp"
+#include "SalmonIndex.hpp"
 #include "EquivalenceClassBuilder.hpp"
+#include "SpinLock.hpp" // RapMap's with try_lock
+#include "UtilityFunctions.hpp"
+#include "ReadKmerDist.hpp"
 
 // Logger includes
 #include "spdlog/spdlog.h"
@@ -30,6 +34,7 @@ extern "C" {
 #include <memory>
 #include <fstream>
 
+
 /**
   *  This class represents a library of alignments used to quantify
   *  a set of target transcripts.  The AlignmentLibrary contains info
@@ -44,14 +49,15 @@ class ReadExperiment {
     ReadExperiment(std::vector<ReadLibrary>& readLibraries,
                    //const boost::filesystem::path& transcriptFile,
                    const boost::filesystem::path& indexDirectory,
-		   SalmonOpts& sopt) :
+		           SalmonOpts& sopt) :
         readLibraries_(readLibraries),
         //transcriptFile_(transcriptFile),
         transcripts_(std::vector<Transcript>()),
         totalAssignedFragments_(0),
         fragStartDists_(5),
         seqBiasModel_(1.0),
-	eqBuilder_(sopt.jointLog) {
+	eqBuilder_(sopt.jointLog),
+        expectedBias_(constExprPow(4, readBias_.getK()), 1.0) {
             namespace bfs = boost::filesystem;
 
             // Make sure the read libraries are valid.
@@ -77,93 +83,40 @@ class ReadExperiment {
             }
             */
 
-            // ====== Load the transcripts from file
-            { // mem-based
-                bfs::path indexPath = indexDirectory / "bwaidx";
-                if ((idx_ = bwa_idx_load(indexPath.string().c_str(), BWA_IDX_BWT|BWA_IDX_BNS|BWA_IDX_PAC)) == 0) {
-                    fmt::print(stderr, "Couldn't open index [{}] --- ", indexPath);
-                    fmt::print(stderr, "Please make sure that 'salmon index' has been run successfully\n");
-                    std::exit(1);
-                }
+            // ==== Figure out the index type
+            boost::filesystem::path versionPath = indexDirectory / "versionInfo.json";
+            SalmonIndexVersionInfo versionInfo;
+            versionInfo.load(versionPath);
+            if (versionInfo.indexVersion() == 0) {
+                fmt::MemoryWriter infostr;
+                infostr << "Error: The index version file " << versionPath.string()
+                    << " doesn't seem to exist.  Please try re-building the salmon "
+                    "index.";
+                throw std::invalid_argument(infostr.str());
             }
+            // Check index version compatibility here
+            auto indexType = versionInfo.indexType();
+            // ==== Figure out the index type
 
-            size_t numRecords = idx_->bns->n_seqs;
-            std::vector<Transcript> transcripts_tmp;
-
-            fmt::print(stderr, "Index contained {} targets\n", numRecords);
-            //transcripts_.resize(numRecords);
-            for (auto i : boost::irange(size_t(0), numRecords)) {
-                uint32_t id = i;
-                char* name = idx_->bns->anns[i].name;
-                uint32_t len = idx_->bns->anns[i].len;
-                // copy over the length, then we're done.
-                transcripts_tmp.emplace_back(id, name, len);
-            }
+            salmonIndex_.reset(new SalmonIndex(sopt.jointLog, indexType));
+            salmonIndex_->load(indexDirectory);
 
-            std::sort(transcripts_tmp.begin(), transcripts_tmp.end(),
-                    [](const Transcript& t1, const Transcript& t2) -> bool {
-                    return t1.id < t2.id;
-                    });
-            double alpha = 0.005;
-            char nucTab[256];
-            nucTab[0] = 'A'; nucTab[1] = 'C'; nucTab[2] = 'G'; nucTab[3] = 'T';
-            for (size_t i = 4; i < 256; ++i) { nucTab[i] = 'N'; }
-
-            // Load the transcript sequence from file
-            for (auto& t : transcripts_tmp) {
-                transcripts_.emplace_back(t.id, t.RefName.c_str(), t.RefLength, alpha);
-                /* from BWA */
-                uint8_t* rseq = nullptr;
-                int64_t tstart, tend, compLen, l_pac = idx_->bns->l_pac;
-                tstart  = idx_->bns->anns[t.id].offset;
-                tend = tstart + t.RefLength;
-                rseq = bns_get_seq(l_pac, idx_->pac, tstart, tend, &compLen);
-                if (compLen != t.RefLength) {
-                    fmt::print(stderr,
-                               "For transcript {}, stored length ({}) != computed length ({}) --- index may be corrupt. exiting\n",
-                               t.RefName, compLen, t.RefLength);
-                    std::exit(1);
-                }
-                std::string seq(t.RefLength, ' ');
-                if (rseq != 0) {
-                    for (size_t i = 0; i < compLen; ++i) { seq[i] = nucTab[rseq[i]]; }
-                }
-                auto& txp = transcripts_.back();
-                txp.Sequence = salmon::stringtools::encodeSequenceInSAM(seq.c_str(), t.RefLength);
-                // Length classes taken from
-                // ======
-                // Roberts, Adam, et al.
-                // "Improving RNA-Seq expression estimates by correcting for fragment bias."
-                // Genome Biol 12.3 (2011): R22.
-                // ======
-                // perhaps, define these in a more data-driven way
-                if (t.RefLength <= 1334) {
-                    txp.lengthClassIndex(0);
-                } else if (t.RefLength <= 2104) {
-                    txp.lengthClassIndex(0);
-                } else if (t.RefLength <= 2988) {
-                    txp.lengthClassIndex(0);
-                } else if (t.RefLength <= 4389) {
-                    txp.lengthClassIndex(0);
+	    // Now we'll have either an FMD-based index or a QUASI index
+	    // dispatch on the correct type.
+
+	    switch (salmonIndex_->indexType()) {
+            case SalmonIndexType::QUASI:
+                if (salmonIndex_->is64BitQuasi()) {
+                  loadTranscriptsFromQuasi(salmonIndex_->quasiIndex64());
                 } else {
-                    txp.lengthClassIndex(0);
+                  loadTranscriptsFromQuasi(salmonIndex_->quasiIndex32());
                 }
-                /*
-                std::cerr << "TS = " << t.RefName << " : \n";
-                std::cerr << seq << "\n VS \n";
-                for (size_t i = 0; i < t.RefLength; ++i) {
-                    std::cerr << transcripts_.back().charBaseAt(i);
-                }
-                std::cerr << "\n\n";
-                */
-                free(rseq);
-                /* end BWA code */
-            }
-            // Since we have the de-coded reference sequences, we no longer need
-            // the encoded sequences, so free them.
-            free(idx_->pac); idx_->pac = nullptr;
-            transcripts_tmp.clear();
-            // ====== Done loading the transcripts from file
+                break;
+            case SalmonIndexType::FMD:
+                loadTranscriptsFromFMD();
+                break;
+	    }
+
 
             // Create the cluster forest for this set of transcripts
             clusters_.reset(new ClusterForest(transcripts_.size(), transcripts_));
@@ -174,9 +127,38 @@ class ReadExperiment {
     }
 
     std::vector<Transcript>& transcripts() { return transcripts_; }
+    const std::vector<Transcript>& transcripts() const { return transcripts_; }
+
+    void updateTranscriptLengthsAtomic(std::atomic<bool>& done) {
+        if (sl_.try_lock()) {
+            if (!done) {
+                auto& fld = *(fragLengthDist_.get());
+
+                std::vector<double> logPMF;
+                size_t minVal;
+                size_t maxVal;
+                double logFLDMean = fld.mean();
+                fld.dumpPMF(logPMF, minVal, maxVal);
+                double sum = salmon::math::LOG_0;
+                for (auto v : logPMF) {
+                    sum = salmon::math::logAdd(sum, v);
+                }
+                for (auto& v : logPMF) {
+                    v -= sum;
+                }
+                // Update the effective length of *every* transcript
+                for( auto& t : transcripts_ ) {
+                    t.updateEffectiveLength(logPMF, logFLDMean, minVal, maxVal);
+                }
+                // then declare that we are done
+                done = true;
+                sl_.unlock();
+            }
+        }
+    }
 
     uint64_t numAssignedFragments() { return numAssignedFragments_; }
-    uint64_t numMappedReads() { return numAssignedFragments_; }
+    uint64_t numMappedFragments() const { return numAssignedFragments_; }
 
     uint64_t upperBoundHits() { return upperBoundHits_; }
     void setUpperBoundHits(uint64_t ubh) { upperBoundHits_ = ubh; }
@@ -185,6 +167,10 @@ class ReadExperiment {
 
     void setNumObservedFragments(uint64_t numObserved) { numObservedFragments_ = numObserved; }
 
+    uint64_t numObservedFragments() const {
+        return numObservedFragments_;
+    }
+
     double mappingRate() {
         if (quantificationPasses_ > 0) {
             return static_cast<double>(numAssignedFragsInFirstPass_) / numObservedFragsInFirstPass_;
@@ -193,11 +179,153 @@ class ReadExperiment {
         }
     }
 
+    SalmonIndex* getIndex() { return salmonIndex_.get(); }
+
+    template <typename QuasiIndexT>
+    void loadTranscriptsFromQuasi(QuasiIndexT* idx_) {
+	    size_t numRecords = idx_->txpNames.size();
+
+	    fmt::print(stderr, "Index contained {} targets\n", numRecords);
+	    //transcripts_.resize(numRecords);
+	    double alpha = 0.005;
+	    for (auto i : boost::irange(size_t(0), numRecords)) {
+		    uint32_t id = i;
+		    const char* name = idx_->txpNames[i].c_str();
+		    uint32_t len = idx_->txpLens[i];
+		    // copy over the length, then we're done.
+		    transcripts_.emplace_back(id, name, len, alpha);
+		    auto& txp = transcripts_.back();
+		    // The transcript sequence
+		    //auto txpSeq = idx_->seq.substr(idx_->txpOffsets[i], len);
+
+		    // Set the transcript sequence
+		    txp.Sequence = idx_->seq.c_str() + idx_->txpOffsets[i];
+		    // Length classes taken from
+		    // ======
+		    // Roberts, Adam, et al.
+		    // "Improving RNA-Seq expression estimates by correcting for fragment bias."
+		    // Genome Biol 12.3 (2011): R22.
+		    // ======
+		    // perhaps, define these in a more data-driven way
+        if (txp.RefLength <= 1334) {
+          txp.lengthClassIndex(0);
+        } else if (txp.RefLength <= 2104) {
+          txp.lengthClassIndex(0);
+        } else if (txp.RefLength <= 2988) {
+          txp.lengthClassIndex(0);
+        } else if (txp.RefLength <= 4389) {
+          txp.lengthClassIndex(0);
+        } else {
+          txp.lengthClassIndex(0);
+        }
+      }
+	    // ====== Done loading the transcripts from file
+    }
+
+    void loadTranscriptsFromFMD() {
+	    bwaidx_t* idx_ = salmonIndex_->bwaIndex();
+	    size_t numRecords = idx_->bns->n_seqs;
+	    std::vector<Transcript> transcripts_tmp;
+        //transcripts_tmp.reserve(numRecords);
+        //transcripts_.reserve(numRecords);
+
+	    fmt::print(stderr, "Index contained {} targets\n", numRecords);
+	    //transcripts_.resize(numRecords);
+	    for (auto i : boost::irange(size_t(0), numRecords)) {
+		    uint32_t id = i;
+		    char* name = idx_->bns->anns[i].name;
+		    uint32_t len = idx_->bns->anns[i].len;
+		    // copy over the length, then we're done.
+		    transcripts_tmp.emplace_back(id, name, len);
+	    }
+
+	    std::sort(transcripts_tmp.begin(), transcripts_tmp.end(),
+			    [](const Transcript& t1, const Transcript& t2) -> bool {
+			    return t1.id < t2.id;
+			    });
+
+
+	    double alpha = 0.005;
+	    char nucTab[256];
+	    nucTab[0] = 'A'; nucTab[1] = 'C'; nucTab[2] = 'G'; nucTab[3] = 'T';
+	    for (size_t i = 4; i < 256; ++i) { nucTab[i] = 'N'; }
+
+        size_t tnum = 0;
+	    // Load the transcript sequence from file
+	    for (auto& t : transcripts_tmp) {
+		    transcripts_.emplace_back(t.id, t.RefName.c_str(), t.RefLength, alpha);
+		    /* from BWA */
+		    uint8_t* rseq = nullptr;
+		    int64_t tstart, tend, compLen, l_pac = idx_->bns->l_pac;
+		    tstart  = idx_->bns->anns[t.id].offset;
+		    tend = tstart + t.RefLength;
+		    rseq = bns_get_seq(l_pac, idx_->pac, tstart, tend, &compLen);
+		    if (compLen != t.RefLength) {
+			    fmt::print(stderr,
+					    "For transcript {}, stored length ({}) != computed length ({}) --- index may be corrupt. exiting\n",
+					    t.RefName, compLen, t.RefLength);
+			    std::exit(1);
+		    }
+		    std::string seq(t.RefLength, ' ');
+		    if (rseq != 0) {
+			    for (int64_t i = 0; i < compLen; ++i) { seq[i] = nucTab[rseq[i]]; }
+		    }
+
+            auto& txp = transcripts_.back();
+
+            // allocate space for the new copy
+            char* seqCopy = new char[seq.length()+1];
+            std::strcpy(seqCopy, seq.c_str());
+            txp.Sequence = seqCopy;
+            txp.freeSeqOnDestruct = false;
+
+		    txp.SAMSequence = salmon::stringtools::encodeSequenceInSAM(seq.c_str(), t.RefLength);
+		    // Length classes taken from
+		    // ======
+		    // Roberts, Adam, et al.
+		    // "Improving RNA-Seq expression estimates by correcting for fragment bias."
+		    // Genome Biol 12.3 (2011): R22.
+		    // ======
+		    // perhaps, define these in a more data-driven way
+		    if (t.RefLength <= 1334) {
+			    txp.lengthClassIndex(0);
+		    } else if (t.RefLength <= 2104) {
+			    txp.lengthClassIndex(0);
+		    } else if (t.RefLength <= 2988) {
+			    txp.lengthClassIndex(0);
+		    } else if (t.RefLength <= 4389) {
+			    txp.lengthClassIndex(0);
+		    } else {
+			    txp.lengthClassIndex(0);
+		    }
+		    /*
+		       std::cerr << "TS = " << t.RefName << " : \n";
+		       std::cerr << seq << "\n VS \n";
+		       for (size_t i = 0; i < t.RefLength; ++i) {
+		       std::cerr << transcripts_.back().charBaseAt(i);
+		       }
+		       std::cerr << "\n\n";
+		       */
+		    free(rseq);
+		    /* end BWA code */
+            ++tnum;
+	    }
+
+	    // Since we have the de-coded reference sequences, we no longer need
+	    // the encoded sequences, so free them.
+	    /** TEST OPT **/
+	    // free(idx_->pac); idx_->pac = nullptr;
+	    /** END TEST OPT **/
+	    transcripts_tmp.clear();
+	    // ====== Done loading the transcripts from file
+    }
+
+
     template <typename CallbackT>
-    bool processReads(const uint32_t& numThreads, CallbackT& processReadLibrary) {
-        bool burnedIn = (totalAssignedFragments_ + numAssignedFragments_ > 5000000);
+    bool processReads(const uint32_t& numThreads, const SalmonOpts& sopt, CallbackT& processReadLibrary) {
+        std::atomic<bool> burnedIn{totalAssignedFragments_ + numAssignedFragments_ > sopt.numBurninFrags};
         for (auto& rl : readLibraries_) {
-            processReadLibrary(rl, idx_, transcripts_, clusterForest(),
+            processReadLibrary(rl, salmonIndex_.get(), transcripts_, clusterForest(),
                                *(fragLengthDist_.get()), numAssignedFragments_,
                                numThreads, burnedIn);
         }
@@ -206,7 +334,7 @@ class ReadExperiment {
 
     ~ReadExperiment() {
         // ---- Get rid of things we no longer need --------
-        bwa_idx_destroy(idx_);
+        // bwa_idx_destroy(idx_);
     }
 
     ClusterForest& clusterForest() { return *clusters_.get(); }
@@ -231,11 +359,11 @@ class ReadExperiment {
         return numObservedFragsInFirstPass_;
     }
 
-    double effectiveMappingRate() {
+    double effectiveMappingRate() const {
         return effectiveMappingRate_;
     }
 
-    void setEffetiveMappingRate(double emr) {
+    void setEffectiveMappingRate(double emr) {
         effectiveMappingRate_ = emr;
     }
 
@@ -403,7 +531,22 @@ class ReadExperiment {
     }
 
     std::vector<ReadLibrary>& readLibraries() { return readLibraries_; }
-    FragmentLengthDistribution* fragmentLengthDistribution() { return fragLengthDist_.get(); }
+    FragmentLengthDistribution* fragmentLengthDistribution() const { return fragLengthDist_.get(); }
+
+    void setExpectedBias(const std::vector<double>& expectedBiasIn) {
+        expectedBias_ = expectedBiasIn;
+    }
+
+    std::vector<double>& expectedBias() {
+        return expectedBias_;
+    }
+
+    const std::vector<double>& expectedBias() const {
+        return expectedBias_;
+    }
+
+    ReadKmerDist<6, std::atomic<uint32_t>>& readBias() { return readBias_; }
+    const ReadKmerDist<6, std::atomic<uint32_t>>& readBias() const { return readBias_; }
 
     private:
     /**
@@ -424,7 +567,8 @@ class ReadExperiment {
     /**
      * The index we've built on the set of transcripts.
      */
-    bwaidx_t *idx_{nullptr};
+    std::unique_ptr<SalmonIndex> salmonIndex_{nullptr};
+    //bwaidx_t *idx_{nullptr};
     /**
      * The cluster forest maintains the dynamic relationship
      * defined by transcripts and reads --- if two transcripts
@@ -451,8 +595,15 @@ class ReadExperiment {
     uint64_t numObservedFragsInFirstPass_{0};
     uint64_t upperBoundHits_{0};
     double effectiveMappingRate_{0.0};
+    SpinLock sl_;
     std::unique_ptr<FragmentLengthDistribution> fragLengthDist_;
     EquivalenceClassBuilder eqBuilder_;
+
+    /** Sequence specific bias things **/
+    // Since multiple threads can touch this dist, we
+    // need atomic counters.
+    ReadKmerDist<6, std::atomic<uint32_t>> readBias_;
+    std::vector<double> expectedBias_;
 };
 
 #endif // EXPERIMENT_HPP
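
updateTranscriptLengthsAtomic() above normalizes the dumped fragment-length PMF in log space before updating effective lengths. Here is a minimal sketch of that normalization; logAdd() below is a stand-in for salmon::math::logAdd (not reproduced in this hunk), and the PMF values are made up:

    #include <algorithm>
    #include <cmath>
    #include <iostream>
    #include <limits>
    #include <vector>

    // Stand-in for salmon::math::logAdd: log(exp(x) + exp(y)), computed stably.
    double logAdd(double x, double y) {
        const double NEG_INF = -std::numeric_limits<double>::infinity();
        if (x == NEG_INF) { return y; }
        if (y == NEG_INF) { return x; }
        double m = std::max(x, y);
        return m + std::log1p(std::exp(std::min(x, y) - m));
    }

    int main() {
        // Unnormalized log-PMF values, as fld.dumpPMF() might hand back.
        std::vector<double> logPMF{std::log(2.0), std::log(1.0), std::log(1.0)};

        double sum = -std::numeric_limits<double>::infinity();  // plays the role of LOG_0
        for (double v : logPMF) { sum = logAdd(sum, v); }
        for (double& v : logPMF) { v -= sum; }   // exp(logPMF) now sums to 1

        for (double v : logPMF) { std::cout << std::exp(v) << " "; }  // 0.5 0.25 0.25
        std::cout << "\n";
        return 0;
    }
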
diff --git a/include/ReadKmerDist.hpp b/include/ReadKmerDist.hpp
new file mode 100644
index 0000000..e4c900b
--- /dev/null
+++ b/include/ReadKmerDist.hpp
@@ -0,0 +1,76 @@
+#ifndef READ_KMER_DIST_HPP
+#define READ_KMER_DIST_HPP
+
+#include <fstream>
+#include <iostream>
+#include <unordered_map>
+#include <vector>
+#include <algorithm>
+#include <cstdint>
+#include "UtilityFunctions.hpp"
+#include "SalmonUtils.hpp"
+
+template <uint32_t K, typename CountT = uint32_t>
+class ReadKmerDist {
+  public:
+    std::array<CountT, constExprPow(4,K)> counts;
+
+    ReadKmerDist() {
+      // set a pseudo-count of 1
+      for (size_t i = 0; i < counts.size(); ++i) {
+	counts[i] = 1;
+      }
+    }
+
+    inline constexpr uint32_t getK() { return K; }
+
+    inline uint64_t totalCount() {
+      CountT c{0};
+      for (auto const& rc : counts) { c += rc; }
+      return c;
+    }
+
+    // update the k-mer context for the hit at position p.
+    // The underlying transcript is from [start, end)
+    inline bool update(const char* start, const char *p, const char *end,
+	salmon::utils::Direction dir) {
+      using salmon::utils::Direction;
+      int posBeforeHit = 2;
+      int posAfterHit = 4;
+      bool success{false};
+      switch (dir) {
+	case Direction::FORWARD :
+	  {
+	    // If we can fit the window before and after the read
+	    if ((p - start) >= posBeforeHit and
+		((p - posBeforeHit + K) < end) ) {
+	      p -= posBeforeHit;
+	      // If the read matches in the forward direction, we take
+	      // the RC sequence.
+	      auto idx = indexForKmer(p, K, Direction::REVERSE_COMPLEMENT);
+	      if (idx > counts.size()) { return false; }
+	      counts[idx]++;
+	      success = true;
+	    }
+	  }
+	  break;
+	case Direction::REVERSE_COMPLEMENT :
+	  {
+	    if ((p - start) >= posAfterHit and
+		((p - posAfterHit + K) < end) ) {
+	      p -= posAfterHit;
+	      auto idx = indexForKmer(p, K, Direction::FORWARD);
+	      if (idx > counts.size()) { return false; }
+	      counts[idx]++;
+	      success = true;
+	    }
+	  }
+	  break;
+	default:
+	  break;
+      }
+      return success;
+    }
+
+};
+#endif // READ_KMER_DIST_HPP 
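
ReadKmerDist keeps one counter per K-mer (4^K bins via constExprPow) and increments the bin that indexForKmer() returns for the sequence context around a hit. The sketch below illustrates the general 2-bit K-mer encoding idea with a hypothetical kmerIndex() helper; the real indexForKmer() in UtilityFunctions.hpp may differ in base ordering and reverse-complement handling:

    #include <array>
    #include <cstdint>
    #include <iostream>
    #include <string>

    // Hypothetical 2-bit encoder; the real indexForKmer() in UtilityFunctions.hpp
    // may use a different base order and also handles reverse complements.
    uint32_t kmerIndex(const char* p, uint32_t k) {
        uint32_t idx = 0;
        for (uint32_t i = 0; i < k; ++i) {
            uint32_t code = 0;                 // 'A' and anything unrecognized map to 0
            switch (p[i]) {
                case 'C': code = 1; break;
                case 'G': code = 2; break;
                case 'T': code = 3; break;
                default: break;
            }
            idx = (idx << 2) | code;
        }
        return idx;
    }

    int main() {
        constexpr uint32_t K = 6;
        std::array<uint32_t, 1u << (2 * K)> counts{};  // 4^6 = 4096 bins, zeroed
        std::string txp = "ACGTACGTGGAC";

        // Count every K-mer context in a toy "transcript".
        for (size_t i = 0; i + K <= txp.size(); ++i) {
            counts[kmerIndex(txp.c_str() + i, K)]++;
        }
        std::cout << "ACGTAC occurs " << counts[kmerIndex("ACGTAC", K)] << " time(s)\n";
        return 0;
    }
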
diff --git a/include/ReadLibrary.hpp b/include/ReadLibrary.hpp
index b3a0070..834f40d 100644
--- a/include/ReadLibrary.hpp
+++ b/include/ReadLibrary.hpp
@@ -31,7 +31,7 @@ public:
         mateOneFilenames_(rl.mateOneFilenames_),
         mateTwoFilenames_(rl.mateTwoFilenames_),
         libTypeCounts_(std::vector<std::atomic<uint64_t>>(LibraryFormat::maxLibTypeID() + 1)) {
-            auto mc = LibraryFormat::maxLibTypeID() + 1;
+            size_t mc = LibraryFormat::maxLibTypeID() + 1;
             for (size_t i = 0; i < mc; ++i) { libTypeCounts_[i].store(rl.libTypeCounts_[i].load()); }
         }
 
@@ -44,7 +44,7 @@ public:
         mateOneFilenames_(std::move(rl.mateOneFilenames_)),
         mateTwoFilenames_(std::move(rl.mateTwoFilenames_)),
         libTypeCounts_(std::vector<std::atomic<uint64_t>>(LibraryFormat::maxLibTypeID() + 1)) {
-            auto mc = LibraryFormat::maxLibTypeID() + 1;
+            size_t mc = LibraryFormat::maxLibTypeID() + 1;
             for (size_t i = 0; i < mc; ++i) { libTypeCounts_[i].store(rl.libTypeCounts_[i].load()); }
         }
 
diff --git a/include/ReadPair.hpp b/include/ReadPair.hpp
index cf68db9..1a06f5c 100644
--- a/include/ReadPair.hpp
+++ b/include/ReadPair.hpp
@@ -5,8 +5,9 @@
 #include "SalmonMath.hpp"
 #include "LibraryFormat.hpp"
 #include "SalmonUtils.hpp"
+#include "RapMapUtils.hpp"
 
-#include "format.h"
+#include "spdlog/details/format.h"
 
 struct ReadPair {
     bam_seq_t* read1 = nullptr;
@@ -43,7 +44,6 @@ struct ReadPair {
         return *this;
     }
 
-
    ReadPair(ReadPair& other) = default;
 
    ReadPair& operator=(ReadPair& other) = default;
@@ -61,6 +61,27 @@ struct ReadPair {
     inline bool isPaired() const { return (orphanStatus == salmon::utils::OrphanStatus::Paired); }
     inline bool isLeftOrphan() const { return (orphanStatus == salmon::utils::OrphanStatus::LeftOrphan); }
     inline bool isRightOrphan() const { return (orphanStatus == salmon::utils::OrphanStatus::RightOrphan); }
+    inline bam_seq_t* get5PrimeRead() {
+        return (isPaired() or isLeftOrphan()) ? read1 : nullptr;
+    }
+
+    inline rapmap::utils::MateStatus mateStatus() const {
+        if (isPaired()) {
+            return rapmap::utils::MateStatus::PAIRED_END_PAIRED;
+        } else if (isLeftOrphan()) {
+            return rapmap::utils::MateStatus::PAIRED_END_LEFT;
+        } else if (isRightOrphan()) {
+            return rapmap::utils::MateStatus::PAIRED_END_RIGHT;
+        }
+
+        std::cerr << "ReadPair.hpp : mateStatus() --- should not get here; ";
+        std::cerr << "this may be a bug.  Please report it\n";
+
+        return rapmap::utils::MateStatus::PAIRED_END_PAIRED;
+    }
+
+    inline int32_t pos() const { return left(); }
+    inline bool fwd() const { return !bam_strand(read1); }
 
     /**
       * returns 0 on success, -1 on failure.
diff --git a/include/SalmonConfig.hpp b/include/SalmonConfig.hpp
index 6fb94fc..62f0568 100644
--- a/include/SalmonConfig.hpp
+++ b/include/SalmonConfig.hpp
@@ -27,9 +27,10 @@
 
 namespace salmon {
 	constexpr char majorVersion[] = "0";
-	constexpr char minorVersion[] = "4";
-	constexpr char patchVersion[] = "2";
-	constexpr char version[] = "0.4.2";
+	constexpr char minorVersion[] = "6";
+	constexpr char patchVersion[] = "0";
+	constexpr char version[] = "0.6.0";
+    constexpr uint32_t indexVersion = 2;
 }
 
 #endif // SALMON_CONFIG_HPP
diff --git a/include/SalmonIndex.hpp b/include/SalmonIndex.hpp
new file mode 100644
index 0000000..7e65d64
--- /dev/null
+++ b/include/SalmonIndex.hpp
@@ -0,0 +1,321 @@
+#ifndef __SALMON_INDEX_HPP__
+#define __SALMON_INDEX_HPP__
+
+extern "C" {
+#include "bwa.h"
+#include "bwamem.h"
+#include "kvec.h"
+#include "utils.h"
+}
+
+#include <memory>
+
+#include <boost/filesystem.hpp>
+#include <boost/range/irange.hpp>
+
+#include "spdlog/spdlog.h"
+#include "cereal/archives/json.hpp"
+#include "cereal/types/vector.hpp"
+
+#include "RapMapSAIndex.hpp"
+#include "IndexHeader.hpp"
+#include "BWAUtils.hpp"
+#include "SalmonConfig.hpp"
+#include "SalmonIndexVersionInfo.hpp"
+#include "KmerIntervalMap.hpp"
+
+extern "C" {
+int bwa_index(int argc, char* argv[]);
+}
+// declaration of quasi index function
+int rapMapSAIndex(int argc, char* argv[]);
+
+
+class SalmonIndex{
+        public:
+            SalmonIndex(std::shared_ptr<spdlog::logger>& logger, SalmonIndexType indexType) :
+                loaded_(false), versionInfo_(0, false, 0, indexType), logger_(logger) {}
+
+            ~SalmonIndex() {
+                if (idx_) { bwa_idx_destroy(idx_); }
+            }
+
+            void load(const boost::filesystem::path& indexDir) {
+                namespace bfs = boost::filesystem;
+
+                // Check if version file exists and, if so, read it.
+                boost::filesystem::path versionPath = indexDir / "versionInfo.json";
+                versionInfo_.load(versionPath);
+                if (versionInfo_.indexVersion() == 0) {
+                    fmt::MemoryWriter infostr;
+                    infostr << "Error: The index version file " << versionPath.string()
+                            << " doesn't seem to exist.  Please try re-building the salmon "
+                               "index.";
+                    throw std::invalid_argument(infostr.str());
+                }
+                // Check index version compatibility here
+
+                auto indexType = versionInfo_.indexType();
+                // Load the appropriate index type
+                if (indexType == SalmonIndexType::FMD) {
+                    loadFMDIndex_(indexDir);
+                } else {
+                    loadQuasiIndex_(indexDir);
+                }
+
+                loaded_ = true;
+            }
+
+            bool buildAux_(boost::filesystem::path indexDir, uint32_t k) {
+                       namespace bfs = boost::filesystem;
+
+                       bfs::path indexPath = indexDir / "bwaidx";
+                       // Load the bwa index
+                       {
+                           logger_->info("Reading BWT index from file");
+                           if ((idx_ = bwa_idx_load(indexPath.string().c_str(), BWA_IDX_BWT|BWA_IDX_BNS|BWA_IDX_PAC)) == 0) {
+                               logger_->error("Couldn't open index [{}] --- ", indexPath);
+                               logger_->error("Please make sure that 'salmon index' has been run successfully");
+                               std::exit(1);
+                           }
+                       }
+
+                       auxIdx_.setK(k);
+                       size_t numRecords = idx_->bns->n_seqs;
+                       { // Load transcripts from file
+                          logger_->info("Index contained {} targets; streaming through them", numRecords);
+                          for (auto i : boost::irange(size_t(0), numRecords)) {
+                              char* name = idx_->bns->anns[i].name;
+                              uint32_t len = idx_->bns->anns[i].len;
+                              uint8_t* rseq = nullptr;
+                              int64_t tstart, tend, compLen, l_pac = idx_->bns->l_pac;
+                              tstart  = idx_->bns->anns[i].offset;
+                              tend = tstart + len;
+                              rseq = bns_get_seq(l_pac, idx_->pac, tstart, tend, &compLen);
+                              if (compLen != len) {
+                                  fmt::print(stderr,
+                                          "For transcript {}, stored length ({}) != computed length ({}) --- index may be corrupt. exiting\n",
+                                          name, compLen, len);
+                                  std::exit(1);
+                              }
+                              if (len < k) { continue; }
+                              for (uint32_t s = 0; s < len - k + 1; ++s) {
+                                  bwtintv_t resInterval;
+                                  KmerKey key(&(rseq[s]), k);
+                                  if (!auxIdx_.hasKmer(key)) {
+                                      bool foundInterval = bwautils::getIntervalForKmer(idx_->bwt, k, &(rseq[s]), resInterval);
+                                      // If we found the interval for this k-mer
+                                      if (foundInterval) {
+                                          // If the interval for this k-mer isn't already in the hash
+                                          // then put it in the hash
+                                          auxIdx_[key] = resInterval;
+                                      }
+                                  }
+                              }
+                          }
+                          // Since we have the de-coded reference sequences, we no longer need
+                          // the encoded sequences, so free them.
+                          free(idx_->pac); idx_->pac = nullptr;
+                          // ====== Done streaming through transcripts
+                       }
+
+                       bfs::path auxIndexFile = indexDir / "aux.idx";
+                       auxIdx_.save(auxIndexFile);
+                       return true;
+            }
+
+
+            bool build(boost::filesystem::path indexDir,
+                       std::vector<std::string>& argVec,
+                       uint32_t k) {
+                namespace bfs = boost::filesystem;
+                switch (versionInfo_.indexType()) {
+                    case SalmonIndexType::QUASI:
+                        return buildQuasiIndex_(indexDir, argVec, k);
+                    case SalmonIndexType::FMD:
+                        return buildFMDIndex_(indexDir, argVec, k);
+                    default:
+                        logger_->warn("Unexpected index type; cannot build");
+                        return false;
+                }
+            }
+
+            bool loaded() { return loaded_; }
+            bwaidx_t* bwaIndex() { return idx_; }
+
+            bool is64BitQuasi() { return largeQuasi_; }
+            RapMapSAIndex<int32_t>* quasiIndex32() { return quasiIndex32_.get(); }
+            RapMapSAIndex<int64_t>* quasiIndex64() { return quasiIndex64_.get(); }
+
+            bool hasAuxKmerIndex() { return versionInfo_.hasAuxKmerIndex(); }
+            KmerIntervalMap& auxIndex() { return auxIdx_; }
+
+            SalmonIndexType indexType() { return versionInfo_.indexType(); }
+
+	    const char* transcriptomeSeq() {
+	      if (loaded_) {
+		if (is64BitQuasi()) {
+		  return quasiIndex64_->seq.c_str();
+		} else {
+		  return quasiIndex32_->seq.c_str();
+		}
+	      } else {
+		return nullptr;
+	      }
+	    }
+
+	    uint64_t transcriptOffset(uint64_t id) {
+	      if (loaded_) {
+		if (is64BitQuasi()) {
+		  return quasiIndex64_->txpOffsets[id];
+		} else {
+		  return quasiIndex32_->txpOffsets[id];
+		}
+	      } else {
+		return std::numeric_limits<uint64_t>::max();
+	      }
+	    }
+
+
+        private:
+            bool buildFMDIndex_(boost::filesystem::path indexDir,
+                                std::vector<std::string>& bwaArgVec,
+                                uint32_t k) {
+                namespace bfs = boost::filesystem;
+                char* bwaArgv[] = {
+                    const_cast<char*>(bwaArgVec[0].c_str()),
+                    const_cast<char*>(bwaArgVec[1].c_str()),
+                    const_cast<char*>(bwaArgVec[2].c_str()),
+                    const_cast<char*>(bwaArgVec[3].c_str()),
+                    const_cast<char*>(bwaArgVec[4].c_str()),
+                    const_cast<char*>(bwaArgVec[5].c_str()) };
+                int bwaArgc = 6;
+                int ret = bwa_index(bwaArgc, bwaArgv);
+
+                bool buildAux = (k > 0);
+                if (buildAux) {
+                    buildAux_(indexDir, k);
+                }
+
+                bfs::path versionFile = indexDir / "versionInfo.json";
+                versionInfo_.indexVersion(salmon::indexVersion);
+                versionInfo_.hasAuxKmerIndex(buildAux);
+                versionInfo_.auxKmerLength(k);
+                versionInfo_.save(versionFile);
+                return (ret == 0);
+            }
+
+            bool buildQuasiIndex_(boost::filesystem::path indexDir,
+                                  std::vector<std::string>& quasiArgVec,
+                                  uint32_t k) {
+                namespace bfs = boost::filesystem;
+                char* quasiArgv[] = {
+                    const_cast<char*>(quasiArgVec[0].c_str()),
+                    const_cast<char*>(quasiArgVec[1].c_str()),
+                    const_cast<char*>(quasiArgVec[2].c_str()),
+                    const_cast<char*>(quasiArgVec[3].c_str()),
+                    const_cast<char*>(quasiArgVec[4].c_str()),
+                    const_cast<char*>(quasiArgVec[5].c_str()),
+                    const_cast<char*>(quasiArgVec[6].c_str())
+                };
+                int quasiArgc = 7;
+
+                int ret = rapMapSAIndex(quasiArgc, quasiArgv);
+
+                bfs::path versionFile = indexDir / "versionInfo.json";
+                versionInfo_.indexVersion(salmon::indexVersion);
+                versionInfo_.hasAuxKmerIndex(false);
+                versionInfo_.auxKmerLength(k);
+                versionInfo_.indexType(SalmonIndexType::QUASI);
+                versionInfo_.save(versionFile);
+                return (ret == 0);
+            }
+
+          bool loadFMDIndex_(const boost::filesystem::path& indexDir) {
+              namespace bfs = boost::filesystem;
+              if (versionInfo_.hasAuxKmerIndex()) {
+                  // Read the aux index
+                  logger_->info("Loading auxiliary index");
+                  bfs::path auxIdxFile = indexDir / "aux.idx";
+                  auxIdx_.setK(versionInfo_.auxKmerLength());
+                  auxIdx_.load(auxIdxFile);
+                  logger_->info("Auxiliary index contained {} k-mers", auxIdx_.size());
+                  logger_->info("done");
+              }
+
+              logger_->info("Loading BWA index");
+              // Read the actual BWA index
+              { // mem-based
+                  boost::filesystem::path indexPath = indexDir / "bwaidx";
+                  //if ((idx_ = bwa_idx_load(indexPath.string().c_str(), BWA_IDX_BWT|BWA_IDX_BNS|BWA_IDX_PAC)) == 0) {
+                  if ((idx_ = bwa_idx_load(indexPath.string().c_str(), BWA_IDX_ALL)) == 0) {
+                      fmt::print(stderr, "Couldn't open index [{}] --- ", indexPath);
+                      fmt::print(stderr, "Please make sure that 'salmon index' has been run successfully\n");
+                      std::exit(1);
+                  }
+              }
+              logger_->info("done");
+              return true;
+          }
+
+          bool loadQuasiIndex_(const boost::filesystem::path& indexDir) {
+              namespace bfs = boost::filesystem;
+              logger_->info("Loading Quasi index");
+              // Read the actual Quasi index
+              { // quasi-based
+                  boost::filesystem::path indexPath = indexDir;
+                  std::string indexStr = indexDir.string();
+                  if (indexStr.back() != '/') { indexStr.push_back('/'); }
+
+                  IndexHeader h;
+                  std::ifstream indexStream(indexStr + "header.json");
+                  {
+                    cereal::JSONInputArchive ar(indexStream);
+                    ar(h);
+                  }
+                  indexStream.close();
+
+                  if (h.indexType() != IndexType::QUASI) {
+                    fmt::print(stderr, "The index {} does not appear to be of the "
+                                        "appropriate type (quasi)", indexStr);
+                    std::exit(1);
+                  }
+
+                  if (h.bigSA()) {
+                    largeQuasi_ = true;
+                    fmt::print(stderr, "Loading 64-bit quasi index");
+                    quasiIndex64_.reset(new RapMapSAIndex<int64_t>);
+                    if (!quasiIndex64_->load(indexStr)) {
+                      fmt::print(stderr, "Couldn't open index [{}] --- ", indexPath);
+                      fmt::print(stderr, "Please make sure that 'salmon index' has been run successfully\n");
+                      std::exit(1);
+                    }
+                  } else {
+                    fmt::print(stderr, "Loading 32-bit quasi index");
+                    quasiIndex32_.reset(new RapMapSAIndex<int32_t>);
+                    if(!quasiIndex32_->load(indexStr)) {
+                      fmt::print(stderr, "Couldn't open index [{}] --- ", indexPath);
+                      fmt::print(stderr, "Please make sure that 'salmon index' has been run successfully\n");
+                      std::exit(1);
+                    }
+                  }
+              }
+              logger_->info("done");
+              return true;
+          }
+
+
+          bool loaded_;
+          SalmonIndexVersionInfo versionInfo_;
+          // Can't think of a generally better way to do this now
+          // without making the entire code-base look crazy
+          bool largeQuasi_{false};
+      	  std::unique_ptr<RapMapSAIndex<int32_t>> quasiIndex32_{nullptr};
+      	  std::unique_ptr<RapMapSAIndex<int64_t>> quasiIndex64_{nullptr};
+
+          bwaidx_t *idx_{nullptr};
+          KmerIntervalMap auxIdx_;
+          std::shared_ptr<spdlog::logger> logger_;
+};
+
+#endif //__SALMON_INDEX_HPP
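A rough sketch of how this class is meant to be driven from the quantification side (the logger name and index path below are placeholders, not taken from this patch):

    // Sketch: load an existing quasi index and query it.
    auto logger = spdlog::stderr_logger_mt("indexLog");       // placeholder logger name
    SalmonIndex sidx(logger, SalmonIndexType::QUASI);
    sidx.load(boost::filesystem::path("/path/to/index"));     // placeholder path
    if (sidx.loaded() and sidx.indexType() == SalmonIndexType::QUASI) {
        const char* seq = sidx.transcriptomeSeq();             // concatenated transcript sequences
        uint64_t off    = sidx.transcriptOffset(0);             // start of transcript 0 within seq
    }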
diff --git a/include/SalmonIndexVersionInfo.hpp b/include/SalmonIndexVersionInfo.hpp
new file mode 100644
index 0000000..ea885bb
--- /dev/null
+++ b/include/SalmonIndexVersionInfo.hpp
@@ -0,0 +1,80 @@
+#ifndef __SALMON_INDEX_VERSION_INFO_HPP__
+#define __SALMON_INDEX_VERSION_INFO_HPP__
+
+#include "spdlog/details/format.h"
+#include "boost/filesystem.hpp"
+#include "cereal/archives/json.hpp"
+
+enum class SalmonIndexType : uint8_t { FMD, QUASI };
+
+
+class SalmonIndexVersionInfo {
+    public:
+        /**
+         * default constructor(s)
+         */
+       SalmonIndexVersionInfo() : indexVersion_(0), hasAuxKmerIndex_(false),
+                                  auxKmerLength_(0), indexType_(SalmonIndexType::FMD) {}
+
+       SalmonIndexVersionInfo(uint32_t indexVersionIn, bool hasAuxKmerIndexIn,
+                              uint32_t auxKmerLengthIn, SalmonIndexType indexTypeIn) :
+			      indexVersion_(indexVersionIn),
+                              hasAuxKmerIndex_(hasAuxKmerIndexIn), auxKmerLength_(auxKmerLengthIn),
+			      indexType_(indexTypeIn) {}
+
+       /**
+        * Read the index version info from file
+        */
+       bool load(boost::filesystem::path& versionFile) {
+           namespace bfs = boost::filesystem;
+            if(!bfs::exists(versionFile)) {
+                fmt::MemoryWriter infostr;
+                infostr << "Error: The index version file " << versionFile.string()
+                        << " doesn't seem to exist.  Please try re-building the salmon "
+                           "index.";
+                throw std::invalid_argument(infostr.str());
+            }
+            std::ifstream ifs(versionFile.string());
+            {
+                cereal::JSONInputArchive iarchive(ifs); // Create an input archive
+                iarchive(cereal::make_nvp("indexVersion", indexVersion_),
+                        cereal::make_nvp("hasAuxIndex", hasAuxKmerIndex_),
+                        cereal::make_nvp("auxKmerLength", auxKmerLength_),
+            			cereal::make_nvp("indexType", indexType_));
+            }
+            ifs.close();
+            return true;
+       }
+
+       bool save(boost::filesystem::path& versionFile) {
+           std::ofstream ofs(versionFile.string());
+           {
+               cereal::JSONOutputArchive oarchive(ofs);
+               oarchive(cereal::make_nvp("indexVersion", indexVersion_),
+                       cereal::make_nvp("hasAuxIndex", hasAuxKmerIndex_),
+                       cereal::make_nvp("auxKmerLength", auxKmerLength_),
+		               cereal::make_nvp("indexType", indexType_));
+           }
+           ofs.close();
+           return true;
+       }
+
+       bool hasAuxKmerIndex() { return hasAuxKmerIndex_; }
+       void hasAuxKmerIndex(bool val) { hasAuxKmerIndex_ = val; }
+
+       uint32_t indexVersion() { return indexVersion_; }
+       void indexVersion(uint32_t version) { indexVersion_ = version; }
+
+       uint32_t auxKmerLength() { return auxKmerLength_; }
+       void auxKmerLength(uint32_t len) { auxKmerLength_ = len; };
+
+       SalmonIndexType indexType() { return indexType_; }
+       void indexType(SalmonIndexType indexTypeIn) { indexType_ = indexTypeIn; };
+    private:
+        uint32_t indexVersion_;
+        bool hasAuxKmerIndex_;
+        uint32_t auxKmerLength_;
+        SalmonIndexType indexType_;
+};
+
+#endif // __SALMON_INDEX_VERSION_INFO_HPP__
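With the NVP names used in load()/save() above, the versionInfo.json written alongside an index looks roughly like the following (values illustrative; cereal writes the enum as its underlying integer):

    {
        "indexVersion": 2,
        "hasAuxIndex": false,
        "auxKmerLength": 31,
        "indexType": 1
    }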
diff --git a/include/SalmonMath.hpp b/include/SalmonMath.hpp
index e439a69..6729232 100644
--- a/include/SalmonMath.hpp
+++ b/include/SalmonMath.hpp
@@ -1,6 +1,34 @@
 #ifndef SALMON_MATH_HPP
 #define SALMON_MATH_HPP
 
+// If we have built-ins, do as Boost does
+
+#ifndef BOOST_LIKELY
+#if defined(__has_builtin)
+#if __has_builtin(__builtin_expect)
+#define BOOST_LIKELY(x) __builtin_expect(x, 1)
+#endif
+#endif
+#endif
+
+#ifndef BOOST_UNLIKELY
+#if defined(__has_builtin)
+#if __has_builtin(__builtin_expect)
+#define BOOST_UNLIKELY(x) __builtin_expect(x, 0)
+#endif
+#endif
+#endif
+
+// If we didn't have those built-ins fall back to this
+
+#ifndef BOOST_LIKELY
+#define BOOST_LIKELY(x) (x)
+#endif
+
+#ifndef BOOST_UNLIKELY
+#define BOOST_UNLIKELY(x) (x)
+#endif
+
 #include <cmath>
 #include <cassert>
 
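These fallbacks make the branch hints safe to use unconditionally; a small usage sketch (not taken from this patch), assuming the LOG_0 constant from this header:

    // Sketch: hint that the domain-error branch is rare.
    inline double guardedLog(double x) {
        if (BOOST_UNLIKELY(x <= 0.0)) {
            return salmon::math::LOG_0;   // assumed constant from SalmonMath.hpp
        }
        return std::log(x);
    }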
diff --git a/include/SalmonOpts.hpp b/include/SalmonOpts.hpp
index fcf0a00..f3cd122 100644
--- a/include/SalmonOpts.hpp
+++ b/include/SalmonOpts.hpp
@@ -26,20 +26,23 @@ struct SalmonOpts {
     bool maxMEMIntervals; // If true, don't split (S)MEMs into MEMs
     */
 
-    SalmonOpts() : splitSpanningSeeds(false), noFragLengthDist(false),
+    SalmonOpts() : allowOrphans(false), splitSpanningSeeds(false), noFragLengthDist(false),
                    noEffectiveLengthCorrection(false), useReadCompat(false),
                    maxReadOccs(200), extraSeedPass(false),
-                   mappingCacheMemoryLimit(5000000) {}
+                   mappingCacheMemoryLimit(5000000), useQuasi(false) {}
+
+    bool allowOrphans; // Consider orphaned reads when performing lightweight alignment.
+
     bool splitSpanningSeeds; // Attempt to split seeds that span multiple transcripts.
 
-    bool noFragLengthDist; // Do not give a fragment assignment a likelihood based on an emperically
-                           // observed fragment length distribution.
+    bool noFragLengthDist; // Don't give a fragment assignment a likelihood based on an empirically
+                           // observed fragment length distribution.
 
     bool noEffectiveLengthCorrection; // Don't take the fragment length distribution into
                                       // account when computing the probability that a
                                      // fragment was generated from a transcript.
 
-    bool noFragStartPosDist; // Don't learn a non-uniform start distribution
+    bool useFSPD; // Learn a non-uniform start distribution
 
     bool noSeqBiasModel; // Don't learn and use a sequence-specific bias model.
 
@@ -76,6 +79,10 @@ struct SalmonOpts {
 
     uint32_t maxExpectedReadLen; // Maximum expected length of an observed read.
 
+    bool useMassBanking; // DEPRECATED
+
+    bool sensitive; // Perform splitting of long SMEMs into MEMs
+
     bool extraSeedPass; // Perform extra pass trying to find seeds to cover the read
 
     bool disableMappingCache; // Don't write mapping results to temporary mapping cache file
@@ -84,12 +91,20 @@ struct SalmonOpts {
 
     boost::filesystem::path indexDirectory; // Index directory
 
-    bool useMassBanking; // Bank unique mass in subsequent epochs of inference
-
     bool useVBOpt; // Use Variational Bayesian EM instead of "regular" EM in the batch passes
-    bool useGSOpt; // Do Gibbs Sampling after optimization
 
-    uint32_t numGibbsSamples; // Number of rounds of Gibbs sampling to perform 
+    bool useQuasi; // Are we using the quasi-mapping based index or not.
+
+    bool sampleOutput; // Sample alignments according to posterior estimates of transcript abundance.
+    bool sampleUnaligned; // Pass along un-aligned reads in the sampling.
+
+    uint32_t numGibbsSamples; // Number of rounds of Gibbs sampling to perform
+    uint32_t numBootstraps; // Number of bootstrap samples to draw
+
+    bool alnMode{false};     // true if we're in alignment based mode, false otherwise
+    bool biasCorrect{false}; // Perform sequence-specific bias correction
+    std::atomic<int32_t> numBiasSamples{1000000}; // The number of fragment mappings to consider when building
+						  // the sequence-specific "foreground" distribution.
 
     // Related to the fragment length distribution
     size_t fragLenDistMax;
diff --git a/include/SpinLock.hpp b/include/SalmonSpinLock.hpp
similarity index 88%
rename from include/SpinLock.hpp
rename to include/SalmonSpinLock.hpp
index e2cc9c8..1dcecca 100644
--- a/include/SpinLock.hpp
+++ b/include/SalmonSpinLock.hpp
@@ -1,5 +1,5 @@
-#ifndef __SPIN_LOCK_HPP__
-#define __SPIN_LOCK_HPP__
+#ifndef __SALMON_SPIN_LOCK_HPP__
+#define __SALMON_SPIN_LOCK_HPP__
 
 #include <atomic>
 
@@ -35,4 +35,4 @@ public:
   };
 };
 
-#endif // __SPIN_LOCK_HPP__
+#endif // __SALMON_SPIN_LOCK_HPP__
diff --git a/include/SalmonUtils.hpp b/include/SalmonUtils.hpp
index 7a6de01..6fd8ed5 100644
--- a/include/SalmonUtils.hpp
+++ b/include/SalmonUtils.hpp
@@ -18,7 +18,9 @@ extern "C" {
 #include <boost/filesystem.hpp>
 #include <boost/program_options.hpp>
 
-#include "format.h"
+#include <Eigen/Dense>
+
+#include "spdlog/details/format.h"
 
 #include "SalmonOpts.hpp"
 #include "SalmonMath.hpp"
@@ -27,9 +29,11 @@ extern "C" {
 #include "ReadLibrary.hpp"
 #include "TranscriptGeneMap.hpp"
 #include "GenomicFeature.hpp"
+#include "RapMapUtils.hpp"
 
 class ReadExperiment;
 class LibraryFormat;
+class FragmentLengthDistribution;
 
 namespace salmon{
 namespace utils {
@@ -38,6 +42,16 @@ using std::string;
 using NameVector = std::vector<string>;
 using IndexVector = std::vector<size_t>;
 using KmerVector = std::vector<uint64_t>;
+using MateStatus = rapmap::utils::MateStatus;
+
+// An enum class for direction to avoid potential errors
+// with keeping everything as a bool
+enum class Direction { FORWARD = 0, REVERSE_COMPLEMENT = 1 };
+
+// Returns FORWARD if isFwd is true and REVERSE_COMPLEMENT otherwise
+constexpr inline Direction boolToDirection(bool isFwd) {
+  return isFwd ? Direction::FORWARD : Direction::REVERSE_COMPLEMENT;
+}
 
 // Returns a uint64_t where the upper 32-bits
 // contain tid and the lower 32-bits contain offset
@@ -74,6 +88,12 @@ TranscriptGeneMap readTranscriptToGeneMap( std::ifstream &ifile );
 
 TranscriptGeneMap transcriptToGeneMapFromFasta( const std::string& transcriptsFile );
 
+template <typename AbundanceVecT, typename ReadExpT>
+Eigen::VectorXd updateEffectiveLengths(ReadExpT& readExp,
+    Eigen::VectorXd& effLensIn,
+    AbundanceVecT& alphas,
+    std::vector<double>& transcriptKmerDist);
+
 /*
  * Use atomic compare-and-swap to update val to
  * val + inc (*in log-space*).  Update occurs in a loop in case other
@@ -90,6 +110,15 @@ inline void incLoopLog(tbb::atomic<double>& val, double inc) {
 	} while (returnedMass != oldMass);
 }
 
+
+/*
+ * Same as above, but overloaded for "plain" doubles
+ */
+inline void incLoop(double& val, double inc) {
+	val += inc;
+}
+
+
 /*
  * Use atomic compare-and-swap to update val to
  * val + inc.  Update occurs in a loop in case other
@@ -109,11 +138,12 @@ inline void incLoop(tbb::atomic<double>& val, double inc) {
 
 void aggregateEstimatesToGeneLevel(TranscriptGeneMap& tgm, boost::filesystem::path& inputPath);
 
+std::vector<int32_t> samplesFromLogPMF(FragmentLengthDistribution* fld, int32_t numSamples);
+
 // NOTE: Throws an invalid_argument exception if the quant or quant_bias_corrected files do
 // not exist!
 void generateGeneLevelEstimates(boost::filesystem::path& geneMapPath,
-                                boost::filesystem::path& estDir,
-                                bool haveBiasCorrectedFile = false);
+                                boost::filesystem::path& estDir);
 
     enum class OrphanStatus: uint8_t { LeftOrphan = 0, RightOrphan = 1, Paired = 2 };
 
@@ -137,8 +167,14 @@ void generateGeneLevelEstimates(boost::filesystem::path& geneMapPath,
     void normalizeAlphas(const SalmonOpts& sopt,
                          AlnLibT& alnLib);
 
+    double logAlignFormatProb(const LibraryFormat observed,
+                              const LibraryFormat expected,
+                              int32_t start, bool isForward,
+                              rapmap::utils::MateStatus ms,
+                              double incompatPrior);
 
-    double logAlignFormatProb(const LibraryFormat observed, const LibraryFormat expected, double incompatPrior);
+    bool compatibleHit(const LibraryFormat expected, int32_t start, bool isForward, MateStatus ms);
+    bool compatibleHit(const LibraryFormat expected, const LibraryFormat observed);
 
     std::ostream& operator<<(std::ostream& os, OrphanStatus s);
     /**
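The compare-and-swap idiom behind incLoop()/incLoopLog() generalizes to any read-modify-write on an atomic double; a sketch of the same pattern with std::atomic, assuming salmon::math::logAdd as used elsewhere in this header:

    // Sketch: lock-free log-space accumulation.
    #include <atomic>

    inline void atomicIncLoopLog(std::atomic<double>& val, double inc) {
        double oldMass = val.load();
        double newMass;
        do {
            newMass = salmon::math::logAdd(oldMass, inc);
            // compare_exchange_weak refreshes oldMass on failure,
            // so the loop retries with the newly observed value.
        } while (!val.compare_exchange_weak(oldMass, newMass));
    }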
diff --git a/include/Sampler.hpp b/include/Sampler.hpp
index d083f55..c71310c 100644
--- a/include/Sampler.hpp
+++ b/include/Sampler.hpp
@@ -9,9 +9,8 @@ extern "C" {
 }
 
 // for cpp-format
-#include "format.h"
-
 #include "spdlog/spdlog.h"
+#include "spdlog/details/format.h"
 
 #include <tbb/atomic.h>
 #include <iostream>
@@ -99,12 +98,12 @@ namespace salmon {
                 using salmon::math::logAdd;
                 using salmon::math::logSub;
 
-                bool useFSPD{!salmonOpts.noFragStartPosDist};
+                bool useFSPD{salmonOpts.useFSPD};
                 auto& refs = alnLib.transcripts();
                 auto& clusterForest = alnLib.clusterForest();
                 auto& fragmentQueue = alnLib.fragmentQueue();
                 auto& alignmentGroupQueue = alnLib.alignmentGroupQueue();
-                auto& fragLengthDist = alnLib.fragmentLengthDistribution();
+                auto& fragLengthDist = *(alnLib.fragmentLengthDistribution());
                 auto& alnMod = alnLib.alignmentModel();
 
                 std::vector<FragmentStartPositionDistribution>& fragStartDists =
@@ -176,13 +175,25 @@ namespace salmon {
                                         }
                                     }
 
-                                    // The alignment probability is the product of a transcript-level term (based on abundance and) an alignment-level
-                                    // term below which is P(Q_1) * P(Q_2) * P(F | T)
-                                    double logRefLength = std::log(refLength);
+                                    // The alignment probability is the product of a
+                                    // transcript-level term (based on abundance) and an
+                                    // alignment-level term.
+                                    double logRefLength{salmon::math::LOG_0};
+                                    if (salmonOpts.noEffectiveLengthCorrection or !burnedIn) {
+                                        logRefLength = std::log(transcript.RefLength);
+                                    } else {
+                                        logRefLength = transcript.getCachedLogEffectiveLength();
+                                    }
+
 
-                                    double logAlignCompatProb = (salmonOpts.useReadCompat) ?
-                                        (salmon::utils::logAlignFormatProb(aln->libFormat(), expectedLibraryFormat, salmonOpts.incompatPrior)) :
-                                        LOG_1;
+                                    double logAlignCompatProb =
+                                        (salmonOpts.useReadCompat) ?
+                                        (salmon::utils::logAlignFormatProb(
+                                                  aln->libFormat(),
+                                                  expectedLibraryFormat,
+                                                  aln->pos(),
+                                                  aln->fwd(), aln->mateStatus(), salmonOpts.incompatPrior)
+                                        ) : LOG_1;
 
                                     // Adjustment to the likelihood due to the
                                     // error model
@@ -469,7 +480,7 @@ namespace salmon {
                 fmt::print(stderr, "\n");
                 consumedAllInput = true;
 
-                numObservedFragments += alnLib.numMappedReads();
+                numObservedFragments += alnLib.numMappedFragments();
                 fmt::print(stderr, "# observed = {} mapped fragments.\033[F\033[F\033[F\033[F",
                         numObservedFragments);
 
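For readers tracing the math in the hunk above: the "product of a transcript-level term and an alignment-level term" becomes a sum in log space. A simplified illustration only (the real loop also folds in the fragment-length likelihood and the error-model term, which are not shown here):

    // Illustration: log-space combination of the two named terms.
    // transcriptLogMass, logRefLength and logAlignCompatProb stand for the
    // quantities computed in the surrounding loop.
    double transcriptTerm = transcriptLogMass - logRefLength;   // abundance over (effective) length
    double logAlnProb     = transcriptTerm + logAlignCompatProb;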
diff --git a/include/TextBootstrapWriter.hpp b/include/TextBootstrapWriter.hpp
new file mode 100644
index 0000000..7d4051f
--- /dev/null
+++ b/include/TextBootstrapWriter.hpp
@@ -0,0 +1,101 @@
+#ifndef __TEXT_BOOTSTRAP_WRITER_HPP__
+#define __TEXT_BOOTSTRAP_WRITER_HPP__
+
+#include <vector>
+#include <mutex>
+#include <fstream>
+#include <memory>
+
+#include "spdlog/spdlog.h"
+#include "BootstrapWriter.hpp"
+#include "SalmonSpinLock.hpp"
+#include "Transcript.hpp"
+
+class TextBootstrapWriter : public BootstrapWriter {
+    public:
+        TextBootstrapWriter(boost::filesystem::path& outputPath,
+                            std::shared_ptr<spdlog::logger> logger) :
+            outputPath_(outputPath),
+            logger_(logger) {
+                // Create the directory if it doesn't exist
+                if (!boost::filesystem::exists(outputPath_.parent_path())) {
+                    boost::filesystem::create_directories(outputPath_.parent_path());
+                }
+                // open the file
+                ofile_.open(outputPath.string());
+            }
+
+        ~TextBootstrapWriter() {
+#if defined __APPLE__
+            spin_lock::scoped_lock sl(writeMutex_);
+#else
+            std::lock_guard<std::mutex> lock(writeMutex_);
+#endif
+            ofile_.close();
+        }
+
+        bool writeHeader(std::string& comments, std::vector<Transcript>& transcripts) override {
+#if defined __APPLE__
+            spin_lock::scoped_lock sl(writeMutex_);
+#else
+            std::lock_guard<std::mutex> lock(writeMutex_);
+#endif
+            ofile_ << comments;
+            size_t numTxps = transcripts.size();
+            if (numTxps == 0) { return false; }
+            for (size_t tn = 0; tn < numTxps; ++tn) {
+                auto& t  = transcripts[tn];
+                ofile_ << t.RefName;
+                if (tn < numTxps - 1) {
+                    ofile_ << '\t';
+                }
+            }
+            ofile_ << '\n';
+            /*
+            for (size_t tn = 0; tn < numTxps; ++tn) {
+                auto& t  = transcripts[tn];
+                ofile_ << t.EffectiveLength;
+                if (tn < numTxps - 1) {
+                    ofile_ << '\t';
+                }
+            }
+            ofile_ << '\n';
+            */
+            return true;
+        }
+
+        bool writeBootstrap(std::vector<double>& abund) override {
+#if defined __APPLE__
+            spin_lock::scoped_lock sl(writeMutex_);
+#else
+            std::lock_guard<std::mutex> lock(writeMutex_);
+#endif
+            size_t numTxps = abund.size();
+            for (size_t tn = 0; tn < numTxps; ++tn) {
+                auto& a  = abund[tn];
+                ofile_ << a;
+                if (tn < numTxps - 1) {
+                    ofile_ << '\t';
+                }
+            }
+            ofile_ << '\n';
+            logger_->info("wrote {} bootstraps", numWritten_.load()+1);
+            ++numWritten_;
+            return true;
+        }
+
+    private:
+        boost::filesystem::path outputPath_;
+        std::ofstream ofile_;
+        std::shared_ptr<spdlog::logger> logger_;
+// only one writer thread at a time
+#if defined __APPLE__
+        spin_lock writeMutex_;
+#else
+        std::mutex writeMutex_;
+#endif
+        std::atomic<uint32_t> numWritten_{0};
+};
+
+#endif // __TEXT_BOOTSTRAP_WRITER_HPP__
+
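Given writeHeader()/writeBootstrap() above, the resulting file is plain tab-separated text: the caller-supplied comment lines, a header row of transcript names, then one row of abundances per bootstrap replicate. For example (names and values illustrative):

    # comment lines passed to writeHeader()
    ENST0001	ENST0002	ENST0003
    10	0	152.7
    12.5	1	149.2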
diff --git a/include/Transcript.hpp b/include/Transcript.hpp
index 0ee8207..7923230 100644
--- a/include/Transcript.hpp
+++ b/include/Transcript.hpp
@@ -13,24 +13,56 @@
 
 class Transcript {
 public:
+    Transcript() : RefName(""), RefLength(0), EffectiveLength(-1.0), id(std::numeric_limits<uint32_t>::max()),
+        SAMSequence(nullptr), Sequence(nullptr),
+        logPerBasePrior_(salmon::math::LOG_0),
+        priorMass_(salmon::math::LOG_0),
+        mass_(salmon::math::LOG_0), sharedCount_(0.0),
+        avgMassBias_(salmon::math::LOG_0),
+        active_(false),
+        freeSeqOnDestruct(false) {}
+
     Transcript(size_t idIn, const char* name, uint32_t len, double alpha = 0.05) :
-        RefName(name), RefLength(len), id(idIn), Sequence(nullptr),
+        RefName(name), RefLength(len), EffectiveLength(-1.0), id(idIn), SAMSequence(nullptr), Sequence(nullptr),
         logPerBasePrior_(std::log(alpha)),
         priorMass_(std::log(alpha*len)),
         mass_(salmon::math::LOG_0), sharedCount_(0.0),
-        avgMassBias_(salmon::math::LOG_0) {
+        avgMassBias_(salmon::math::LOG_0),
+        active_(false),
+        freeSeqOnDestruct(false) {
             uniqueCount_.store(0);
             lastUpdate_.store(0);
             lastTimestepUpdated_.store(0);
             cachedEffectiveLength_.store(std::log(static_cast<double>(RefLength)));
         }
 
+    ~Transcript() {
+      // Free the sequence if it belongs to us
+      if (freeSeqOnDestruct) { delete [] Sequence; }
+      // Free the SAMSequence if it exists
+      if (SAMSequence) { delete [] SAMSequence; }
+    }
+
     Transcript(Transcript&& other) {
         id = other.id;
         //std::swap(RefName, other.RefName);
         RefName = std::move(other.RefName);
         RefLength = other.RefLength;
+        EffectiveLength = other.EffectiveLength;
+        SAMSequence = other.SAMSequence;
+        // If this is an owned-resource, then move it
+        if (other.SAMSequence) {
+            other.SAMSequence = nullptr;
+        }
+
         Sequence = other.Sequence;
+        // If this is an owned-resource, then move it
+        if (other.freeSeqOnDestruct) {
+            freeSeqOnDestruct = true;
+            other.freeSeqOnDestruct = false;
+            other.Sequence = nullptr;
+        }
+
         uniqueCount_.store(other.uniqueCount_);
         totalCount_.store(other.totalCount_.load());
         lastTimestepUpdated_.store(other.lastTimestepUpdated_.load());
@@ -42,6 +74,8 @@ public:
         logPerBasePrior_ = other.logPerBasePrior_;
         priorMass_ = other.priorMass_;
         avgMassBias_.store(other.avgMassBias_.load());
+        hasAnchorFragment_.store(other.hasAnchorFragment_.load());
+        active_ = other.active_;
     }
 
     Transcript& operator=(Transcript&& other) {
@@ -49,7 +83,21 @@ public:
         //std::swap(RefName, other.RefName);
         RefName = std::move(other.RefName);
         RefLength = other.RefLength;
+        EffectiveLength = other.EffectiveLength;
+        SAMSequence = other.SAMSequence;
+        // If this is an owned-resource, then move it
+        if (other.SAMSequence) {
+            other.SAMSequence = nullptr;
+        }
+
         Sequence = other.Sequence;
+        // If this is an owned-resource, then move it
+        if (other.freeSeqOnDestruct) {
+            freeSeqOnDestruct = true;
+            other.freeSeqOnDestruct = false;
+            other.Sequence = nullptr;
+        }
+
         uniqueCount_.store(other.uniqueCount_);
         totalCount_.store(other.totalCount_.load());
         lastTimestepUpdated_.store(other.lastTimestepUpdated_.load());
@@ -61,6 +109,8 @@ public:
         logPerBasePrior_ = other.logPerBasePrior_;
         priorMass_ = other.priorMass_;
         avgMassBias_.store(other.avgMassBias_.load());
+        hasAnchorFragment_.store(other.hasAnchorFragment_.load());
+        active_ = other.active_;
         return *this;
     }
 
@@ -92,16 +142,16 @@ public:
         switch(dir) {
         case strand::forward:
             if (nibble) {
-                return Sequence[byte] & 0x0F;
+                return SAMSequence[byte] & 0x0F;
             } else {
-                return ((Sequence[byte] & 0xF0) >> 4) & 0x0F;
+                return ((SAMSequence[byte] & 0xF0) >> 4) & 0x0F;
             }
             break;
         case strand::reverse:
             if (nibble) {
-                return encodedRevComp[Sequence[byte] & 0x0F];
+                return encodedRevComp[SAMSequence[byte] & 0x0F];
             } else {
-                return encodedRevComp[((Sequence[byte] & 0xF0) >> 4) & 0x0F];
+                return encodedRevComp[((SAMSequence[byte] & 0xF0) >> 4) & 0x0F];
             }
             break;
         }
@@ -109,8 +159,12 @@ public:
         return std::numeric_limits<uint8_t>::max();
     }
 
+    inline void setSharedCount(double sc) {
+        sharedCount_.store(sc);
+    }
+
     inline void addSharedCount(double sc) {
-	salmon::utils::incLoop(sharedCount_, sc);
+	    salmon::utils::incLoop(sharedCount_, sc);
     }
 
     inline void setLastTimestepUpdated(uint64_t currentTimestep) {
@@ -136,6 +190,9 @@ public:
         return (withPrior) ? salmon::math::logAdd(priorMass_, mass_.load()) : mass_.load();
     }
 
+    void setActive() { active_ = true; }
+    bool getActive() { return active_; }
+
     inline double bias() {
         return (totalCount_.load() > 0) ?
                     avgMassBias_ - std::log(totalCount_.load()) :
@@ -158,40 +215,68 @@ public:
       *
       *
       */
-    double updateEffectiveLength(const FragmentLengthDistribution& fragLengthDist) {
+    double computeLogEffectiveLength(
+            std::vector<double>& logPMF,
+            double logFLDMean,
+            size_t minVal,
+            size_t maxVal) {
 
         double effectiveLength = salmon::math::LOG_0;
         double refLen = static_cast<double>(RefLength);
-        double logLength = std::log(refLen);
+        double logRefLength = std::log(refLen);
 
-        if (logLength < fragLengthDist.mean()) {
-            effectiveLength = logLength;
+        if (logRefLength <= logFLDMean) {
+            effectiveLength = logRefLength;
         } else {
-            uint32_t mval = fragLengthDist.maxVal();
-            for (size_t l = fragLengthDist.minVal(); l <= std::min(RefLength, mval); ++l) {
+            uint32_t mval = maxVal;
+            size_t clen = minVal;
+            size_t maxLen = std::min(RefLength, mval);
+            while (clen <= maxLen) {
+                size_t i = clen - minVal;
                 effectiveLength = salmon::math::logAdd(
                         effectiveLength,
-                        fragLengthDist.pmf(l) + std::log(refLen - l + 1));
+                        logPMF[i] + std::log(refLen - clen + 1));
+                ++clen;
             }
         }
+        if (std::exp(effectiveLength) <= 1.0) {
+            effectiveLength = salmon::math::LOG_1;
+        }
 
         return effectiveLength;
     }
 
-    double getCachedEffectiveLength() {
+    /**
+     * Return the cached value for the log of the effective length.
+     */
+    double getCachedLogEffectiveLength() {
         return cachedEffectiveLength_.load();
     }
 
-    double getEffectiveLength(const FragmentLengthDistribution& fragLengthDist,
-                              size_t currObs,
-                              size_t burnInObs) {
-        if (lastUpdate_ == 0 or
+    void updateEffectiveLength(
+            std::vector<double>& logPMF,
+            double logFLDMean,
+            size_t minVal,
+            size_t maxVal) {
+        double cel = computeLogEffectiveLength(logPMF, logFLDMean, minVal, maxVal);
+        cachedEffectiveLength_.store(cel);
+    }
+
+    /**
+     * If we should update the effective length, then do it and cache the result.
+     * Otherwise, return the cached result.
+     */
+    /*
+    double getLogEffectiveLength(const FragmentLengthDistribution& fragLengthDist,
+                                 size_t currObs, size_t burnInObs, bool forceUpdate=false) {
+        if (forceUpdate or
+            (lastUpdate_ == 0) or
             (currObs - lastUpdate_ >= 250000) or
             (lastUpdate_ < burnInObs and currObs > burnInObs)) {
             // compute new number
-            double cel = updateEffectiveLength(fragLengthDist);
-            cachedEffectiveLength_.store(cel);
             lastUpdate_.store(currObs);
+            double cel = computeLogEffectiveLength(fragLengthDist);
+            cachedEffectiveLength_.store(cel);
             //priorMass_ = cel + logPerBasePrior_;
             return cachedEffectiveLength_.load();
         } else {
@@ -199,6 +284,7 @@ public:
             return cachedEffectiveLength_.load();
         }
     }
+    */
 
     double perBasePrior() { return std::exp(logPerBasePrior_); }
     inline size_t lastTimestepUpdated() { return lastTimestepUpdated_.load(); }
@@ -206,8 +292,17 @@ public:
     void lengthClassIndex(uint32_t ind) { lengthClassIndex_ = ind; }
     uint32_t lengthClassIndex() { return lengthClassIndex_; }
 
+    void setAnchorFragment() {
+        hasAnchorFragment_.store(true);
+    }
+
+    bool hasAnchorFragment() {
+        return hasAnchorFragment_.load();
+    }
+
     std::string RefName;
     uint32_t RefLength;
+    double EffectiveLength;
     uint32_t id;
 
     double uniqueCounts{0.0};
@@ -215,7 +310,9 @@ public:
     double projectedCounts{0.0};
     double sharedCounts{0.0};
 
-    uint8_t* Sequence;
+    uint8_t* SAMSequence;
+    const char* Sequence;
+    bool freeSeqOnDestruct;
 
 private:
     std::atomic<size_t> uniqueCount_;
@@ -230,6 +327,11 @@ private:
     tbb::atomic<double> avgMassBias_;
     uint32_t lengthClassIndex_;
     double logPerBasePrior_;
+    // In a paired-end protocol, a transcript has
+    // an "anchor" fragment if it has a proper
+    // pair of reads mapping to it.
+    std::atomic<bool> hasAnchorFragment_{false};
+    bool active_;
 };
 
 #endif //TRANSCRIPT
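In linear space, what computeLogEffectiveLength() above evaluates is the usual expected-number-of-placements sum (falling back to the reference length itself when the log reference length is at most logFLDMean, and clamping the result to at least 1). A sketch under that reading:

    // Linear-space sketch of the log-space sum computed above.
    #include <algorithm>
    #include <vector>

    double effectiveLength(double refLen, const std::vector<double>& pmf,
                           size_t minVal, size_t maxVal) {
        double effLen = 0.0;
        size_t maxLen = std::min(static_cast<size_t>(refLen), maxVal);
        for (size_t l = minVal; l <= maxLen; ++l) {
            effLen += pmf[l - minVal] * (refLen - l + 1);  // P(len = l) * number of valid start positions
        }
        return std::max(effLen, 1.0);                      // mirrors the clamp to LOG_1 above
    }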
diff --git a/include/TranscriptCluster.hpp b/include/TranscriptCluster.hpp
index 27c50c7..9394a3c 100644
--- a/include/TranscriptCluster.hpp
+++ b/include/TranscriptCluster.hpp
@@ -93,7 +93,7 @@ public:
             if (round % 100 == 0) {
                 std::cerr << "\r\rproject to polytope: " << round;
             }
-            if (round > 50000) {
+            if (round > 5000) {
                 return;
             }
 
diff --git a/include/UnpairedRead.hpp b/include/UnpairedRead.hpp
index 9dd7ff7..7bd8e37 100644
--- a/include/UnpairedRead.hpp
+++ b/include/UnpairedRead.hpp
@@ -15,6 +15,7 @@ extern "C" {
 #include "StadenUtils.hpp"
 #include "SalmonMath.hpp"
 #include "LibraryFormat.hpp"
+#include "RapMapUtils.hpp"
 
 struct UnpairedRead {
    bam_seq_t* read = nullptr;
@@ -53,6 +54,15 @@ struct UnpairedRead {
    inline bool isPaired() const { return false; }
    inline bool isLeftOrphan() const { return false; }
    inline bool isRightOrphan() const { return false; }
+   inline bam_seq_t* get5PrimeRead() { return read; }
+
+
+   inline rapmap::utils::MateStatus mateStatus() const {
+       return rapmap::utils::MateStatus::SINGLE_END;
+   }
+
+   inline int32_t pos() const { return left(); }
+   inline bool fwd() const { return !bam_strand(read); }
 
     // return 0 on success, -1 on failure
     int writeToFile(scram_fd* fp) {
diff --git a/include/UtilityFunctions.hpp b/include/UtilityFunctions.hpp
new file mode 100644
index 0000000..cf5ce23
--- /dev/null
+++ b/include/UtilityFunctions.hpp
@@ -0,0 +1,148 @@
+#ifndef UTILITY_FUNCTIONS_HPP
+#define UTILITY_FUNCTIONS_HPP
+
+#include <limits>
+#include "SalmonUtils.hpp"
+
+// from http://stackoverflow.com/questions/17719674/c11-fast-constexpr-integer-powers
+constexpr int64_t constExprPow(int64_t base, unsigned int exp, int64_t result = 1) {
+	return exp < 1 ? result : constExprPow(base*base, exp/2, (exp % 2) ? result*base : result);
+}
+
+inline std::string kmerForIndex(uint32_t idx, uint32_t K) {
+    std::string kmer(K, 'X');
+    // The number of bits we need to shift the
+    // current mask to the left.
+    uint32_t pos{0};
+    for (int32_t i = K - 1; i >= 0; --i) {
+        uint8_t c = (idx >> pos) & 0x3;
+        switch (c) {
+            case 0:
+                kmer[i] = 'A';
+                break;
+            case 1:
+                kmer[i] = 'C';
+                break;
+            case 2:
+                kmer[i] = 'G';
+                break;
+            case 3:
+                kmer[i] = 'T';
+                break;
+            default:
+                break;
+        }
+        pos += 2;
+    }
+    return kmer;
+}
+
+inline uint32_t nextKmerIndex(uint32_t idx, char n, uint32_t K,
+                              salmon::utils::Direction dir) {
+    using salmon::utils::Direction;
+    idx = idx << 2;
+    if(dir == Direction::REVERSE_COMPLEMENT) {
+        switch(n) {
+            case 'A':
+            case 'a':
+                n='T';
+                break;
+            case 'C':
+            case 'c':
+                n='G';
+                break;
+            case 'g':
+            case 'G':
+                n='C';
+                break;
+            case 'T':
+            case 't':
+            case 'U':
+            case 'u':
+                n='A';
+                break;
+        }
+    }
+
+    switch(n) {
+        case 'A':
+        case 'a': break;
+        case 'C':
+        case 'c': idx = idx + 1;
+                  break;
+        case 'G':
+        case 'g': idx = idx + 2;
+                  break;
+        case 'T':
+        case 't':
+        case 'U':
+        case 'u':
+                  idx = idx + 3;
+                  break;
+    }
+    // Clear the top 32 - 2*K bits.
+    uint32_t clearShift = (32 - 2*K);
+    return idx & (0xFFFFFFFF >> clearShift);
+}
+
+
+inline uint32_t indexForKmer(const char* s,
+        uint32_t K,
+        salmon::utils::Direction dir) {
+    using salmon::utils::Direction;
+    // The index we'll return
+    uint32_t idx{0};
+    // The number of bits we need to shift the
+    // current mask to the left.
+    if(dir == Direction::FORWARD) {
+        for (int32_t i = 0; i < K; ++i) {
+            switch (s[i]) {
+                case 'A':
+                case 'a':
+                    break;
+                case 'C':
+                case 'c':
+                    idx += 1;
+                    break;
+                case 'G':
+                case 'g':
+                    idx += 2;
+                    break;
+                case 'T':
+                case 't':
+                case 'U':
+                case 'u':
+                    idx += 3;
+                    break;
+                default:
+                    return std::numeric_limits<uint32_t>::max();
+            }
+            if (i < K - 1) {idx = idx << 2;}
+        }
+    } else {
+        for (int32_t i = K - 1; i >= 0; --i) {
+            switch(s[i]) {
+                case 'T':
+                case 't':
+                case 'u':
+                case 'U': break;
+                case 'C':
+                case 'c': idx += 2;
+                          break;
+                case 'G':
+                case 'g': idx += 1;
+                          break;
+                case 'A':
+                case 'a': idx += 3;
+                          break;
+                default:
+                    return std::numeric_limits<uint32_t>::max();
+            }
+            if (i > 0) {idx = idx << 2;}
+        }
+    }
+    return idx;
+}
+
+
+#endif //UTILITY_FUNCTIONS_HPP
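These helpers use the standard 2-bit encoding (A=0, C=1, G=2, T/U=3) with the first base in the most significant bits. A quick round-trip check (a sketch):

    // Sketch: "ACGT" encodes to (0<<6)|(1<<4)|(2<<2)|3 = 27.
    #include <cassert>
    #include <cstdint>

    void checkEncoding() {
        uint32_t idx = indexForKmer("ACGT", 4, salmon::utils::Direction::FORWARD);
        assert(idx == 27);
        assert(kmerForIndex(idx, 4) == "ACGT");
        // Slide the window by one base (append 'A') to get "CGTA".
        uint32_t nxt = nextKmerIndex(idx, 'A', 4, salmon::utils::Direction::FORWARD);
        assert(nxt == indexForKmer("CGTA", 4, salmon::utils::Direction::FORWARD));
    }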
diff --git a/include/blockingconcurrentqueue.h b/include/blockingconcurrentqueue.h
new file mode 100644
index 0000000..a0412a7
--- /dev/null
+++ b/include/blockingconcurrentqueue.h
@@ -0,0 +1,760 @@
+// Provides an efficient blocking version of moodycamel::ConcurrentQueue.
+// ©2015 Cameron Desrochers. Distributed under the terms of the simplified
+// BSD license, available at the top of concurrentqueue.h.
+// Uses Jeff Preshing's semaphore implementation (under the terms of its
+// separate zlib license, embedded below).
+
+#pragma once
+
+#include "concurrentqueue.h"
+#include <type_traits>
+#include <memory>
+
+#if defined(_WIN32)
+// Avoid including windows.h in a header; we only need a handful of
+// items, so we'll redeclare them here (this is relatively safe since
+// the API generally has to remain stable between Windows versions).
+// I know this is an ugly hack but it still beats polluting the global
+// namespace with thousands of generic names or adding a .cpp for nothing.
+extern "C" {
+	struct _SECURITY_ATTRIBUTES;
+	__declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName);
+	__declspec(dllimport) int __stdcall CloseHandle(void* hObject);
+	__declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds);
+	__declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount);
+}
+#elif defined(__MACH__)
+#include <mach/mach.h>
+#elif defined(__unix__)
+#include <semaphore.h>
+#endif
+
+namespace moodycamel
+{
+namespace details
+{
+	// Code in the mpmc_sema namespace below is an adaptation of Jeff Preshing's
+	// portable + lightweight semaphore implementations, originally from
+	// https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h
+	// LICENSE:
+	// Copyright (c) 2015 Jeff Preshing
+	//
+	// This software is provided 'as-is', without any express or implied
+	// warranty. In no event will the authors be held liable for any damages
+	// arising from the use of this software.
+	//
+	// Permission is granted to anyone to use this software for any purpose,
+	// including commercial applications, and to alter it and redistribute it
+	// freely, subject to the following restrictions:
+	//
+	// 1. The origin of this software must not be misrepresented; you must not
+	//    claim that you wrote the original software. If you use this software
+	//    in a product, an acknowledgement in the product documentation would be
+	//    appreciated but is not required.
+	// 2. Altered source versions must be plainly marked as such, and must not be
+	//    misrepresented as being the original software.
+	// 3. This notice may not be removed or altered from any source distribution.
+	namespace mpmc_sema
+	{
+#if defined(_WIN32)
+		class Semaphore
+		{
+		private:
+		    void* m_hSema;
+		    
+		    Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
+		    Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
+
+		public:
+		    Semaphore(int initialCount = 0)
+		    {
+		        assert(initialCount >= 0);
+		        const long maxLong = 0x7fffffff;
+		        m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr);
+		    }
+
+		    ~Semaphore()
+		    {
+		        CloseHandle(m_hSema);
+		    }
+
+		    void wait()
+		    {
+		    	const unsigned long infinite = 0xffffffff;
+		        WaitForSingleObject(m_hSema, infinite);
+		    }
+
+		    void signal(int count = 1)
+		    {
+		        ReleaseSemaphore(m_hSema, count, nullptr);
+		    }
+		};
+#elif defined(__MACH__)
+		//---------------------------------------------------------
+		// Semaphore (Apple iOS and OSX)
+		// Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html
+		//---------------------------------------------------------
+		class Semaphore
+		{
+		private:
+		    semaphore_t m_sema;
+
+		    Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
+		    Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
+
+		public:
+		    Semaphore(int initialCount = 0)
+		    {
+		        assert(initialCount >= 0);
+		        semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount);
+		    }
+
+		    ~Semaphore()
+		    {
+		        semaphore_destroy(mach_task_self(), m_sema);
+		    }
+
+		    void wait()
+		    {
+		        semaphore_wait(m_sema);
+		    }
+
+		    void signal()
+		    {
+		        semaphore_signal(m_sema);
+		    }
+
+		    void signal(int count)
+		    {
+		        while (count-- > 0)
+		        {
+		            semaphore_signal(m_sema);
+		        }
+		    }
+		};
+#elif defined(__unix__)
+		//---------------------------------------------------------
+		// Semaphore (POSIX, Linux)
+		//---------------------------------------------------------
+		class Semaphore
+		{
+		private:
+		    sem_t m_sema;
+
+		    Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
+		    Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
+
+		public:
+		    Semaphore(int initialCount = 0)
+		    {
+		        assert(initialCount >= 0);
+		        sem_init(&m_sema, 0, initialCount);
+		    }
+
+		    ~Semaphore()
+		    {
+		        sem_destroy(&m_sema);
+		    }
+
+		    void wait()
+		    {
+		        // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
+		        int rc;
+		        do
+		        {
+		            rc = sem_wait(&m_sema);
+		        }
+		        while (rc == -1 && errno == EINTR);
+		    }
+
+		    void signal()
+		    {
+		        sem_post(&m_sema);
+		    }
+
+		    void signal(int count)
+		    {
+		        while (count-- > 0)
+		        {
+		            sem_post(&m_sema);
+		        }
+		    }
+		};
+#else
+#error Unsupported platform! (No semaphore wrapper available)
+#endif
+
+		//---------------------------------------------------------
+		// LightweightSemaphore
+		//---------------------------------------------------------
+		class LightweightSemaphore
+		{
+		public:
+			typedef std::make_signed<std::size_t>::type ssize_t;
+			
+		private:
+		    std::atomic<ssize_t> m_count;
+		    Semaphore m_sema;
+
+		    void waitWithPartialSpinning()
+		    {
+		        ssize_t oldCount;
+		        // Is there a better way to set the initial spin count?
+		        // If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC,
+		        // as threads start hitting the kernel semaphore.
+		        int spin = 10000;
+		        while (--spin >= 0)
+		        {
+		            oldCount = m_count.load(std::memory_order_relaxed);
+		            if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed))
+		                return;
+		            std::atomic_signal_fence(std::memory_order_acquire);     // Prevent the compiler from collapsing the loop.
+		        }
+		        oldCount = m_count.fetch_sub(1, std::memory_order_acquire);
+		        if (oldCount <= 0)
+		        {
+		            m_sema.wait();
+		        }
+		    }
+
+		    ssize_t waitManyWithPartialSpinning(ssize_t max)
+		    {
+		    	assert(max > 0);
+		        ssize_t oldCount;
+		        int spin = 10000;
+		        while (--spin >= 0)
+		        {
+		            oldCount = m_count.load(std::memory_order_relaxed);
+		            if (oldCount > 0)
+	            	{
+	            		ssize_t newCount = oldCount > max ? oldCount - max : 0;
+			        	if (m_count.compare_exchange_strong(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed))
+			        		return oldCount - newCount;
+		            }
+		            std::atomic_signal_fence(std::memory_order_acquire);
+		        }
+		        oldCount = m_count.fetch_sub(1, std::memory_order_acquire);
+		        if (oldCount <= 0)
+		            m_sema.wait();
+		        if (max > 1)
+		        	return 1 + tryWaitMany(max - 1);
+		        return 1;
+		    }
+
+		public:
+		    LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount)
+		    {
+		        assert(initialCount >= 0);
+		    }
+
+		    bool tryWait()
+		    {
+		        ssize_t oldCount = m_count.load(std::memory_order_relaxed);
+		        while (oldCount > 0)
+		        {
+		        	if (m_count.compare_exchange_weak(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed))
+		        		return true;
+		        }
+		        return false;
+		    }
+
+		    void wait()
+		    {
+		        if (!tryWait())
+		            waitWithPartialSpinning();
+		    }
+
+		    // Acquires between 0 and (greedily) max, inclusive
+		    ssize_t tryWaitMany(ssize_t max)
+		    {
+		    	assert(max >= 0);
+		    	ssize_t oldCount = m_count.load(std::memory_order_relaxed);
+		        while (oldCount > 0)
+		        {
+		        	ssize_t newCount = oldCount > max ? oldCount - max : 0;
+		        	if (m_count.compare_exchange_weak(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed))
+		        		return oldCount - newCount;
+		        }
+		        return 0;
+		    }
+
+		    // Acquires at least one, and (greedily) at most max
+		    ssize_t waitMany(ssize_t max)
+		    {
+		    	assert(max >= 0);
+		    	ssize_t result = tryWaitMany(max);
+		    	if (result == 0 && max > 0)
+		            result = waitManyWithPartialSpinning(max);
+		        return result;
+		    }
+
+		    void signal(ssize_t count = 1)
+		    {
+		    	assert(count >= 0);
+		        ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release);
+		        ssize_t toRelease = -oldCount < count ? -oldCount : count;
+		        if (toRelease > 0)
+		        {
+		            m_sema.signal((int)toRelease);
+		        }
+		    }
+		    
+		    ssize_t availableApprox() const
+		    {
+		    	ssize_t count = m_count.load(std::memory_order_relaxed);
+		    	return count > 0 ? count : 0;
+		    }
+		};
+	}	// end namespace mpmc_sema
+}	// end namespace details
+
+
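
The LightweightSemaphore above keeps its own atomic count and only falls back to the OS-level Semaphore when that count would go negative (a negative value tracks blocked waiters), which is why uncontended signal/wait pairs never touch the kernel. Below is a minimal sketch of that contract, assuming the header is reachable as "blockingconcurrentqueue.h"; note that the class sits in an internal namespace (it is a detail used by the blocking queue that follows, not a public API).

#include <cassert>
#include "blockingconcurrentqueue.h"   // assumed include path for this header

int main() {
    using moodycamel::details::mpmc_sema::LightweightSemaphore;

    LightweightSemaphore sem;            // count starts at 0
    assert(!sem.tryWait());              // nothing signalled yet, fails without blocking

    sem.signal(2);                       // count 0 -> 2; no kernel semaphore involved
    assert(sem.availableApprox() == 2);

    assert(sem.tryWait());               // consumes one unit (count 2 -> 1)
    auto got = sem.tryWaitMany(5);       // greedily takes whatever is left
    assert(got == 1);
    assert(!sem.tryWait());              // empty again
    return 0;
}
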
+// This is a blocking version of the queue. It has an almost identical interface to
+// the normal non-blocking version, with the addition of various wait_dequeue() methods
+// and the removal of producer-specific dequeue methods.
+template<typename T, typename Traits = ConcurrentQueueDefaultTraits>
+class BlockingConcurrentQueue
+{
+private:
+	typedef ::moodycamel::ConcurrentQueue<T, Traits> ConcurrentQueue;
+	typedef details::mpmc_sema::LightweightSemaphore LightweightSemaphore;
+
+public:
+	typedef typename ConcurrentQueue::producer_token_t producer_token_t;
+	typedef typename ConcurrentQueue::consumer_token_t consumer_token_t;
+	
+	typedef typename ConcurrentQueue::index_t index_t;
+	typedef typename ConcurrentQueue::size_t size_t;
+	typedef typename std::make_signed<size_t>::type ssize_t;
+	
+	static const size_t BLOCK_SIZE = ConcurrentQueue::BLOCK_SIZE;
+	static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = ConcurrentQueue::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD;
+	static const size_t EXPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::EXPLICIT_INITIAL_INDEX_SIZE;
+	static const size_t IMPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::IMPLICIT_INITIAL_INDEX_SIZE;
+	static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = ConcurrentQueue::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE;
+	static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = ConcurrentQueue::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE;
+	static const size_t MAX_SUBQUEUE_SIZE = ConcurrentQueue::MAX_SUBQUEUE_SIZE;
+	
+public:
+	// Creates a queue with at least `capacity` element slots; note that the
+	// actual number of elements that can be inserted without additional memory
+	// allocation depends on the number of producers and the block size (e.g. if
+	// the block size is equal to `capacity`, only a single block will be allocated
+	// up-front, which means only a single producer will be able to enqueue elements
+	// without an extra allocation -- blocks aren't shared between producers).
+	// This method is not thread safe -- it is up to the user to ensure that the
+	// queue is fully constructed before it starts being used by other threads (this
+	// includes making the memory effects of construction visible, possibly with a
+	// memory barrier).
+	explicit BlockingConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE)
+		: inner(capacity), sema(create<LightweightSemaphore>(), &BlockingConcurrentQueue::template destroy<LightweightSemaphore>)
+	{
+		assert(reinterpret_cast<ConcurrentQueue*>((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member");
+		if (!sema) {
+			MOODYCAMEL_THROW(std::bad_alloc());
+		}
+	}
+	
+	BlockingConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers)
+		: inner(minCapacity, maxExplicitProducers, maxImplicitProducers), sema(create<LightweightSemaphore>(), &BlockingConcurrentQueue::template destroy<LightweightSemaphore>)
+	{
+		assert(reinterpret_cast<ConcurrentQueue*>((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member");
+		if (!sema) {
+			MOODYCAMEL_THROW(std::bad_alloc());
+		}
+	}
+	
+	// Disable copying and copy assignment
+	BlockingConcurrentQueue(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION;
+	BlockingConcurrentQueue& operator=(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION;
+	
+	// Moving is supported, but note that it is *not* a thread-safe operation.
+	// Nobody can use the queue while it's being moved, and the memory effects
+	// of that move must be propagated to other threads before they can use it.
+	// Note: When a queue is moved, its tokens are still valid but can only be
+	// used with the destination queue (i.e. semantically they are moved along
+	// with the queue itself).
+	BlockingConcurrentQueue(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT
+		: inner(std::move(other.inner)), sema(std::move(other.sema))
+	{ }
+	
+	inline BlockingConcurrentQueue& operator=(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT
+	{
+		return swap_internal(other);
+	}
+	
+	// Swaps this queue's state with the other's. Not thread-safe.
+	// Swapping two queues does not invalidate their tokens, however
+	// the tokens that were created for one queue must be used with
+	// only the swapped queue (i.e. the tokens are tied to the
+	// queue's movable state, not the object itself).
+	inline void swap(BlockingConcurrentQueue& other) MOODYCAMEL_NOEXCEPT
+	{
+		swap_internal(other);
+	}
+	
+private:
+	BlockingConcurrentQueue& swap_internal(BlockingConcurrentQueue& other)
+	{
+		if (this == &other) {
+			return *this;
+		}
+		
+		inner.swap(other.inner);
+		sema.swap(other.sema);
+		return *this;
+	}
+	
+public:
+	// Enqueues a single item (by copying it).
+	// Allocates memory if required. Only fails if memory allocation fails (or implicit
+	// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0,
+	// or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
+	// Thread-safe.
+	inline bool enqueue(T const& item)
+	{
+		if (details::likely(inner.enqueue(item))) {
+			sema->signal();
+			return true;
+		}
+		return false;
+	}
+	
+	// Enqueues a single item (by moving it, if possible).
+	// Allocates memory if required. Only fails if memory allocation fails (or implicit
+	// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0,
+	// or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
+	// Thread-safe.
+	inline bool enqueue(T&& item)
+	{
+		if (details::likely(inner.enqueue(std::move(item)))) {
+			sema->signal();
+			return true;
+		}
+		return false;
+	}
+	
+	// Enqueues a single item (by copying it) using an explicit producer token.
+	// Allocates memory if required. Only fails if memory allocation fails (or
+	// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
+	// Thread-safe.
+	inline bool enqueue(producer_token_t const& token, T const& item)
+	{
+		if (details::likely(inner.enqueue(token, item))) {
+			sema->signal();
+			return true;
+		}
+		return false;
+	}
+	
+	// Enqueues a single item (by moving it, if possible) using an explicit producer token.
+	// Allocates memory if required. Only fails if memory allocation fails (or
+	// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
+	// Thread-safe.
+	inline bool enqueue(producer_token_t const& token, T&& item)
+	{
+		if (details::likely(inner.enqueue(token, std::move(item)))) {
+			sema->signal();
+			return true;
+		}
+		return false;
+	}
+	
+	// Enqueues several items.
+	// Allocates memory if required. Only fails if memory allocation fails (or
+	// implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
+	// is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
+	// Note: Use std::make_move_iterator if the elements should be moved instead of copied.
+	// Thread-safe.
+	template<typename It>
+	inline bool enqueue_bulk(It itemFirst, size_t count)
+	{
+		if (details::likely(inner.enqueue_bulk(std::forward<It>(itemFirst), count))) {
+			sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count);
+			return true;
+		}
+		return false;
+	}
+	
+	// Enqueues several items using an explicit producer token.
+	// Allocates memory if required. Only fails if memory allocation fails
+	// (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
+	// Note: Use std::make_move_iterator if the elements should be moved
+	// instead of copied.
+	// Thread-safe.
+	template<typename It>
+	inline bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
+	{
+		if (details::likely(inner.enqueue_bulk(token, std::forward<It>(itemFirst), count))) {
+			sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count);
+			return true;
+		}
+		return false;
+	}
+	
+	// Enqueues a single item (by copying it).
+	// Does not allocate memory. Fails if not enough room to enqueue (or implicit
+	// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
+	// is 0).
+	// Thread-safe.
+	inline bool try_enqueue(T const& item)
+	{
+		if (inner.try_enqueue(item)) {
+			sema->signal();
+			return true;
+		}
+		return false;
+	}
+	
+	// Enqueues a single item (by moving it, if possible).
+	// Does not allocate memory (except for one-time implicit producer).
+	// Fails if not enough room to enqueue (or implicit production is
+	// disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
+	// Thread-safe.
+	inline bool try_enqueue(T&& item)
+	{
+		if (inner.try_enqueue(std::move(item))) {
+			sema->signal();
+			return true;
+		}
+		return false;
+	}
+	
+	// Enqueues a single item (by copying it) using an explicit producer token.
+	// Does not allocate memory. Fails if not enough room to enqueue.
+	// Thread-safe.
+	inline bool try_enqueue(producer_token_t const& token, T const& item)
+	{
+		if (inner.try_enqueue(token, item)) {
+			sema->signal();
+			return true;
+		}
+		return false;
+	}
+	
+	// Enqueues a single item (by moving it, if possible) using an explicit producer token.
+	// Does not allocate memory. Fails if not enough room to enqueue.
+	// Thread-safe.
+	inline bool try_enqueue(producer_token_t const& token, T&& item)
+	{
+		if (inner.try_enqueue(token, std::move(item))) {
+			sema->signal();
+			return true;
+		}
+		return false;
+	}
+	
+	// Enqueues several items.
+	// Does not allocate memory (except for one-time implicit producer).
+	// Fails if not enough room to enqueue (or implicit production is
+	// disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
+	// Note: Use std::make_move_iterator if the elements should be moved
+	// instead of copied.
+	// Thread-safe.
+	template<typename It>
+	inline bool try_enqueue_bulk(It itemFirst, size_t count)
+	{
+		if (inner.try_enqueue_bulk(std::forward<It>(itemFirst), count)) {
+			sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count);
+			return true;
+		}
+		return false;
+	}
+	
+	// Enqueues several items using an explicit producer token.
+	// Does not allocate memory. Fails if not enough room to enqueue.
+	// Note: Use std::make_move_iterator if the elements should be moved
+	// instead of copied.
+	// Thread-safe.
+	template<typename It>
+	inline bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
+	{
+		if (inner.try_enqueue_bulk(token, std::forward<It>(itemFirst), count)) {
+			sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count);
+			return true;
+		}
+		return false;
+	}
+	
+	
+	// Attempts to dequeue from the queue.
+	// Returns false if all producer streams appeared empty at the time they
+	// were checked (so, the queue is likely but not guaranteed to be empty).
+	// Never allocates. Thread-safe.
+	template<typename U>
+	inline bool try_dequeue(U& item)
+	{
+		if (sema->tryWait()) {
+			while (!inner.try_dequeue(item)) {
+				continue;
+			}
+			return true;
+		}
+		return false;
+	}
+	
+	// Attempts to dequeue from the queue using an explicit consumer token.
+	// Returns false if all producer streams appeared empty at the time they
+	// were checked (so, the queue is likely but not guaranteed to be empty).
+	// Never allocates. Thread-safe.
+	template<typename U>
+	inline bool try_dequeue(consumer_token_t& token, U& item)
+	{
+		if (sema->tryWait()) {
+			while (!inner.try_dequeue(token, item)) {
+				continue;
+			}
+			return true;
+		}
+		return false;
+	}
+	
+	// Attempts to dequeue several elements from the queue.
+	// Returns the number of items actually dequeued.
+	// Returns 0 if all producer streams appeared empty at the time they
+	// were checked (so, the queue is likely but not guaranteed to be empty).
+	// Never allocates. Thread-safe.
+	template<typename It>
+	inline size_t try_dequeue_bulk(It itemFirst, size_t max)
+	{
+		size_t count = 0;
+		max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max);
+		while (count != max) {
+			count += inner.template try_dequeue_bulk<It&>(itemFirst, max - count);
+		}
+		return count;
+	}
+	
+	// Attempts to dequeue several elements from the queue using an explicit consumer token.
+	// Returns the number of items actually dequeued.
+	// Returns 0 if all producer streams appeared empty at the time they
+	// were checked (so, the queue is likely but not guaranteed to be empty).
+	// Never allocates. Thread-safe.
+	template<typename It>
+	inline size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max)
+	{
+		size_t count = 0;
+		max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max);
+		while (count != max) {
+			count += inner.template try_dequeue_bulk<It&>(token, itemFirst, max - count);
+		}
+		return count;
+	}
+	
+	
+	
+	// Blocks the current thread until there's something to dequeue, then
+	// dequeues it.
+	// Never allocates. Thread-safe.
+	template<typename U>
+	inline void wait_dequeue(U& item)
+	{
+		sema->wait();
+		while (!inner.try_dequeue(item)) {
+			continue;
+		}
+	}
+	
+	// Blocks the current thread until there's something to dequeue, then
+	// dequeues it using an explicit consumer token.
+	// Never allocates. Thread-safe.
+	template<typename U>
+	inline void wait_dequeue(consumer_token_t& token, U& item)
+	{
+		sema->wait();
+		while (!inner.try_dequeue(token, item)) {
+			continue;
+		}
+	}
+	
+	// Attempts to dequeue several elements from the queue.
+	// Returns the number of items actually dequeued, which will
+	// always be at least one (this method blocks until the queue
+	// is non-empty) and at most max.
+	// Never allocates. Thread-safe.
+	template<typename It>
+	inline size_t wait_dequeue_bulk(It itemFirst, size_t max)
+	{
+		size_t count = 0;
+		max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max);
+		while (count != max) {
+			count += inner.template try_dequeue_bulk<It&>(itemFirst, max - count);
+		}
+		return count;
+	}
+	
+	// Attempts to dequeue several elements from the queue using an explicit consumer token.
+	// Returns the number of items actually dequeued, which will
+	// always be at least one (this method blocks until the queue
+	// is non-empty) and at most max.
+	// Never allocates. Thread-safe.
+	template<typename It>
+	inline size_t wait_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max)
+	{
+		size_t count = 0;
+		max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max);
+		while (count != max) {
+			count += inner.template try_dequeue_bulk<It&>(token, itemFirst, max - count);
+		}
+		return count;
+	}
+	
+	
+	// Returns an estimate of the total number of elements currently in the queue. This
+	// estimate is only accurate if the queue has completely stabilized before it is called
+	// (i.e. all enqueue and dequeue operations have completed and their memory effects are
+	// visible on the calling thread, and no further operations start while this method is
+	// being called).
+	// Thread-safe.
+	inline size_t size_approx() const
+	{
+		return (size_t)sema->availableApprox();
+	}
+	
+	
+	// Returns true if the underlying atomic variables used by
+	// the queue are lock-free (they should be on most platforms).
+	// Thread-safe.
+	static bool is_lock_free()
+	{
+		return ConcurrentQueue::is_lock_free();
+	}
+	
+
+private:
+	template<typename U>
+	static inline U* create()
+	{
+		auto p = (Traits::malloc)(sizeof(U));
+		return p != nullptr ? new (p) U : nullptr;
+	}
+	
+	template<typename U, typename A1>
+	static inline U* create(A1&& a1)
+	{
+		auto p = (Traits::malloc)(sizeof(U));
+		return p != nullptr ? new (p) U(std::forward<A1>(a1)) : nullptr;
+	}
+	
+	template<typename U>
+	static inline void destroy(U* p)
+	{
+		if (p != nullptr) {
+			p->~U();
+		}
+		(Traits::free)(p);
+	}
+	
+private:
+	ConcurrentQueue inner;
+	std::unique_ptr<LightweightSemaphore, void (*)(LightweightSemaphore*)> sema;
+};
+
+
+template<typename T, typename Traits>
+inline void swap(BlockingConcurrentQueue<T, Traits>& a, BlockingConcurrentQueue<T, Traits>& b) MOODYCAMEL_NOEXCEPT
+{
+	a.swap(b);
+}
+
+}	// end namespace moodycamel
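
As described in the comments above, every successful enqueue signals the semaphore and wait_dequeue() waits on it before spinning on the inner try_dequeue(), so consumers block instead of busy-polling an empty queue. A minimal producer/consumer sketch of that interface follows; the include path and the item count are illustrative assumptions, not anything fixed by this header.

#include <cstdio>
#include <thread>
#include "blockingconcurrentqueue.h"   // assumed include path

int main() {
    moodycamel::BlockingConcurrentQueue<int> q;
    const int kItems = 100;             // arbitrary demo size

    // Producer: enqueue() only fails on allocation failure, so the
    // return value is ignored here for brevity.
    std::thread producer([&] {
        for (int i = 0; i < kItems; ++i) {
            q.enqueue(i);
        }
    });

    // Consumer: wait_dequeue() blocks until an item is available,
    // so no sleep/poll loop is needed on the consuming side.
    std::thread consumer([&] {
        long long sum = 0;
        for (int i = 0; i < kItems; ++i) {
            int item;
            q.wait_dequeue(item);
            sum += item;
        }
        std::printf("sum = %lld\n", sum);   // 0 + 1 + ... + 99 = 4950
    });

    producer.join();
    consumer.join();
    return 0;
}

The same calls also accept a producer_token_t or consumer_token_t as their first argument, which reduces contention exactly as with the non-blocking ConcurrentQueue.
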
diff --git a/include/concurrentqueue.h b/include/concurrentqueue.h
index a9f89a8..6983974 100644
--- a/include/concurrentqueue.h
+++ b/include/concurrentqueue.h
@@ -4,9 +4,8 @@
 // The full design is also described in excruciating detail at:
 //    http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue
 
-
 // Simplified BSD license:
-// Copyright (c) 2013-2014, Cameron Desrochers.
+// Copyright (c) 2013-2015, Cameron Desrochers.
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without modification,
@@ -31,7 +30,31 @@
 
 #pragma once
 
+#if defined(__GNUC__)
+// Disable -Wconversion warnings (spuriously triggered when Traits::size_t and
+// Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings
+// upon assigning any computed values)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+
+#ifdef MCDBGQ_USE_RELACY
+#pragma GCC diagnostic ignored "-Wint-to-pointer-cast"
+#endif
+#endif
+
+#ifdef MCDBGQ_USE_RELACY
+#include "relacy/relacy_std.hpp"
+#include "relacy_shims.h"
+// We only use malloc/free anyway, and the delete macro messes up `= delete` method declarations.
+// We'll override the default trait malloc ourselves without a macro.
+#undef new
+#undef delete
+#undef malloc
+#undef free
+#else
 #include <atomic>		// Requires C++11. Sorry VS2010.
+#include <cassert>
+#endif
 #include <cstdint>
 #include <cstdlib>
 #include <type_traits>
@@ -39,12 +62,18 @@
 #include <utility>
 #include <limits>
 #include <climits>		// for CHAR_BIT
-#include <cassert>
 #include <array>
 #include <thread>		// for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading
 
 // Platform-specific definitions of a numeric thread ID type and an invalid value
-#if defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__)
+#if defined(MCDBGQ_USE_RELACY)
+namespace moodycamel { namespace details {
+	typedef std::uint32_t thread_id_t;
+	static const thread_id_t invalid_thread_id  = 0xFFFFFFFFU;
+	static const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU;
+	static inline thread_id_t thread_id() { return rl::thread_index(); }
+} }
+#elif defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__)
 // No sense pulling in windows.h in a header, we'll manually declare the function
 // we use and rely on backwards-compatibility for this not to break
 extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void);
@@ -82,10 +111,12 @@ namespace moodycamel { namespace details {
 #define MOODYCAMEL_TRY try
 #define MOODYCAMEL_CATCH(...) catch(__VA_ARGS__)
 #define MOODYCAMEL_RETHROW throw
+#define MOODYCAMEL_THROW(expr) throw (expr)
 #else
 #define MOODYCAMEL_TRY if (true)
 #define MOODYCAMEL_CATCH(...) else if (false)
 #define MOODYCAMEL_RETHROW
+#define MOODYCAMEL_THROW(expr)
 #endif
 #endif
 
@@ -94,6 +125,12 @@ namespace moodycamel { namespace details {
 #define MOODYCAMEL_NOEXCEPT
 #define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true
 #define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true
+#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1800
+// VS2012's std::is_nothrow_[move_]constructible is broken and returns true when it shouldn't :-(
+// We have to assume *all* non-trivial constructors may throw on VS2012!
+#define MOODYCAMEL_NOEXCEPT _NOEXCEPT
+#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference<valueType>::value && std::is_move_constructible<type>::value ? std::is_trivially_move_constructible<type>::value : std::is_trivially_copy_constructible<type>::value)
+#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference<valueType>::value && std::is_move_assignable<type>::value ? std::is_trivially_move_assignable<type>::value || std::is_nothrow_move_assignable<type>::value : std::is_trivially_copy_assignable<type>::value || std::is_nothrow_copy_assignable<type>::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))
 #elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1900
 #define MOODYCAMEL_NOEXCEPT _NOEXCEPT
 #define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference<valueType>::value && std::is_move_constructible<type>::value ? std::is_trivially_move_constructible<type>::value || std::is_nothrow_move_constructible<type>::value : std::is_trivially_copy_constructible<type>::value || std::is_nothrow_copy_constructible<type>::value)
@@ -106,16 +143,27 @@ namespace moodycamel { namespace details {
 #endif
 
 #ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
-// VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445
-// g++ <=4.7 doesn't support thread_local either
-#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
-// Assume `thread_local` is fully supported in all other C++11 compilers/runtimes
+#ifdef MCDBGQ_USE_RELACY
 #define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
+#else
+//// VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445
+//// g++ <=4.7 doesn't support thread_local either
+//#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+//// Assume `thread_local` is fully supported in all other C++11 compilers/runtimes
+//#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
+//#endif
 #endif
 #endif
 
-// rob-p test
-#undef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
+// VS2012 doesn't support deleted functions. 
+// In this case, we declare the function normally but don't define it. A link error will be generated if the function is called.
+#ifndef MOODYCAMEL_DELETE_FUNCTION
+#if defined(_MSC_VER) && _MSC_VER < 1800
+#define MOODYCAMEL_DELETE_FUNCTION
+#else
+#define MOODYCAMEL_DELETE_FUNCTION = delete
+#endif
+#endif
 
 // Compiler-specific likely/unlikely hints
 namespace moodycamel { namespace details {
@@ -128,19 +176,10 @@ namespace moodycamel { namespace details {
 #endif
 } }
 
-
 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
 #include "internal/concurrentqueue_internal_debug.h"
 #endif
 
-#if defined(__GNUC__)
-// Disable -Wconversion warnings (spuriously triggered when Traits::size_t and
-// Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings
-// upon assigning any computed values)
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wconversion"
-#endif
-
 namespace moodycamel {
 namespace details {
 	template<typename T>
@@ -152,7 +191,6 @@ namespace details {
 	};
 }
 
-
 // Default traits for the ConcurrentQueue. To change some of the
 // traits without re-implementing all of them, inherit from this
 // struct and shadow the declarations you wish to be different;
@@ -163,7 +201,7 @@ struct ConcurrentQueueDefaultTraits
 {
 	// General-purpose size type. std::size_t is strongly recommended.
 	typedef std::size_t size_t;
-
+	
 	// The type used for the enqueue and dequeue indices. Must be at least as
 	// large as size_t. Should be significantly larger than the number of elements
 	// you expect to hold at once, especially if you have a high turnover rate;
@@ -175,51 +213,67 @@ struct ConcurrentQueueDefaultTraits
 	// whether the queue is lock-free with a 64-bit type depends on whether
 	// std::atomic<std::uint64_t> is lock-free, which is platform-specific.
 	typedef std::size_t index_t;
-
+	
 	// Internally, all elements are enqueued and dequeued from multi-element
 	// blocks; this is the smallest controllable unit. If you expect few elements
 	// but many producers, a smaller block size should be favoured. For few producers
 	// and/or many elements, a larger block size is preferred. A sane default
 	// is provided. Must be a power of 2.
 	static const size_t BLOCK_SIZE = 32;
-
+	
 	// For explicit producers (i.e. when using a producer token), the block is
 	// checked for being empty by iterating through a list of flags, one per element.
 	// For large block sizes, this is too inefficient, and switching to an atomic
 	// counter-based approach is faster. The switch is made for block sizes strictly
 	// larger than this threshold.
 	static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32;
-
+	
 	// How many full blocks can be expected for a single explicit producer? This should
 	// reflect that number's maximum for optimal performance. Must be a power of 2.
 	static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32;
-
+	
 	// How many full blocks can be expected for a single implicit producer? This should
 	// reflect that number's maximum for optimal performance. Must be a power of 2.
 	static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32;
-
+	
 	// The initial size of the hash table mapping thread IDs to implicit producers.
 	// Note that the hash is resized every time it becomes half full.
 	// Must be a power of two, and either 0 or at least 1. If 0, implicit production
 	// (using the enqueue methods without an explicit producer token) is disabled.
 	static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32;
-
+	
 	// Controls the number of items that an explicit consumer (i.e. one with a token)
 	// must consume before it causes all consumers to rotate and move on to the next
 	// internal queue.
 	static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256;
-
+	
 	// The maximum number of elements (inclusive) that can be enqueued to a sub-queue.
 	// Enqueue operations that would cause this limit to be surpassed will fail. Note
 	// that this limit is enforced at the block level (for performance reasons), i.e.
 	// it's rounded up to the nearest block size.
 	static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max<size_t>::value;
-
-
+	
+	
+#ifndef MCDBGQ_USE_RELACY
 	// Memory allocation can be customized if needed.
 	// malloc should return nullptr on failure, and handle alignment like std::malloc.
+#if defined(malloc) || defined(free)
+	// Gah, this is 2015, stop defining macros that break standard code already!
+	// Work around malloc/free being special macros:
+	static inline void* WORKAROUND_malloc(size_t size) { return malloc(size); }
+	static inline void WORKAROUND_free(void* ptr) { return free(ptr); }
+	static inline void* (malloc)(size_t size) { return WORKAROUND_malloc(size); }
+	static inline void (free)(void* ptr) { return WORKAROUND_free(ptr); }
+#else
 	static inline void* malloc(size_t size) { return std::malloc(size); }
 	static inline void free(void* ptr) { return std::free(ptr); }
+#endif
+#else
+	// Debug versions when running under the Relacy race detector (ignore
+	// these in user code)
+	static inline void* malloc(size_t size) { return rl::rl_malloc(size, $); }
+	static inline void free(void* ptr) { return rl::rl_free(ptr, $); }
+#endif
 };
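
The struct that just closed is the customization point described in its leading comment: inherit from ConcurrentQueueDefaultTraits and shadow only the declarations you want to change. A minimal sketch; the name LargeBlockTraits, the chosen BLOCK_SIZE, and the include path are illustrative assumptions, not recommendations.

#include "concurrentqueue.h"   // assumed include path

// Shadow only BLOCK_SIZE; everything else falls through to the defaults.
// Per the comments above, BLOCK_SIZE must be a power of 2.
struct LargeBlockTraits : public moodycamel::ConcurrentQueueDefaultTraits
{
    static const size_t BLOCK_SIZE = 256;
};

int main() {
    // The second template parameter selects the custom traits.
    moodycamel::ConcurrentQueue<int, LargeBlockTraits> q;
    q.enqueue(42);
    int item = 0;
    bool ok = q.try_dequeue(item);
    return (ok && item == 42) ? 0 : 1;
}
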
 
 
@@ -234,6 +288,7 @@ struct ProducerToken;
 struct ConsumerToken;
 
 template<typename T, typename Traits> class ConcurrentQueue;
+template<typename T, typename Traits> class BlockingConcurrentQueue;
 class ConcurrentQueueTests;
 
 
@@ -244,13 +299,13 @@ namespace details
 		ConcurrentQueueProducerTypelessBase* next;
 		std::atomic<bool> inactive;
 		ProducerToken* token;
-
+		
 		ConcurrentQueueProducerTypelessBase()
 			: inactive(false), token(nullptr)
 		{
 		}
 	};
-
+	
 	template<bool use32> struct _hash_32_or_64 {
 		static inline std::uint32_t hash(std::uint32_t h)
 		{
@@ -276,13 +331,13 @@ namespace details
 		}
 	};
 	template<std::size_t size> struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> {  };
-
+	
 	static inline size_t hash_thread_id(thread_id_t id)
 	{
 		static_assert(sizeof(thread_id_t) <= 8, "Expected a platform where thread IDs are at most 64-bit values");
 		return static_cast<size_t>(hash_32_or_64<sizeof(thread_id_t)>::hash(id));
 	}
-
+	
 	template<typename T>
 	static inline bool circular_less_than(T a, T b)
 	{
@@ -296,7 +351,7 @@ namespace details
 #pragma warning(pop)
 #endif
 	}
-
+	
 	template<typename U>
 	static inline char* align_for(char* ptr)
 	{
@@ -320,7 +375,7 @@ namespace details
 		++x;
 		return x;
 	}
-
+	
 	template<typename T>
 	static inline void swap_relaxed(std::atomic<T>& left, std::atomic<T>& right)
 	{
@@ -328,13 +383,13 @@ namespace details
 		left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed);
 		right.store(std::move(temp), std::memory_order_relaxed);
 	}
-
+	
 	template<typename T>
 	static inline T const& nomove(T const& x)
 	{
 		return x;
 	}
-
+	
 	template<bool Enable>
 	struct nomove_if
 	{
@@ -344,7 +399,7 @@ namespace details
 			return x;
 		}
 	};
-
+	
 	template<>
 	struct nomove_if<false>
 	{
@@ -355,30 +410,34 @@ namespace details
 			return std::forward<U>(x);
 		}
 	};
-
+	
 	template<typename It>
 	static inline auto deref_noexcept(It& it) MOODYCAMEL_NOEXCEPT -> decltype(*it)
 	{
 		return *it;
 	}
-
+	
 #if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
 	template<typename T> struct is_trivially_destructible : std::is_trivially_destructible<T> { };
 #else
 	template<typename T> struct is_trivially_destructible : std::has_trivial_destructor<T> { };
 #endif
-
+	
 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
+#ifdef MCDBGQ_USE_RELACY
+	typedef RelacyThreadExitListener ThreadExitListener;
+	typedef RelacyThreadExitNotifier ThreadExitNotifier;
+#else
 	struct ThreadExitListener
 	{
 		typedef void (*callback_t)(void*);
 		callback_t callback;
 		void* userData;
-
+		
 		ThreadExitListener* next;		// reserved for use by the ThreadExitNotifier
 	};
-
-
+	
+	
 	class ThreadExitNotifier
 	{
 	public:
@@ -388,7 +447,7 @@ namespace details
 			listener->next = tlsInst.tail;
 			tlsInst.tail = listener;
 		}
-
+		
 		static void unsubscribe(ThreadExitListener* listener)
 		{
 			auto& tlsInst = instance();
@@ -401,12 +460,12 @@ namespace details
 				prev = &ptr->next;
 			}
 		}
-
+		
 	private:
 		ThreadExitNotifier() : tail(nullptr) { }
-		ThreadExitNotifier(ThreadExitNotifier const&) = delete;
-		ThreadExitNotifier& operator=(ThreadExitNotifier const&) = delete;
-
+		ThreadExitNotifier(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION;
+		ThreadExitNotifier& operator=(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION;
+		
 		~ThreadExitNotifier()
 		{
 			// This thread is about to exit, let everyone know!
@@ -415,19 +474,20 @@ namespace details
 				ptr->callback(ptr->userData);
 			}
 		}
-
+		
 		// Thread-local
 		static inline ThreadExitNotifier& instance()
 		{
 			static thread_local ThreadExitNotifier notifier;
 			return notifier;
 		}
-
+		
 	private:
 		ThreadExitListener* tail;
 	};
-	#endif
-
+#endif
+#endif
+	
 	template<typename T> struct static_is_lock_free_num { enum { value = 0 }; };
 	template<> struct static_is_lock_free_num<signed char> { enum { value = ATOMIC_CHAR_LOCK_FREE }; };
 	template<> struct static_is_lock_free_num<short> { enum { value = ATOMIC_SHORT_LOCK_FREE }; };
@@ -444,8 +504,11 @@ struct ProducerToken
 {
 	template<typename T, typename Traits>
 	explicit ProducerToken(ConcurrentQueue<T, Traits>& queue);
-
-	explicit ProducerToken(ProducerToken&& other) MOODYCAMEL_NOEXCEPT
+	
+	template<typename T, typename Traits>
+	explicit ProducerToken(BlockingConcurrentQueue<T, Traits>& queue);
+	
+	ProducerToken(ProducerToken&& other) MOODYCAMEL_NOEXCEPT
 		: producer(other.producer)
 	{
 		other.producer = nullptr;
@@ -453,13 +516,13 @@ struct ProducerToken
 			producer->token = this;
 		}
 	}
-
+	
 	inline ProducerToken& operator=(ProducerToken&& other) MOODYCAMEL_NOEXCEPT
 	{
 		swap(other);
 		return *this;
 	}
-
+	
 	void swap(ProducerToken& other) MOODYCAMEL_NOEXCEPT
 	{
 		std::swap(producer, other.producer);
@@ -470,7 +533,7 @@ struct ProducerToken
 			other.producer->token = &other;
 		}
 	}
-
+	
 	// A token is always valid unless:
 	//     1) Memory allocation failed during construction
 	//     2) It was moved via the move constructor
@@ -480,7 +543,7 @@ struct ProducerToken
 	// that the token is valid for use with a specific queue,
 	// but not which one; that's up to the user to track.
 	inline bool valid() const { return producer != nullptr; }
-
+	
 	~ProducerToken()
 	{
 		if (producer != nullptr) {
@@ -488,15 +551,15 @@ struct ProducerToken
 			producer->inactive.store(true, std::memory_order_release);
 		}
 	}
-
+	
 	// Disable copying and assignment
-	ProducerToken(ProducerToken const&) = delete;
-	ProducerToken& operator=(ProducerToken const&) = delete;
-
+	ProducerToken(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION;
+	ProducerToken& operator=(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION;
+	
 private:
 	template<typename T, typename Traits> friend class ConcurrentQueue;
 	friend class ConcurrentQueueTests;
-
+	
 protected:
 	details::ConcurrentQueueProducerTypelessBase* producer;
 };
@@ -506,18 +569,21 @@ struct ConsumerToken
 {
 	template<typename T, typename Traits>
 	explicit ConsumerToken(ConcurrentQueue<T, Traits>& q);
-
-	explicit ConsumerToken(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT
+	
+	template<typename T, typename Traits>
+	explicit ConsumerToken(BlockingConcurrentQueue<T, Traits>& q);
+	
+	ConsumerToken(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT
 		: initialOffset(other.initialOffset), lastKnownGlobalOffset(other.lastKnownGlobalOffset), itemsConsumedFromCurrent(other.itemsConsumedFromCurrent), currentProducer(other.currentProducer), desiredProducer(other.desiredProducer)
 	{
 	}
-
+	
 	inline ConsumerToken& operator=(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT
 	{
 		swap(other);
 		return *this;
 	}
-
+	
 	void swap(ConsumerToken& other) MOODYCAMEL_NOEXCEPT
 	{
 		std::swap(initialOffset, other.initialOffset);
@@ -526,15 +592,15 @@ struct ConsumerToken
 		std::swap(currentProducer, other.currentProducer);
 		std::swap(desiredProducer, other.desiredProducer);
 	}
-
+	
 	// Disable copying and assignment
-	ConsumerToken(ConsumerToken const&) = delete;
-	ConsumerToken& operator=(ConsumerToken const&) = delete;
+	ConsumerToken(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION;
+	ConsumerToken& operator=(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION;
 
 private:
 	template<typename T, typename Traits> friend class ConcurrentQueue;
 	friend class ConcurrentQueueTests;
-
+	
 private: // but shared with ConcurrentQueue
 	std::uint32_t initialOffset;
 	std::uint32_t lastKnownGlobalOffset;
@@ -555,10 +621,10 @@ class ConcurrentQueue
 public:
 	typedef ::moodycamel::ProducerToken producer_token_t;
 	typedef ::moodycamel::ConsumerToken consumer_token_t;
-
+	
 	typedef typename Traits::index_t index_t;
 	typedef typename Traits::size_t size_t;
-
+	
 	static const size_t BLOCK_SIZE = static_cast<size_t>(Traits::BLOCK_SIZE);
 	static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast<size_t>(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD);
 	static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast<size_t>(Traits::EXPLICIT_INITIAL_INDEX_SIZE);
@@ -603,11 +669,41 @@ public:
 		nextExplicitConsumerId(0),
 		globalExplicitConsumerOffset(0)
 	{
-		implicitProducerHashResizeInProgress.clear();
+		implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
 		populate_initial_implicit_producer_hash();
 		populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1));
+		
+#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
+		// Track all the producers using a fully-resolved typed list for
+		// each kind; this makes it possible to debug them starting from
+		// the root queue object (otherwise wacky casts are needed that
+		// don't compile in the debugger's expression evaluator).
+		explicitProducers.store(nullptr, std::memory_order_relaxed);
+		implicitProducers.store(nullptr, std::memory_order_relaxed);
+#endif
 	}
-
+	
+	// Computes the correct amount of pre-allocated blocks for you based
+	// on the minimum number of elements you want available at any given
+	// time, and the maximum concurrent number of each type of producer.
+	ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers)
+		: producerListTail(nullptr),
+		producerCount(0),
+		initialBlockPoolIndex(0),
+		nextExplicitConsumerId(0),
+		globalExplicitConsumerOffset(0)
+	{
+		implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
+		populate_initial_implicit_producer_hash();
+		size_t blocks = ((((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) + 2 * (maxExplicitProducers + maxImplicitProducers)) * BLOCK_SIZE;
+		populate_initial_block_list(blocks);
+		
+#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
+		explicitProducers.store(nullptr, std::memory_order_relaxed);
+		implicitProducers.store(nullptr, std::memory_order_relaxed);
+#endif
+	}
+	
 	// Note: The queue should not be accessed concurrently while it's
 	// being deleted. It's up to the user to synchronize this.
 	// This method is not thread safe.
@@ -623,7 +719,7 @@ public:
 			destroy(ptr);
 			ptr = next;
 		}
-
+		
 		// Destroy implicit producer hash tables
 		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) {
 			auto hash = implicitProducerHash.load(std::memory_order_relaxed);
@@ -634,12 +730,12 @@ public:
 						hash->entries[i].~ImplicitProducerKVP();
 					}
 					hash->~ImplicitProducerHash();
-					Traits::free(hash);
+					(Traits::free)(hash);
 				}
 				hash = prev;
 			}
 		}
-
+		
 		// Destroy global free list
 		auto block = freeList.head_unsafe();
 		while (block != nullptr) {
@@ -649,15 +745,15 @@ public:
 			}
 			block = next;
 		}
-
+		
 		// Destroy initial free list
 		destroy_array(initialBlockPool, initialBlockPoolSize);
 	}
 
 	// Disable copying and copy assignment
-	ConcurrentQueue(ConcurrentQueue const&) = delete;
-	ConcurrentQueue& operator=(ConcurrentQueue const&) = delete;
-
+	ConcurrentQueue(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION;
+	ConcurrentQueue& operator=(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION;
+	
 	// Moving is supported, but note that it is *not* a thread-safe operation.
 	// Nobody can use the queue while it's being moved, and the memory effects
 	// of that move must be propagated to other threads before they can use it.
@@ -675,27 +771,34 @@ public:
 		globalExplicitConsumerOffset(other.globalExplicitConsumerOffset.load(std::memory_order_relaxed))
 	{
 		// Move the other one into this, and leave the other one as an empty queue
-		implicitProducerHashResizeInProgress.clear();
+		implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
 		populate_initial_implicit_producer_hash();
 		swap_implicit_producer_hashes(other);
-
+		
 		other.producerListTail.store(nullptr, std::memory_order_relaxed);
 		other.producerCount.store(0, std::memory_order_relaxed);
 		other.nextExplicitConsumerId.store(0, std::memory_order_relaxed);
 		other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed);
-
+		
+#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
+		explicitProducers.store(other.explicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed);
+		other.explicitProducers.store(nullptr, std::memory_order_relaxed);
+		implicitProducers.store(other.implicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed);
+		other.implicitProducers.store(nullptr, std::memory_order_relaxed);
+#endif
+		
 		other.initialBlockPoolIndex.store(0, std::memory_order_relaxed);
 		other.initialBlockPoolSize = 0;
 		other.initialBlockPool = nullptr;
-
+		
 		reown_producers();
 	}
-
+	
 	inline ConcurrentQueue& operator=(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT
 	{
 		return swap_internal(other);
 	}
-
+	
 	// Swaps this queue's state with the other's. Not thread-safe.
 	// Swapping two queues does not invalidate their tokens, however
 	// the tokens that were created for one queue must be used with
@@ -705,14 +808,14 @@ public:
 	{
 		swap_internal(other);
 	}
-
+	
 private:
 	ConcurrentQueue& swap_internal(ConcurrentQueue& other)
 	{
 		if (this == &other) {
 			return *this;
 		}
-
+		
 		details::swap_relaxed(producerListTail, other.producerListTail);
 		details::swap_relaxed(producerCount, other.producerCount);
 		details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex);
@@ -721,15 +824,20 @@ private:
 		freeList.swap(other.freeList);
 		details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId);
 		details::swap_relaxed(globalExplicitConsumerOffset, other.globalExplicitConsumerOffset);
-
+		
 		swap_implicit_producer_hashes(other);
-
+		
 		reown_producers();
 		other.reown_producers();
-
+		
+#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
+		details::swap_relaxed(explicitProducers, other.explicitProducers);
+		details::swap_relaxed(implicitProducers, other.implicitProducers);
+#endif
+		
 		return *this;
 	}
-
+	
 public:
 	// Enqueues a single item (by copying it).
 	// Allocates memory if required. Only fails if memory allocation fails (or implicit
@@ -741,7 +849,7 @@ public:
 		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
 		return inner_enqueue<CanAlloc>(item);
 	}
-
+	
 	// Enqueues a single item (by moving it, if possible).
 	// Allocates memory if required. Only fails if memory allocation fails (or implicit
 	// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0,
@@ -752,7 +860,7 @@ public:
 		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
 		return inner_enqueue<CanAlloc>(std::move(item));
 	}
-
+	
 	// Enqueues a single item (by copying it) using an explicit producer token.
 	// Allocates memory if required. Only fails if memory allocation fails (or
 	// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
@@ -761,7 +869,7 @@ public:
 	{
 		return inner_enqueue<CanAlloc>(token, item);
 	}
-
+	
 	// Enqueues a single item (by moving it, if possible) using an explicit producer token.
 	// Allocates memory if required. Only fails if memory allocation fails (or
 	// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
@@ -770,7 +878,7 @@ public:
 	{
 		return inner_enqueue<CanAlloc>(token, std::move(item));
 	}
-
+	
 	// Enqueues several items.
 	// Allocates memory if required. Only fails if memory allocation fails (or
 	// implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
@@ -783,7 +891,7 @@ public:
 		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
 		return inner_enqueue_bulk<CanAlloc>(std::forward<It>(itemFirst), count);
 	}
-
+	
 	// Enqueues several items using an explicit producer token.
 	// Allocates memory if required. Only fails if memory allocation fails
 	// (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
@@ -795,7 +903,7 @@ public:
 	{
 		return inner_enqueue_bulk<CanAlloc>(token, std::forward<It>(itemFirst), count);
 	}
-
+	
 	// Enqueues a single item (by copying it).
 	// Does not allocate memory. Fails if not enough room to enqueue (or implicit
 	// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
@@ -806,7 +914,7 @@ public:
 		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
 		return inner_enqueue<CannotAlloc>(item);
 	}
-
+	
 	// Enqueues a single item (by moving it, if possible).
 	// Does not allocate memory (except for one-time implicit producer).
 	// Fails if not enough room to enqueue (or implicit production is
@@ -817,7 +925,7 @@ public:
 		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
 		return inner_enqueue<CannotAlloc>(std::move(item));
 	}
-
+	
 	// Enqueues a single item (by copying it) using an explicit producer token.
 	// Does not allocate memory. Fails if not enough room to enqueue.
 	// Thread-safe.
@@ -825,7 +933,7 @@ public:
 	{
 		return inner_enqueue<CannotAlloc>(token, item);
 	}
-
+	
 	// Enqueues a single item (by moving it, if possible) using an explicit producer token.
 	// Does not allocate memory. Fails if not enough room to enqueue.
 	// Thread-safe.
@@ -833,7 +941,7 @@ public:
 	{
 		return inner_enqueue<CannotAlloc>(token, std::move(item));
 	}
-
+	
 	// Enqueues several items.
 	// Does not allocate memory (except for one-time implicit producer).
 	// Fails if not enough room to enqueue (or implicit production is
@@ -847,7 +955,7 @@ public:
 		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
 		return inner_enqueue_bulk<CannotAlloc>(std::forward<It>(itemFirst), count);
 	}
-
+	
 	// Enqueues several items using an explicit producer token.
 	// Does not allocate memory. Fails if not enough room to enqueue.
 	// Note: Use std::make_move_iterator if the elements should be moved
@@ -858,9 +966,9 @@ public:
 	{
 		return inner_enqueue_bulk<CannotAlloc>(token, std::forward<It>(itemFirst), count);
 	}
-
-
-
+	
+	
+	
 	// Attempts to dequeue from the queue.
 	// Returns false if all producer streams appeared empty at the time they
 	// were checked (so, the queue is likely but not guaranteed to be empty).
@@ -883,7 +991,7 @@ public:
 				++nonEmptyCount;
 			}
 		}
-
+		
 		// If there was at least one non-empty queue but it appears empty at the time
 		// we try to dequeue from it, we need to make sure every queue's been tried
 		if (nonEmptyCount > 0) {
@@ -898,7 +1006,7 @@ public:
 		}
 		return false;
 	}
-
+	
 	// Attempts to dequeue from the queue.
 	// Returns false if all producer streams appeared empty at the time they
 	// were checked (so, the queue is likely but not guaranteed to be empty).
@@ -906,7 +1014,7 @@ public:
 	// not attempt to reduce contention by interleaving the order that producer
 	// streams are dequeued from. So, using this method can reduce overall throughput
 	// under contention, but will give more predictable results in single-threaded
-	// consumer scenarios.
+	// consumer scenarios. This is mostly only useful for internal unit tests.
 	// Never allocates. Thread-safe.
 	template<typename U>
 	bool try_dequeue_non_interleaved(U& item)
@@ -918,7 +1026,7 @@ public:
 		}
 		return false;
 	}
-
+	
 	// Attempts to dequeue from the queue using an explicit consumer token.
 	// Returns false if all producer streams appeared empty at the time they
 	// were checked (so, the queue is likely but not guaranteed to be empty).
@@ -931,13 +1039,13 @@ public:
 		// If you see that the global offset has changed, you must reset your consumption counter and move to your designated place
 		// If there's no items where you're supposed to be, keep moving until you find a producer with some items
 		// If the global offset has not changed but you've run out of items to consume, move over from your current position until you find a producer with something in it
-
+		
 		if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
 			if (!update_current_producer_after_rotation(token)) {
 				return false;
 			}
 		}
-
+		
 		// If there was at least one non-empty queue but it appears empty at the time
 		// we try to dequeue from it, we need to make sure every queue's been tried
 		if (static_cast<ProducerBase*>(token.currentProducer)->dequeue(item)) {
@@ -946,7 +1054,7 @@ public:
 			}
 			return true;
 		}
-
+		
 		auto tail = producerListTail.load(std::memory_order_acquire);
 		auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod();
 		if (ptr == nullptr) {
@@ -965,7 +1073,7 @@ public:
 		}
 		return false;
 	}
-
+	
 	// Attempts to dequeue several elements from the queue.
 	// Returns the number of items actually dequeued.
 	// Returns 0 if all producer streams appeared empty at the time they
@@ -983,7 +1091,7 @@ public:
 		}
 		return count;
 	}
-
+	
 	// Attempts to dequeue several elements from the queue using an explicit consumer token.
 	// Returns the number of items actually dequeued.
 	// Returns 0 if all producer streams appeared empty at the time they
@@ -997,7 +1105,7 @@ public:
 				return false;
 			}
 		}
-
+		
 		size_t count = static_cast<ProducerBase*>(token.currentProducer)->dequeue_bulk(itemFirst, max);
 		if (count == max) {
 			if ((token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {
@@ -1007,7 +1115,7 @@ public:
 		}
 		token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(count);
 		max -= count;
-
+		
 		auto tail = producerListTail.load(std::memory_order_acquire);
 		auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod();
 		if (ptr == nullptr) {
@@ -1031,9 +1139,9 @@ public:
 		}
 		return count;
 	}
-
-
-
+	
+	
+	
 	// Attempts to dequeue from a specific producer's inner queue.
 	// If you happen to know which producer you want to dequeue from, this
 	// is significantly faster than using the general-case try_dequeue methods.
@@ -1045,7 +1153,7 @@ public:
 	{
 		return static_cast<ExplicitProducer*>(producer.producer)->dequeue(item);
 	}
-
+	
 	// Attempts to dequeue several elements from a specific producer's inner queue.
 	// Returns the number of items actually dequeued.
 	// If you happen to know which producer you want to dequeue from, this
@@ -1058,8 +1166,8 @@ public:
 	{
 		return static_cast<ExplicitProducer*>(producer.producer)->dequeue_bulk(itemFirst, max);
 	}
-
-
+	
+	
 	// Returns an estimate of the total number of elements currently in the queue. This
 	// estimate is only accurate if the queue has completely stabilized before it is called
 	// (i.e. all enqueue and dequeue operations have completed and their memory effects are
@@ -1074,8 +1182,8 @@ public:
 		}
 		return size;
 	}
-
-
+	
+	
 	// Returns true if the underlying atomic variables used by
 	// the queue are lock-free (they should be on most platforms).
 	// Thread-safe.
@@ -1096,40 +1204,40 @@ private:
 	friend struct ConsumerToken;
 	friend struct ExplicitProducer;
 	friend class ConcurrentQueueTests;
-
+		
 	enum AllocationMode { CanAlloc, CannotAlloc };
-
-
+	
+	
 	///////////////////////////////
 	// Queue methods
 	///////////////////////////////
-
+	
 	template<AllocationMode canAlloc, typename U>
 	inline bool inner_enqueue(producer_token_t const& token, U&& element)
 	{
 		return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue<canAlloc>(std::forward<U>(element));
 	}
-
+	
 	template<AllocationMode canAlloc, typename U>
 	inline bool inner_enqueue(U&& element)
 	{
 		auto producer = get_or_add_implicit_producer();
 		return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue<canAlloc>(std::forward<U>(element));
 	}
-
+	
 	template<AllocationMode canAlloc, typename It>
 	inline bool inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
 	{
 		return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk<canAlloc>(std::forward<It>(itemFirst), count);
 	}
-
+	
 	template<AllocationMode canAlloc, typename It>
 	inline bool inner_enqueue_bulk(It itemFirst, size_t count)
 	{
 		auto producer = get_or_add_implicit_producer();
 		return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk<canAlloc>(std::forward<It>(itemFirst), count);
 	}
-
+	
 	inline bool update_current_producer_after_rotation(consumer_token_t& token)
 	{
 		// Ah, there's been a rotation, figure out where we should be!
@@ -1152,7 +1260,7 @@ private:
 				}
 			}
 		}
-
+		
 		std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset;
 		if (delta >= prodCount) {
 			delta = delta % prodCount;
@@ -1163,27 +1271,27 @@ private:
 				token.desiredProducer = tail;
 			}
 		}
-
+		
 		token.lastKnownGlobalOffset = globalOffset;
 		token.currentProducer = token.desiredProducer;
 		token.itemsConsumedFromCurrent = 0;
 		return true;
 	}
-
-
+	
+	
 	///////////////////////////
 	// Free list
 	///////////////////////////
-
+	
 	template <typename N>
 	struct FreeListNode
 	{
 		FreeListNode() : freeListRefs(0), freeListNext(nullptr) { }
-
+		
 		std::atomic<std::uint32_t> freeListRefs;
 		std::atomic<N*> freeListNext;
 	};
-
+	
 	// A simple CAS-based lock-free free list. Not the fastest thing in the world under heavy contention, but
 	// simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly
 	// speedy under low contention.
@@ -1193,15 +1301,15 @@ private:
 		FreeList() : freeListHead(nullptr) { }
 		FreeList(FreeList&& other) : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) { other.freeListHead.store(nullptr, std::memory_order_relaxed); }
 		void swap(FreeList& other) { details::swap_relaxed(freeListHead, other.freeListHead); }
-
-		FreeList(FreeList const&) = delete;
-		FreeList& operator=(FreeList const&) = delete;
-
+		
+		FreeList(FreeList const&) MOODYCAMEL_DELETE_FUNCTION;
+		FreeList& operator=(FreeList const&) MOODYCAMEL_DELETE_FUNCTION;
+		
 		inline void add(N* node)
 		{
 #if MCDBGQ_NOLOCKFREE_FREELIST
 			debug::DebugLock lock(mutex);
-#endif
+#endif		
 			// We know that the should-be-on-freelist bit is 0 at this point, so it's safe to
 			// set it using a fetch_add
 			if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, std::memory_order_acq_rel) == 0) {
@@ -1210,12 +1318,12 @@ private:
 		 		add_knowing_refcount_is_zero(node);
 			}
 		}
-
+		
 		inline N* try_get()
 		{
 #if MCDBGQ_NOLOCKFREE_FREELIST
 			debug::DebugLock lock(mutex);
-#endif
+#endif		
 			auto head = freeListHead.load(std::memory_order_acquire);
 			while (head != nullptr) {
 				auto prevHead = head;
@@ -1224,7 +1332,7 @@ private:
 					head = freeListHead.load(std::memory_order_acquire);
 					continue;
 				}
-
+				
 				// Good, reference count has been incremented (it wasn't at zero), which means we can read the
 				// next and not worry about it changing between now and the time we do the CAS
 				auto next = head->freeListNext.load(std::memory_order_relaxed);
@@ -1232,12 +1340,12 @@ private:
 					// Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no
 					// matter the refcount (because nobody else knows it's been taken off yet, it can't have been put back on).
 					assert((head->freeListRefs.load(std::memory_order_relaxed) & SHOULD_BE_ON_FREELIST) == 0);
-
+					
 					// Decrease refcount twice, once for our ref, and once for the list's ref
 					head->freeListRefs.fetch_add(-2, std::memory_order_release);
 					return head;
 				}
-
+				
 				// OK, the head must have changed on us, but we still need to decrease the refcount we increased.
 				// Note that we don't need to release any memory effects, but we do need to ensure that the reference
 				// count decrement happens-after the CAS on the head.
@@ -1246,13 +1354,13 @@ private:
 					add_knowing_refcount_is_zero(prevHead);
 				}
 			}
-
+			
 			return nullptr;
 		}
-
+		
 		// Useful for traversing the list when there's no contention (e.g. to destroy remaining nodes)
 		N* head_unsafe() const { return freeListHead.load(std::memory_order_relaxed); }
-
+		
 	private:
 		inline void add_knowing_refcount_is_zero(N* node)
 		{
@@ -1277,26 +1385,26 @@ private:
 				return;
 			}
 		}
-
+		
 	private:
 		// Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under contention)
 		std::atomic<N*> freeListHead;
-
+	
 	static const std::uint32_t REFS_MASK = 0x7FFFFFFF;
 	static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000;
-
+		
 #if MCDBGQ_NOLOCKFREE_FREELIST
 		debug::DebugMutex mutex;
 #endif
 	};
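
The FreeList above protects nodes with a reference count (REFS_MASK / SHOULD_BE_ON_FREELIST) so that add() and try_get() can race safely. The linking step underneath all of that is an ordinary CAS push onto a singly linked list; a minimal sketch of just that step, using a hypothetical Node type rather than the queue's own, looks like this:

    #include <atomic>

    struct Node {
        std::atomic<Node*> next{nullptr};
    };

    // Push-only lock-free list: safe as long as nodes are never reclaimed
    // while other threads may still hold pointers to them (the same caveat
    // the real FreeList handles with its reference counts).
    void push(std::atomic<Node*>& head, Node* node) {
        Node* expected = head.load(std::memory_order_relaxed);
        do {
            // Link the new node in front of the current head...
            node->next.store(expected, std::memory_order_relaxed);
            // ...and publish it; on CAS failure `expected` is reloaded for us.
        } while (!head.compare_exchange_weak(expected, node,
                                             std::memory_order_release,
                                             std::memory_order_relaxed));
    }
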
-
-
+	
+	
 	///////////////////////////
 	// Block
 	///////////////////////////
-
+	
 	enum InnerQueueContext { implicit_context = 0, explicit_context = 1 };
-
+	
 	struct Block
 	{
 		Block()
@@ -1306,7 +1414,7 @@ private:
 			owner = nullptr;
 #endif
 		}
-
+		
 		template<InnerQueueContext context>
 		inline bool is_empty() const
 		{
@@ -1317,7 +1425,7 @@ private:
 						return false;
 					}
 				}
-
+				
 				// Aha, empty; make sure we have all other memory effects that happened before the empty flags were set
 				std::atomic_thread_fence(std::memory_order_acquire);
 				return true;
@@ -1332,7 +1440,7 @@ private:
 				return false;
 			}
 		}
-
+		
 		// Returns true if the block is now empty (does not apply in explicit context)
 		template<InnerQueueContext context>
 		inline bool set_empty(index_t i)
@@ -1350,7 +1458,7 @@ private:
 				return prevVal == BLOCK_SIZE - 1;
 			}
 		}
-
+		
 		// Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0).
 		// Returns true if the block is now empty (does not apply in explicit context).
 		template<InnerQueueContext context>
@@ -1373,7 +1481,7 @@ private:
 				return prevVal + count == BLOCK_SIZE;
 			}
 		}
-
+		
 		template<InnerQueueContext context>
 		inline void set_all_empty()
 		{
@@ -1388,7 +1496,7 @@ private:
 				elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed);
 			}
 		}
-
+		
 		template<InnerQueueContext context>
 		inline void reset_empty()
 		{
@@ -1403,10 +1511,10 @@ private:
 				elementsCompletelyDequeued.store(0, std::memory_order_relaxed);
 			}
 		}
-
+		
 		inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT { return reinterpret_cast<T*>(elements) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); }
-		inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { return reinterpret_cast<T*>(elements) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); }
-
+		inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { return reinterpret_cast<T const*>(elements) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); }
+		
 	public:
 		Block* next;
 		std::atomic<size_t> elementsCompletelyDequeued;
@@ -1418,7 +1526,7 @@ private:
 		std::atomic<Block*> freeListNext;
 		std::atomic<bool> shouldBeOnFreeList;
 		bool dynamicallyAllocated;		// Perhaps a better name for this would be 'isNotPartOfInitialBlockPool'
-
+		
 #if MCDBGQ_TRACKMEM
 		void* owner;
 #endif
@@ -1430,11 +1538,11 @@ public:
 	struct MemStats;
 private:
 #endif
-
+	
 	///////////////////////////
 	// Producer base
 	///////////////////////////
-
+	
 	struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase
 	{
 		ProducerBase(ConcurrentQueue* parent, bool isExplicit) :
@@ -1447,9 +1555,9 @@ private:
 			parent(parent)
 		{
 		}
-
+		
 		virtual ~ProducerBase() { };
-
+		
 		template<typename U>
 		inline bool dequeue(U& element)
 		{
@@ -1460,7 +1568,7 @@ private:
 				return static_cast<ImplicitProducer*>(this)->dequeue(element);
 			}
 		}
-
+		
 		template<typename It>
 		inline size_t dequeue_bulk(It& itemFirst, size_t max)
 		{
@@ -1471,41 +1579,41 @@ private:
 				return static_cast<ImplicitProducer*>(this)->dequeue_bulk(itemFirst, max);
 			}
 		}
-
+		
 		inline ProducerBase* next_prod() const { return static_cast<ProducerBase*>(next); }
-
+		
 		inline size_t size_approx() const
 		{
 			auto tail = tailIndex.load(std::memory_order_relaxed);
 			auto head = headIndex.load(std::memory_order_relaxed);
 			return details::circular_less_than(head, tail) ? static_cast<size_t>(tail - head) : 0;
 		}
-
+		
 		inline index_t getTail() const { return tailIndex.load(std::memory_order_relaxed); }
 	protected:
 		std::atomic<index_t> tailIndex;		// Where to enqueue to next
 		std::atomic<index_t> headIndex;		// Where to dequeue from next
-
+		
 		std::atomic<index_t> dequeueOptimisticCount;
 		std::atomic<index_t> dequeueOvercommit;
-
+		
 		Block* tailBlock;
-
+		
 	public:
 		bool isExplicit;
 		ConcurrentQueue* parent;
-
+		
 	protected:
 #if MCDBGQ_TRACKMEM
 		friend struct MemStats;
 #endif
 	};
-
-
+	
+	
 	///////////////////////////
 	// Explicit queue
 	///////////////////////////
-
+		
 	struct ExplicitProducer : public ProducerBase
 	{
 		explicit ExplicitProducer(ConcurrentQueue* parent) :
@@ -1521,10 +1629,10 @@ private:
 			if (poolBasedIndexSize > pr_blockIndexSize) {
 				pr_blockIndexSize = poolBasedIndexSize;
 			}
-
+			
 			new_block_index(0);		// This creates an index with double the number of current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE
 		}
-
+		
 		~ExplicitProducer()
 		{
 			// Destruct any elements not yet dequeued.
@@ -1533,15 +1641,17 @@ private:
 			if (this->tailBlock != nullptr) {		// Note this means there must be a block index too
 				// First find the block that's partially dequeued, if any
 				Block* halfDequeuedBlock = nullptr;
-				if (this->headIndex.load(std::memory_order_relaxed) != this->tailIndex.load(std::memory_order_relaxed) && (this->headIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)) != 0) {
+				if ((this->headIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)) != 0) {
 					// The head's not on a block boundary, meaning a block somewhere is partially dequeued
+					// (or the head block is the tail block and was fully dequeued, but the head/tail are still not on a boundary)
 					size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & (pr_blockIndexSize - 1);
 					while (details::circular_less_than<index_t>(pr_blockIndexEntries[i].base + BLOCK_SIZE, this->headIndex.load(std::memory_order_relaxed))) {
 						i = (i + 1) & (pr_blockIndexSize - 1);
 					}
+					assert(details::circular_less_than<index_t>(pr_blockIndexEntries[i].base, this->headIndex.load(std::memory_order_relaxed)));
 					halfDequeuedBlock = pr_blockIndexEntries[i].block;
 				}
-
+				
 				// Start at the head block (note the first line in the loop gives us the head from the tail on the first iteration)
 				auto block = this->tailBlock;
 				do {
@@ -1549,12 +1659,12 @@ private:
 					if (block->ConcurrentQueue::Block::template is_empty<explicit_context>()) {
 						continue;
 					}
-
+					
 					size_t i = 0;	// Offset into block
 					if (block == halfDequeuedBlock) {
 						i = static_cast<size_t>(this->headIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1));
 					}
-
+					
 					// Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail index
 					auto lastValidIndex = (this->tailIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 ? BLOCK_SIZE : static_cast<size_t>(this->tailIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1));
 					while (i != BLOCK_SIZE && (block != this->tailBlock || i != lastValidIndex)) {
@@ -1562,7 +1672,7 @@ private:
 					}
 				} while (block != this->tailBlock);
 			}
-
+			
 			// Destroy all blocks that we own
 			if (this->tailBlock != nullptr) {
 				auto block = this->tailBlock;
@@ -1574,17 +1684,17 @@ private:
 					block = next;
 				} while (block != this->tailBlock);
 			}
-
+			
 			// Destroy the block indices
 			auto header = static_cast<BlockIndexHeader*>(pr_blockIndexRaw);
 			while (header != nullptr) {
 				auto prev = static_cast<BlockIndexHeader*>(header->prev);
 				header->~BlockIndexHeader();
-				Traits::free(header);
+				(Traits::free)(header);
 				header = prev;
 			}
 		}
-
+		
 		template<AllocationMode allocMode, typename U>
 		inline bool enqueue(U&& element)
 		{
@@ -1595,10 +1705,10 @@ private:
 				auto startBlock = this->tailBlock;
 				auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;
 				if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()) {
-					// We can re-use the block ahead of us, it's empty!
+					// We can re-use the block ahead of us, it's empty!					
 					this->tailBlock = this->tailBlock->next;
 					this->tailBlock->ConcurrentQueue::Block::template reset_empty<explicit_context>();
-
+					
 					// We'll put the block on the block index (guaranteed to be room since we're conceptually removing the
 					// last block from it first -- except instead of removing then adding, we can just overwrite).
 					// Note that there must be a valid block index here, since even if allocation failed in the ctor,
@@ -1623,12 +1733,12 @@ private:
 						// Hmm, the circular block index is already full -- we'll need
 						// to allocate a new index. Note pr_blockIndexRaw can only be nullptr if
 						// the initial allocation failed in the constructor.
-
+						
 						if (allocMode == CannotAlloc || !new_block_index(pr_blockIndexSlotsUsed)) {
 							return false;
 						}
 					}
-
+					
 					// Insert a new block in the circular linked list
 					auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>();
 					if (newBlock == nullptr) {
@@ -1648,7 +1758,7 @@ private:
 					this->tailBlock = newBlock;
 					++pr_blockIndexSlotsUsed;
 				}
-
+				
 				if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward<U>(element)))) {
 					// The constructor may throw. We want the element not to appear in the queue in
 					// that case (without corrupting the queue):
@@ -1667,27 +1777,27 @@ private:
 					(void)startBlock;
 					(void)originalBlockIndexSlotsUsed;
 				}
-
+				
 				// Add block to block index
 				auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
 				entry.base = currentTailIndex;
 				entry.block = this->tailBlock;
 				blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release);
 				pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
-
+				
 				if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward<U>(element)))) {
 					this->tailIndex.store(newTailIndex, std::memory_order_release);
 					return true;
 				}
 			}
-
+			
 			// Enqueue
 			new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
-
+			
 			this->tailIndex.store(newTailIndex, std::memory_order_release);
 			return true;
 		}
-
+		
 		template<typename U>
 		bool dequeue(U& element)
 		{
@@ -1695,10 +1805,10 @@ private:
 			auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
 			if (details::circular_less_than<index_t>(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) {
 				// Might be something to dequeue, let's give it a try
-
+				
 				// Note that this if is purely for performance purposes in the common case when the queue is
 				// empty and the values are eventually consistent -- we may enter here spuriously.
-
+				
 				// Note that whatever the values of overcommit and tail are, they are not going to change (unless we
 				// change them) and must be the same value at this point (inside the if) as when the if condition was
 				// evaluated.
@@ -1711,33 +1821,39 @@ private:
 				// unfortunately that can't be shown to be correct using only the C++11 standard.
 				// See http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case
 				std::atomic_thread_fence(std::memory_order_acquire);
-
+				
 				// Increment optimistic counter, then check if it went over the boundary
 				auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed);
-
+				
 				// Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is only ever
 				// incremented after dequeueOptimisticCount -- this is enforced in the `else` block below), and since we now
 				// have a version of dequeueOptimisticCount that is at least as recent as overcommit (due to the release upon
 				// incrementing dequeueOvercommit and the acquire above that synchronizes with it), overcommit <= myDequeueCount.
 				assert(overcommit <= myDequeueCount);
-
+				
 				// Note that we reload tail here in case it changed; it will be the same value as before or greater, since
 				// this load is sequenced after (happens after) the earlier load above. This is supported by read-read
 				// coherency (as defined in the standard), explained here: http://en.cppreference.com/w/cpp/atomic/memory_order
 				tail = this->tailIndex.load(std::memory_order_acquire);
 				if (details::likely(details::circular_less_than<index_t>(myDequeueCount - overcommit, tail))) {
 					// Guaranteed to be at least one element to dequeue!
-
+					
 					// Get the index. Note that since there's guaranteed to be at least one element, this
-					// will never exceed tail.
-					auto index = this->headIndex.fetch_add(1, std::memory_order_relaxed);
-
-
+					// will never exceed tail. We need to do an acquire-release fence here since it's possible
+					// that whatever condition got us to this point was for an earlier enqueued element (that
+					// we already see the memory effects for), but that by the time we increment somebody else
+					// has incremented it, and we need to see the memory effects for *that* element, which is
+				// has incremented it, and we need to see the memory effects for *that* element, which
+					// place with the more current condition (they must have acquired a tail that is at least
+					// as recent).
+					auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel);
+					
+					
 					// Determine which block the element is in
-
+					
 					auto localBlockIndex = blockIndex.load(std::memory_order_acquire);
 					auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire);
-
+					
 					// We need to be careful here about subtracting and dividing because of index wrap-around.
 					// When an index wraps, we need to preserve the sign of the offset when dividing it by the
 					// block size (in order to get a correct signed block count offset in all cases):
@@ -1745,7 +1861,7 @@ private:
 					auto blockBaseIndex = index & ~static_cast<index_t>(BLOCK_SIZE - 1);
 					auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(blockBaseIndex - headBase) / BLOCK_SIZE);
 					auto block = localBlockIndex->entries[(localBlockIndexHead + offset) & (localBlockIndex->size - 1)].block;
-
+					
 					// Dequeue
 					auto& el = *((*block)[index]);
 					if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) {
@@ -1754,14 +1870,14 @@ private:
 						struct Guard {
 							Block* block;
 							index_t index;
-
+							
 							~Guard()
 							{
 								(*block)[index]->~T();
 								block->ConcurrentQueue::Block::template set_empty<explicit_context>(index);
 							}
 						} guard = { block, index };
-
+						
 						element = std::move(el);
 					}
 					else {
@@ -1769,7 +1885,7 @@ private:
 						el.~T();
 						block->ConcurrentQueue::Block::template set_empty<explicit_context>(index);
 					}
-
+					
 					return true;
 				}
 				else {
@@ -1777,10 +1893,10 @@ private:
 					this->dequeueOvercommit.fetch_add(1, std::memory_order_release);		// Release so that the fetch_add on dequeueOptimisticCount is guaranteed to happen before this write
 				}
 			}
-
+		
 			return false;
 		}
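
The counter scheme in the dequeue() above can be replayed with plain integers: every attempt bumps dequeueOptimisticCount, and an attempt that finds nothing bumps dequeueOvercommit instead of touching headIndex, so the head only ever advances by the number of elements actually taken. A single-threaded sketch of that bookkeeping (ordinary size_t values standing in for the atomics, index wrap-around via circular_less_than ignored for brevity):

    #include <cassert>
    #include <cstddef>

    int main() {
        std::size_t tail = 3;          // three elements have been enqueued
        std::size_t head = 0;          // where to dequeue from next
        std::size_t optimistic = 0;    // dequeueOptimisticCount
        std::size_t overcommit = 0;    // dequeueOvercommit

        for (int attempt = 0; attempt < 5; ++attempt) {   // five dequeue attempts
            std::size_t myCount = optimistic++;           // claim an attempt slot
            if (myCount - overcommit < tail) {
                ++head;                                   // really take an element
            } else {
                ++overcommit;                             // attempt found nothing: undo
            }
        }

        // Invariant the real code relies on: successful dequeues == optimistic - overcommit,
        // and that is exactly how far head advanced.
        assert(head == optimistic - overcommit);
        assert(head == tail);          // only 3 of the 5 attempts succeeded
        return 0;
    }
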
-
+		
 		template<AllocationMode allocMode, typename It>
 		bool enqueue_bulk(It itemFirst, size_t count)
 		{
@@ -1791,9 +1907,9 @@ private:
 			auto startBlock = this->tailBlock;
 			auto originalBlockIndexFront = pr_blockIndexFront;
 			auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;
-
+			
 			Block* firstAllocatedBlock = nullptr;
-
+			
 			// Figure out how many blocks we'll need to allocate, and do so
 			size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));
 			index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
@@ -1802,21 +1918,21 @@ private:
 				while (blockBaseDiff > 0 && this->tailBlock != nullptr && this->tailBlock->next != firstAllocatedBlock && this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()) {
 					blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
 					currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
-
+					
 					this->tailBlock = this->tailBlock->next;
 					firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock;
-
+					
 					auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
 					entry.base = currentTailIndex;
 					entry.block = this->tailBlock;
 					pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
 				}
-
+				
 				// Now allocate as many blocks as necessary from the block pool
 				while (blockBaseDiff > 0) {
 					blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
 					currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
-
+					
 					auto head = this->headIndex.load(std::memory_order_relaxed);
 					assert(!details::circular_less_than<index_t>(currentTailIndex, head));
 					bool full = !details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head));
@@ -1828,13 +1944,13 @@ private:
 							this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
 							return false;
 						}
-
+						
 						// pr_blockIndexFront is updated inside new_block_index, so we need to
 						// update our fallback value too (since we keep the new index even if we
 						// later fail)
 						originalBlockIndexFront = originalBlockIndexSlotsUsed;
 					}
-
+					
 					// Insert a new block in the circular linked list
 					auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>();
 					if (newBlock == nullptr) {
@@ -1843,7 +1959,7 @@ private:
 						this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
 						return false;
 					}
-
+					
 #if MCDBGQ_TRACKMEM
 					newBlock->owner = this;
 #endif
@@ -1857,15 +1973,15 @@ private:
 					}
 					this->tailBlock = newBlock;
 					firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock;
-
+					
 					++pr_blockIndexSlotsUsed;
-
+					
 					auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
 					entry.base = currentTailIndex;
 					entry.block = this->tailBlock;
 					pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
 				}
-
+				
 				// Excellent, all allocations succeeded. Reset each block's emptiness before we fill them up, and
 				// publish the new block index front
 				auto block = firstAllocatedBlock;
@@ -1876,12 +1992,12 @@ private:
 					}
 					block = block->next;
 				}
-
+				
 				if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) {
 					blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);
 				}
 			}
-
+			
 			// Enqueue, one block at a time
 			index_t newTailIndex = startTailIndex + static_cast<index_t>(count);
 			currentTailIndex = startTailIndex;
@@ -1922,11 +2038,11 @@ private:
 						// any allocated blocks in our linked list for later, though).
 						auto constructedStopIndex = currentTailIndex;
 						auto lastBlockEnqueued = this->tailBlock;
-
+						
 						pr_blockIndexFront = originalBlockIndexFront;
 						pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
 						this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
-
+						
 						if (!details::is_trivially_destructible<T>::value) {
 							auto block = startBlock;
 							if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
@@ -1950,22 +2066,22 @@ private:
 						MOODYCAMEL_RETHROW;
 					}
 				}
-
+				
 				if (this->tailBlock == endBlock) {
 					assert(currentTailIndex == newTailIndex);
 					break;
 				}
 				this->tailBlock = this->tailBlock->next;
 			}
-
+			
 			if (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst))) && firstAllocatedBlock != nullptr) {
 				blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);
 			}
-
+			
 			this->tailIndex.store(newTailIndex, std::memory_order_release);
 			return true;
 		}
-
+		
 		template<typename It>
 		size_t dequeue_bulk(It& itemFirst, size_t max)
 		{
@@ -1975,10 +2091,10 @@ private:
 			if (details::circular_less_than<size_t>(0, desiredCount)) {
 				desiredCount = desiredCount < max ? desiredCount : max;
 				std::atomic_thread_fence(std::memory_order_acquire);
-
+				
 				auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
 				assert(overcommit <= myDequeueCount);
-
+				
 				tail = this->tailIndex.load(std::memory_order_acquire);
 				auto actualCount = static_cast<size_t>(tail - (myDequeueCount - overcommit));
 				if (details::circular_less_than<size_t>(0, actualCount)) {
@@ -1986,20 +2102,20 @@ private:
 					if (actualCount < desiredCount) {
 						this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release);
 					}
-
+					
 					// Get the first index. Note that since there's guaranteed to be at least actualCount elements, this
 					// will never exceed tail.
-					auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_relaxed);
-
+					auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);
+					
 					// Determine which block the first element is in
 					auto localBlockIndex = blockIndex.load(std::memory_order_acquire);
 					auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire);
-
+					
 					auto headBase = localBlockIndex->entries[localBlockIndexHead].base;
 					auto firstBlockBaseIndex = firstIndex & ~static_cast<index_t>(BLOCK_SIZE - 1);
 					auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(firstBlockBaseIndex - headBase) / BLOCK_SIZE);
 					auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1);
-
+					
 					// Iterate the blocks and dequeue
 					auto index = firstIndex;
 					do {
@@ -2036,19 +2152,19 @@ private:
 									}
 									block->ConcurrentQueue::Block::template set_many_empty<explicit_context>(firstIndexInBlock, static_cast<size_t>(endIndex - firstIndexInBlock));
 									indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);
-
+									
 									firstIndexInBlock = index;
 									endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
 									endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
 								} while (index != firstIndex + actualCount);
-
+								
 								MOODYCAMEL_RETHROW;
 							}
 						}
 						block->ConcurrentQueue::Block::template set_many_empty<explicit_context>(firstIndexInBlock, static_cast<size_t>(endIndex - firstIndexInBlock));
 						indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);
 					} while (index != firstIndex + actualCount);
-
+					
 					return actualCount;
 				}
 				else {
@@ -2056,17 +2172,17 @@ private:
 					this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release);
 				}
 			}
-
+			
 			return 0;
 		}
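
The offset computation in dequeue_bulk() above (and the matching one in dequeue()) casts the index difference to a signed type before dividing by BLOCK_SIZE, because after the index wraps the element's block base can be numerically smaller than the base stored at the block-index head. A small, self-contained demonstration with 32-bit indices and a stand-in BLOCK_SIZE of 32:

    #include <cassert>
    #include <cstdint>
    #include <type_traits>

    int main() {
        using index_t = std::uint32_t;
        const index_t BLOCK_SIZE = 32;

        // The entry at the block-index head covers bases starting here...
        index_t headBase = 64;
        // ...but the element we want lives in the block just before it
        // (e.g. because the circular index has wrapped around).
        index_t blockBase = 32;

        // Unsigned subtraction wraps: 32 - 64 == 0xFFFFFFE0, and dividing that
        // as an unsigned number gives a huge, wrong block offset.
        index_t rawDiff = blockBase - headBase;
        assert(rawDiff == 0xFFFFFFE0u);

        // Casting to the signed counterpart first preserves the sign of the
        // offset, which is what the queue's offset computation does.
        auto offset = static_cast<std::make_signed<index_t>::type>(blockBase - headBase)
                      / static_cast<std::make_signed<index_t>::type>(BLOCK_SIZE);
        assert(offset == -1);   // one block *before* the head entry
        return 0;
    }
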
-
+		
 	private:
 		struct BlockIndexEntry
 		{
 			index_t base;
 			Block* block;
 		};
-
+		
 		struct BlockIndexHeader
 		{
 			size_t size;
@@ -2074,22 +2190,22 @@ private:
 			BlockIndexEntry* entries;
 			void* prev;
 		};
-
-
+		
+		
 		bool new_block_index(size_t numberOfFilledSlotsToExpose)
 		{
 			auto prevBlockSizeMask = pr_blockIndexSize - 1;
-
+			
 			// Create the new block
 			pr_blockIndexSize <<= 1;
-			auto newRawPtr = static_cast<char*>(Traits::malloc(sizeof(BlockIndexHeader) + std::alignment_of<BlockIndexEntry>::value - 1 + sizeof(BlockIndexEntry) * pr_blockIndexSize));
+			auto newRawPtr = static_cast<char*>((Traits::malloc)(sizeof(BlockIndexHeader) + std::alignment_of<BlockIndexEntry>::value - 1 + sizeof(BlockIndexEntry) * pr_blockIndexSize));
 			if (newRawPtr == nullptr) {
 				pr_blockIndexSize >>= 1;		// Reset to allow graceful retry
 				return false;
 			}
-
+			
 			auto newBlockIndexEntries = reinterpret_cast<BlockIndexEntry*>(details::align_for<BlockIndexEntry>(newRawPtr + sizeof(BlockIndexHeader)));
-
+			
 			// Copy in all the old indices, if any
 			size_t j = 0;
 			if (pr_blockIndexSlotsUsed != 0) {
@@ -2099,44 +2215,50 @@ private:
 					i = (i + 1) & prevBlockSizeMask;
 				} while (i != pr_blockIndexFront);
 			}
-
+			
 			// Update everything
 			auto header = new (newRawPtr) BlockIndexHeader;
 			header->size = pr_blockIndexSize;
 			header->front.store(numberOfFilledSlotsToExpose - 1, std::memory_order_relaxed);
 			header->entries = newBlockIndexEntries;
 			header->prev = pr_blockIndexRaw;		// we link the new block to the old one so we can free it later
-
+			
 			pr_blockIndexFront = j;
 			pr_blockIndexEntries = newBlockIndexEntries;
 			pr_blockIndexRaw = newRawPtr;
 			blockIndex.store(header, std::memory_order_release);
-
+			
 			return true;
 		}
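
new_block_index() above carves the BlockIndexHeader and its entry array out of a single allocation: it over-allocates by alignment_of<BlockIndexEntry>::value - 1 bytes and aligns the entry pointer by hand (what details::align_for does elsewhere in this header). A minimal sketch of that layout trick with stand-in Header/Entry types:

    #include <cstdint>
    #include <cstdlib>
    #include <new>

    struct Header { std::size_t size; void* entries; };
    struct Entry  { std::uint64_t base; void* block; };

    // Round ptr up to the next multiple of alignof(U) (costs at most alignof(U)-1 bytes).
    template<typename U>
    char* align_for(char* ptr) {
        const std::size_t a = alignof(U);
        const std::size_t mis = reinterpret_cast<std::uintptr_t>(ptr) % a;
        return mis == 0 ? ptr : ptr + (a - mis);
    }

    Header* make_index(std::size_t entryCount) {
        // One malloc covers the header, the worst-case alignment padding,
        // and the entry array itself.
        char* raw = static_cast<char*>(std::malloc(
            sizeof(Header) + alignof(Entry) - 1 + sizeof(Entry) * entryCount));
        if (raw == nullptr) return nullptr;

        Header* header = new (raw) Header;
        Entry* entries = reinterpret_cast<Entry*>(align_for<Entry>(raw + sizeof(Header)));
        for (std::size_t i = 0; i != entryCount; ++i) new (entries + i) Entry;

        header->size = entryCount;
        header->entries = entries;
        return header;   // released later with a single std::free(header)
    }
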
-
+		
 	private:
 		std::atomic<BlockIndexHeader*> blockIndex;
-
+		
 		// To be used by producer only -- consumer must use the ones referenced by blockIndex
 		size_t pr_blockIndexSlotsUsed;
 		size_t pr_blockIndexSize;
 		size_t pr_blockIndexFront;		// Next slot (not current)
 		BlockIndexEntry* pr_blockIndexEntries;
 		void* pr_blockIndexRaw;
-
+		
+#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
+	public:
+		ExplicitProducer* nextExplicitProducer;
+	private:
+#endif
+		
 #if MCDBGQ_TRACKMEM
 		friend struct MemStats;
 #endif
 	};
-
-
+	
+	
 	//////////////////////////////////
 	// Implicit queue
 	//////////////////////////////////
-
+	
 	struct ImplicitProducer : public ProducerBase
-	{
+	{			
 		ImplicitProducer(ConcurrentQueue* parent) :
 			ProducerBase(parent, false),
 			nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE),
@@ -2144,21 +2266,21 @@ private:
 		{
 			new_block_index();
 		}
-
+		
 		~ImplicitProducer()
 		{
 			// Note that since we're in the destructor we can assume that all enqueue/dequeue operations
 			// completed already; this means that all undequeued elements are placed contiguously across
 			// contiguous blocks, and that only the first and last remaining blocks can be only partially
 			// empty (all other remaining blocks must be completely full).
-
+			
 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
 			// Unregister ourselves for thread termination notification
 			if (!this->inactive.load(std::memory_order_relaxed)) {
 				details::ThreadExitNotifier::unsubscribe(&threadExitListener);
 			}
 #endif
-
+			
 			// Destroy all remaining elements!
 			auto tail = this->tailIndex.load(std::memory_order_relaxed);
 			auto index = this->headIndex.load(std::memory_order_relaxed);
@@ -2171,10 +2293,10 @@ private:
 						// Free the old block
 						this->parent->destroy(block);
 					}
-
+					
 					block = get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed);
 				}
-
+				
 				((*block)[index])->~T();
 				++index;
 			}
@@ -2184,7 +2306,7 @@ private:
 			if (this->tailBlock != nullptr && (forceFreeLastBlock || (tail & static_cast<index_t>(BLOCK_SIZE - 1)) != 0) && this->tailBlock->dynamicallyAllocated) {
 				this->parent->destroy(this->tailBlock);
 			}
-
+			
 			// Destroy block index
 			auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);
 			if (localBlockIndex != nullptr) {
@@ -2194,12 +2316,12 @@ private:
 				do {
 					auto prev = localBlockIndex->prev;
 					localBlockIndex->~BlockIndexHeader();
-					Traits::free(localBlockIndex);
+					(Traits::free)(localBlockIndex);
 					localBlockIndex = prev;
 				} while (localBlockIndex != nullptr);
 			}
 		}
-
+		
 		template<AllocationMode allocMode, typename U>
 		inline bool enqueue(U&& element)
 		{
@@ -2220,7 +2342,7 @@ private:
 				if (!insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) {
 					return false;
 				}
-
+				
 				// Get ahold of a new block
 				auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>();
 				if (newBlock == nullptr) {
@@ -2232,7 +2354,7 @@ private:
 				newBlock->owner = this;
 #endif
 				newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>();
-
+				
 				if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward<U>(element)))) {
 					// May throw, try to insert now before we publish the fact that we have this new block
 					MOODYCAMEL_TRY {
@@ -2245,25 +2367,25 @@ private:
 						MOODYCAMEL_RETHROW;
 					}
 				}
-
+				
 				// Insert the new block into the index
 				idxEntry->value.store(newBlock, std::memory_order_relaxed);
-
+				
 				this->tailBlock = newBlock;
-
+				
 				if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward<U>(element)))) {
 					this->tailIndex.store(newTailIndex, std::memory_order_release);
 					return true;
 				}
 			}
-
+			
 			// Enqueue
 			new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
-
+			
 			this->tailIndex.store(newTailIndex, std::memory_order_release);
 			return true;
 		}
-
+		
 		template<typename U>
 		bool dequeue(U& element)
 		{
@@ -2272,20 +2394,20 @@ private:
 			index_t overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
 			if (details::circular_less_than<index_t>(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) {
 				std::atomic_thread_fence(std::memory_order_acquire);
-
+				
 				index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed);
 				assert(overcommit <= myDequeueCount);
 				tail = this->tailIndex.load(std::memory_order_acquire);
 				if (details::likely(details::circular_less_than<index_t>(myDequeueCount - overcommit, tail))) {
-					index_t index = this->headIndex.fetch_add(1, std::memory_order_relaxed);
-
+					index_t index = this->headIndex.fetch_add(1, std::memory_order_acq_rel);
+					
 					// Determine which block the element is in
 					auto entry = get_block_index_entry_for_index(index);
-
+					
 					// Dequeue
 					auto block = entry->value.load(std::memory_order_relaxed);
 					auto& el = *((*block)[index]);
-
+					
 					if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) {
 #if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
 						// Note: Acquiring the mutex with every dequeue instead of only when a block
@@ -2297,7 +2419,7 @@ private:
 							index_t index;
 							BlockIndexEntry* entry;
 							ConcurrentQueue* parent;
-
+							
 							~Guard()
 							{
 								(*block)[index]->~T();
@@ -2307,13 +2429,13 @@ private:
 								}
 							}
 						} guard = { block, index, entry, this->parent };
-
+						
 						element = std::move(el);
 					}
 					else {
 						element = std::move(el);
 						el.~T();
-
+					
 						if (block->ConcurrentQueue::Block::template set_empty<implicit_context>(index)) {
 							{
 #if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
@@ -2325,34 +2447,34 @@ private:
 							this->parent->add_block_to_free_list(block);		// releases the above store
 						}
 					}
-
+					
 					return true;
 				}
 				else {
 					this->dequeueOvercommit.fetch_add(1, std::memory_order_release);
 				}
 			}
-
+		
 			return false;
 		}
-
+		
 		template<AllocationMode allocMode, typename It>
 		bool enqueue_bulk(It itemFirst, size_t count)
 		{
 			// First, we need to make sure we have enough room to enqueue all of the elements;
 			// this means pre-allocating blocks and putting them in the block index (but only if
 			// all the allocations succeeded).
-
+			
 			// Note that the tailBlock we start off with may not be owned by us any more;
 			// this happens if it was filled up exactly to the top (setting tailIndex to
 			// the first index of the next block which is not yet allocated), then dequeued
 			// completely (putting it on the free list) before we enqueue again.
-
+			
 			index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed);
 			auto startBlock = this->tailBlock;
 			Block* firstAllocatedBlock = nullptr;
 			auto endBlock = this->tailBlock;
-
+			
 			// Figure out how many blocks we'll need to allocate, and do so
 			size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));
 			index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
@@ -2363,7 +2485,7 @@ private:
 				do {
 					blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
 					currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
-
+					
 					// Find out where we'll be inserting this block in the block index
 					BlockIndexEntry* idxEntry;
 					Block* newBlock;
@@ -2387,19 +2509,19 @@ private:
 						}
 						this->parent->add_blocks_to_free_list(firstAllocatedBlock);
 						this->tailBlock = startBlock;
-
+						
 						return false;
 					}
-
+					
 #if MCDBGQ_TRACKMEM
 					newBlock->owner = this;
 #endif
 					newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>();
 					newBlock->next = nullptr;
-
+					
 					// Insert the new block into the index
 					idxEntry->value.store(newBlock, std::memory_order_relaxed);
-
+					
 					// Store the chain of blocks so that we can undo if later allocations fail,
 					// and so that we can find the blocks when we do the actual enqueueing
 					if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr) {
@@ -2411,7 +2533,7 @@ private:
 					firstAllocatedBlock = firstAllocatedBlock == nullptr ? newBlock : firstAllocatedBlock;
 				} while (blockBaseDiff > 0);
 			}
-
+			
 			// Enqueue, one block at a time
 			index_t newTailIndex = startTailIndex + static_cast<index_t>(count);
 			currentTailIndex = startTailIndex;
@@ -2441,7 +2563,7 @@ private:
 					MOODYCAMEL_CATCH (...) {
 						auto constructedStopIndex = currentTailIndex;
 						auto lastBlockEnqueued = this->tailBlock;
-
+						
 						if (!details::is_trivially_destructible<T>::value) {
 							auto block = startBlock;
 							if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
@@ -2462,7 +2584,7 @@ private:
 								block = block->next;
 							}
 						}
-
+						
 						currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
 						for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) {
 							currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
@@ -2475,7 +2597,7 @@ private:
 						MOODYCAMEL_RETHROW;
 					}
 				}
-
+				
 				if (this->tailBlock == endBlock) {
 					assert(currentTailIndex == newTailIndex);
 					break;
@@ -2485,7 +2607,7 @@ private:
 			this->tailIndex.store(newTailIndex, std::memory_order_release);
 			return true;
 		}
-
+		
 		template<typename It>
 		size_t dequeue_bulk(It& itemFirst, size_t max)
 		{
@@ -2495,10 +2617,10 @@ private:
 			if (details::circular_less_than<size_t>(0, desiredCount)) {
 				desiredCount = desiredCount < max ? desiredCount : max;
 				std::atomic_thread_fence(std::memory_order_acquire);
-
+				
 				auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
 				assert(overcommit <= myDequeueCount);
-
+				
 				tail = this->tailIndex.load(std::memory_order_acquire);
 				auto actualCount = static_cast<size_t>(tail - (myDequeueCount - overcommit));
 				if (details::circular_less_than<size_t>(0, actualCount)) {
@@ -2506,11 +2628,11 @@ private:
 					if (actualCount < desiredCount) {
 						this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release);
 					}
-
+					
 					// Get the first index. Note that since there's guaranteed to be at least actualCount elements, this
 					// will never exceed tail.
-					auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_relaxed);
-
+					auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);
+					
 					// Iterate the blocks and dequeue
 					auto index = firstIndex;
 					BlockIndexHeader* localBlockIndex;
@@ -2519,7 +2641,7 @@ private:
 						auto blockStartIndex = index;
 						auto endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
 						endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
-
+						
 						auto entry = localBlockIndex->index[indexIndex];
 						auto block = entry->value.load(std::memory_order_relaxed);
 						if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) {
@@ -2547,7 +2669,7 @@ private:
 									while (index != endIndex) {
 										(*block)[index++]->~T();
 									}
-
+									
 									if (block->ConcurrentQueue::Block::template set_many_empty<implicit_context>(blockStartIndex, static_cast<size_t>(endIndex - blockStartIndex))) {
 #if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
 										debug::DebugLock lock(mutex);
@@ -2556,12 +2678,12 @@ private:
 										this->parent->add_block_to_free_list(block);
 									}
 									indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1);
-
+									
 									blockStartIndex = index;
 									endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
 									endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
 								} while (index != firstIndex + actualCount);
-
+								
 								MOODYCAMEL_RETHROW;
 							}
 						}
@@ -2578,27 +2700,27 @@ private:
 						}
 						indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1);
 					} while (index != firstIndex + actualCount);
-
+					
 					return actualCount;
 				}
 				else {
 					this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release);
 				}
 			}
-
+			
 			return 0;
 		}
-
+		
 	private:
 		// The block size must be > 1, so any number with the low bit set is an invalid block base index
 		static const index_t INVALID_BLOCK_BASE = 1;
-
+		
 		struct BlockIndexEntry
 		{
 			std::atomic<index_t> key;
 			std::atomic<Block*> value;
 		};
-
+		
 		struct BlockIndexHeader
 		{
 			size_t capacity;
@@ -2607,7 +2729,7 @@ private:
 			BlockIndexEntry** index;
 			BlockIndexHeader* prev;
 		};
-
+		
 		template<AllocationMode allocMode>
 		inline bool insert_block_index_entry(BlockIndexEntry*& idxEntry, index_t blockStartIndex)
 		{
@@ -2616,12 +2738,12 @@ private:
 			idxEntry = localBlockIndex->index[newTail];
 			if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE ||
 				idxEntry->value.load(std::memory_order_relaxed) == nullptr) {
-
+				
 				idxEntry->key.store(blockStartIndex, std::memory_order_relaxed);
 				localBlockIndex->tail.store(newTail, std::memory_order_release);
 				return true;
 			}
-
+			
 			// No room in the old block index, try to allocate another one!
 			if (allocMode == CannotAlloc || !new_block_index()) {
 				return false;
@@ -2634,20 +2756,20 @@ private:
 			localBlockIndex->tail.store(newTail, std::memory_order_release);
 			return true;
 		}
-
+		
 		inline void rewind_block_index_tail()
 		{
 			auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);
 			localBlockIndex->tail.store((localBlockIndex->tail.load(std::memory_order_relaxed) - 1) & (localBlockIndex->capacity - 1), std::memory_order_relaxed);
 		}
-
+		
 		inline BlockIndexEntry* get_block_index_entry_for_index(index_t index) const
 		{
 			BlockIndexHeader* localBlockIndex;
 			auto idx = get_block_index_index_for_index(index, localBlockIndex);
 			return localBlockIndex->index[idx];
 		}
-
+		
 		inline size_t get_block_index_index_for_index(index_t index, BlockIndexHeader*& localBlockIndex) const
 		{
 #if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
@@ -2665,20 +2787,20 @@ private:
 			assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) == index && localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != nullptr);
 			return idx;
 		}
-
+		
 		bool new_block_index()
 		{
 			auto prev = blockIndex.load(std::memory_order_relaxed);
 			size_t prevCapacity = prev == nullptr ? 0 : prev->capacity;
 			auto entryCount = prev == nullptr ? nextBlockIndexCapacity : prevCapacity;
-			auto raw = static_cast<char*>(Traits::malloc(
+			auto raw = static_cast<char*>((Traits::malloc)(
 				sizeof(BlockIndexHeader) +
 				std::alignment_of<BlockIndexEntry>::value - 1 + sizeof(BlockIndexEntry) * entryCount +
 				std::alignment_of<BlockIndexEntry*>::value - 1 + sizeof(BlockIndexEntry*) * nextBlockIndexCapacity));
 			if (raw == nullptr) {
 				return false;
 			}
-
+			
 			auto header = new (raw) BlockIndexHeader;
 			auto entries = reinterpret_cast<BlockIndexEntry*>(details::align_for<BlockIndexEntry>(raw + sizeof(BlockIndexHeader)));
 			auto index = reinterpret_cast<BlockIndexEntry**>(details::align_for<BlockIndexEntry*>(reinterpret_cast<char*>(entries) + sizeof(BlockIndexEntry) * entryCount));
@@ -2702,14 +2824,14 @@ private:
 			header->index = index;
 			header->capacity = nextBlockIndexCapacity;
 			header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1), std::memory_order_relaxed);
-
+			
 			blockIndex.store(header, std::memory_order_release);
-
+			
 			nextBlockIndexCapacity <<= 1;
-
+			
 			return true;
 		}
-
+		
 	private:
 		size_t nextBlockIndexCapacity;
 		std::atomic<BlockIndexHeader*> blockIndex;
@@ -2719,6 +2841,12 @@ private:
 		details::ThreadExitListener threadExitListener;
 	private:
 #endif
+		
+#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
+	public:
+		ImplicitProducer* nextImplicitProducer;
+	private:
+#endif
 
 #if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
 		mutable debug::DebugMutex mutex;
@@ -2727,12 +2855,12 @@ private:
 		friend struct MemStats;
 #endif
 	};
-
-
+	
+	
 	//////////////////////////////////
 	// Block pool manipulation
 	//////////////////////////////////
-
+	
 	void populate_initial_block_list(size_t blockCount)
 	{
 		initialBlockPoolSize = blockCount;
@@ -2740,7 +2868,7 @@ private:
 			initialBlockPool = nullptr;
 			return;
 		}
-
+		
 		initialBlockPool = create_array<Block>(blockCount);
 		if (initialBlockPool == nullptr) {
 			initialBlockPoolSize = 0;
@@ -2749,18 +2877,18 @@ private:
 			initialBlockPool[i].dynamicallyAllocated = false;
 		}
 	}
-
+	
 	inline Block* try_get_block_from_initial_pool()
 	{
 		if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= initialBlockPoolSize) {
 			return nullptr;
 		}
-
+		
 		auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed);
-
+		
 		return index < initialBlockPoolSize ? (initialBlockPool + index) : nullptr;
 	}
-
+	
 	inline void add_block_to_free_list(Block* block)
 	{
 #if MCDBGQ_TRACKMEM
@@ -2768,7 +2896,7 @@ private:
 #endif
 		freeList.add(block);
 	}
-
+	
 	inline void add_blocks_to_free_list(Block* block)
 	{
 		while (block != nullptr) {
@@ -2777,12 +2905,12 @@ private:
 			block = next;
 		}
 	}
-
+	
 	inline Block* try_get_block_from_free_list()
 	{
 		return freeList.try_get();
 	}
-
+	
 	// Gets a free block from one of the memory pools, or allocates a new one (if applicable)
 	template<AllocationMode canAlloc>
 	Block* requisition_block()
@@ -2791,19 +2919,19 @@ private:
 		if (block != nullptr) {
 			return block;
 		}
-
+		
 		block = try_get_block_from_free_list();
 		if (block != nullptr) {
 			return block;
 		}
-
+		
 		if (canAlloc == CanAlloc) {
 			return create<Block>();
 		}
-
+		
 		return nullptr;
 	}
-
+	
 
 #if MCDBGQ_TRACKMEM
 	public:
@@ -2820,28 +2948,28 @@ private:
 			size_t queueClassBytes;
 			size_t implicitBlockIndexBytes;
 			size_t explicitBlockIndexBytes;
-
+			
 			friend class ConcurrentQueue;
-
+			
 		private:
 			static MemStats getFor(ConcurrentQueue* q)
 			{
 				MemStats stats = { 0 };
-
+				
 				stats.elementsEnqueued = q->size_approx();
-
+			
 				auto block = q->freeList.head_unsafe();
 				while (block != nullptr) {
 					++stats.allocatedBlocks;
 					++stats.freeBlocks;
 					block = block->freeListNext.load(std::memory_order_relaxed);
 				}
-
+				
 				for (auto ptr = q->producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
 					bool implicit = dynamic_cast<ImplicitProducer*>(ptr) != nullptr;
 					stats.implicitProducers += implicit ? 1 : 0;
 					stats.explicitProducers += implicit ? 0 : 1;
-
+					
 					if (implicit) {
 						auto prod = static_cast<ImplicitProducer*>(ptr);
 						stats.queueClassBytes += sizeof(ImplicitProducer);
@@ -2889,18 +3017,18 @@ private:
 						}
 					}
 				}
-
+				
 				auto freeOnInitialPool = q->initialBlockPoolIndex.load(std::memory_order_relaxed) >= q->initialBlockPoolSize ? 0 : q->initialBlockPoolSize - q->initialBlockPoolIndex.load(std::memory_order_relaxed);
 				stats.allocatedBlocks += freeOnInitialPool;
 				stats.freeBlocks += freeOnInitialPool;
-
+				
 				stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks;
 				stats.queueClassBytes += sizeof(ConcurrentQueue);
-
+				
 				return stats;
 			}
 		};
-
+		
 		// For debugging only. Not thread-safe.
 		MemStats getMemStats()
 		{
@@ -2909,14 +3037,20 @@ private:
 	private:
 		friend struct MemStats;
 #endif
-
-
+	
+	
 	//////////////////////////////////
 	// Producer list manipulation
-	//////////////////////////////////
-
+	//////////////////////////////////	
+	
 	ProducerBase* recycle_or_create_producer(bool isExplicit)
 	{
+		bool recycled;
+		return recycle_or_create_producer(isExplicit, recycled);
+	}
+	
+	ProducerBase* recycle_or_create_producer(bool isExplicit, bool& recycled)
+	{
 #if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
 		debug::DebugLock lock(implicitProdMutex);
 #endif
@@ -2926,32 +3060,49 @@ private:
 				bool expected = true;
 				if (ptr->inactive.compare_exchange_strong(expected, /* desired */ false, std::memory_order_acquire, std::memory_order_relaxed)) {
 					// We caught one! It's been marked as activated, the caller can have it
+					recycled = true;
 					return ptr;
 				}
 			}
 		}
-
+		
+		recycled = false;
 		return add_producer(isExplicit ? static_cast<ProducerBase*>(create<ExplicitProducer>(this)) : create<ImplicitProducer>(this));
 	}
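
recycle_or_create_producer() above adopts an existing producer by flipping its inactive flag from true to false with a single compare-exchange, so no two threads can ever claim the same one. A reduced sketch of that claim step over a plain array of slots (hypothetical Slot type, not the queue's producer list):

    #include <atomic>
    #include <cstddef>

    struct Slot {
        std::atomic<bool> inactive{true};   // true == free to be adopted
        // ... per-producer state would live here ...
    };

    // Returns a claimed slot, or nullptr if every slot is already in use.
    Slot* try_recycle(Slot* slots, std::size_t count) {
        for (std::size_t i = 0; i != count; ++i) {
            if (slots[i].inactive.load(std::memory_order_relaxed)) {
                bool expected = true;
                // Only one thread can win this transition from true to false,
                // which is what makes the hand-off safe.
                if (slots[i].inactive.compare_exchange_strong(
                        expected, false,
                        std::memory_order_acquire, std::memory_order_relaxed)) {
                    return &slots[i];
                }
            }
        }
        return nullptr;   // caller would allocate a fresh producer instead
    }
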
-
+	
 	ProducerBase* add_producer(ProducerBase* producer)
 	{
 		// Handle failed memory allocation
 		if (producer == nullptr) {
 			return nullptr;
 		}
-
+		
 		producerCount.fetch_add(1, std::memory_order_relaxed);
-
+		
 		// Add it to the lock-free list
 		auto prevTail = producerListTail.load(std::memory_order_relaxed);
 		do {
 			producer->next = prevTail;
 		} while (!producerListTail.compare_exchange_weak(prevTail, producer, std::memory_order_release, std::memory_order_relaxed));
-
+		
+#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
+		if (producer->isExplicit) {
+			auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed);
+			do {
+				static_cast<ExplicitProducer*>(producer)->nextExplicitProducer = prevTailExplicit;
+			} while (!explicitProducers.compare_exchange_weak(prevTailExplicit, static_cast<ExplicitProducer*>(producer), std::memory_order_release, std::memory_order_relaxed));
+		}
+		else {
+			auto prevTailImplicit = implicitProducers.load(std::memory_order_relaxed);
+			do {
+				static_cast<ImplicitProducer*>(producer)->nextImplicitProducer = prevTailImplicit;
+			} while (!implicitProducers.compare_exchange_weak(prevTailImplicit, static_cast<ImplicitProducer*>(producer), std::memory_order_release, std::memory_order_relaxed));
+		}
+#endif
+		
 		return producer;
 	}
-
+	
 	void reown_producers()
 	{
 		// After another instance is moved-into/swapped-with this one, all the
@@ -2961,31 +3112,31 @@ private:
 			ptr->parent = this;
 		}
 	}
-
-
+	
+	
 	//////////////////////////////////
 	// Implicit producer hash
 	//////////////////////////////////
-
+	
 	struct ImplicitProducerKVP
 	{
 		std::atomic<details::thread_id_t> key;
 		ImplicitProducer* value;		// No need for atomicity since it's only read by the thread that sets it in the first place
-
+		
 		ImplicitProducerKVP() { }
-
+		
 		ImplicitProducerKVP(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT
 		{
 			key.store(other.key.load(std::memory_order_relaxed), std::memory_order_relaxed);
 			value = other.value;
 		}
-
+		
 		inline ImplicitProducerKVP& operator=(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT
 		{
 			swap(other);
 			return *this;
 		}
-
+		
 		inline void swap(ImplicitProducerKVP& other) MOODYCAMEL_NOEXCEPT
 		{
 			if (this != &other) {
@@ -2994,21 +3145,21 @@ private:
 			}
 		}
 	};
-
+	
 	template<typename XT, typename XTraits>
 	friend void moodycamel::swap(typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP&, typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP&) MOODYCAMEL_NOEXCEPT;
-
+	
 	struct ImplicitProducerHash
 	{
 		size_t capacity;
 		ImplicitProducerKVP* entries;
 		ImplicitProducerHash* prev;
 	};
-
+	
 	inline void populate_initial_implicit_producer_hash()
 	{
 		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return;
-
+		
 		implicitProducerHashCount.store(0, std::memory_order_relaxed);
 		auto hash = &initialImplicitProducerHash;
 		hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE;
@@ -3019,18 +3170,18 @@ private:
 		hash->prev = nullptr;
 		implicitProducerHash.store(hash, std::memory_order_relaxed);
 	}
-
+	
 	void swap_implicit_producer_hashes(ConcurrentQueue& other)
 	{
 		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return;
-
+		
 		// Swap (assumes our implicit producer hash is initialized)
 		initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries);
 		initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0];
 		other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0];
-
+		
 		details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount);
-
+		
 		details::swap_relaxed(implicitProducerHash, other.implicitProducerHash);
 		if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) {
 			implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed);
@@ -3053,34 +3204,34 @@ private:
 			hash->prev = &other.initialImplicitProducerHash;
 		}
 	}
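
get_or_add_implicit_producer() below keys a linear-probing hash table on the thread ID and claims an empty slot with a compare-exchange on the key alone (the approach the comments credit to preshing.com). A stripped-down sketch of that probe-and-claim loop — fixed capacity, no resizing and no chaining to previous tables, with a hypothetical Value payload:

    #include <atomic>
    #include <cstddef>
    #include <cstdint>

    struct Value;   // hypothetical per-thread payload

    struct KVP {
        std::atomic<std::uint64_t> key{0};   // 0 plays the role of invalid_thread_id
        Value* value{nullptr};               // written and read only by the claiming thread
    };

    // capacity must be a power of two and the table must never become full,
    // mirroring the assumptions the queue's hash relies on.
    Value* get_or_add(KVP* table, std::size_t capacity, std::uint64_t id, Value* fresh) {
        std::size_t index = static_cast<std::size_t>(id * 0x9E3779B97F4A7C15ull);  // cheap hash
        while (true) {
            index &= capacity - 1;
            std::uint64_t probed = table[index].key.load(std::memory_order_relaxed);
            if (probed == id) {
                return table[index].value;   // this thread inserted it on an earlier call
            }
            if (probed == 0) {
                std::uint64_t expected = 0;
                if (table[index].key.compare_exchange_strong(expected, id,
                                                             std::memory_order_relaxed)) {
                    table[index].value = fresh;  // safe: only ever read back by this thread
                    return fresh;
                }
                continue;   // somebody claimed the slot first; re-check the same index
            }
            ++index;        // occupied by another thread's id: linear probing
        }
    }
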
-
+	
 	// Only fails (returns nullptr) if memory allocation fails
 	ImplicitProducer* get_or_add_implicit_producer()
 	{
 		// Note that since the data is essentially thread-local (key is thread ID),
 		// there's a reduced need for fences (memory ordering is already consistent
 		// for any individual thread), except for the current table itself.
-
+		
 		// Start by looking for the thread ID in the current and all previous hash tables.
 		// If it's not found, it must not be in there yet, since this same thread would
 		// have added it previously to one of the tables that we traversed.
-
+		
 		// Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table
-
+		
 #if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
 		debug::DebugLock lock(implicitProdMutex);
 #endif
-
+		
 		auto id = details::thread_id();
 		auto hashedId = details::hash_thread_id(id);
-
+		
 		auto mainHash = implicitProducerHash.load(std::memory_order_acquire);
 		for (auto hash = mainHash; hash != nullptr; hash = hash->prev) {
 			// Look for the id in this hash
 			auto index = hashedId;
 			while (true) {		// Not an infinite loop because at least one slot is free in the hash table
 				index &= hash->capacity - 1;
-
+				
 				auto probedKey = hash->entries[index].key.load(std::memory_order_relaxed);
 				if (probedKey == id) {
 					// Found it! If we had to search several hashes deep, though, we should lazily add it
@@ -3108,7 +3259,7 @@ private:
 							++index;
 						}
 					}
-
+					
 					return value;
 				}
 				if (probedKey == details::invalid_thread_id) {
@@ -3117,7 +3268,7 @@ private:
 				++index;
 			}
 		}
-
+		
 		// Insert!
 		auto newCount = 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed);
 		while (true) {
@@ -3127,105 +3278,119 @@ private:
 				// we reload implicitProducerHash it must be the most recent version (it only gets changed within this
 				// locked block).
 				mainHash = implicitProducerHash.load(std::memory_order_acquire);
-				auto newCapacity = mainHash->capacity << 1;
-				while (newCount >= (newCapacity >> 1)) {
-					newCapacity <<= 1;
-				}
-				auto raw = static_cast<char*>(Traits::malloc(sizeof(ImplicitProducerHash) + std::alignment_of<ImplicitProducerKVP>::value - 1 + sizeof(ImplicitProducerKVP) * newCapacity));
-				if (raw == nullptr) {
-					// Allocation failed
-					implicitProducerHashCount.fetch_add(-1, std::memory_order_relaxed);
-					implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
-					return nullptr;
+				if (newCount >= (mainHash->capacity >> 1)) {
+					auto newCapacity = mainHash->capacity << 1;
+					while (newCount >= (newCapacity >> 1)) {
+						newCapacity <<= 1;
+					}
+					auto raw = static_cast<char*>((Traits::malloc)(sizeof(ImplicitProducerHash) + std::alignment_of<ImplicitProducerKVP>::value - 1 + sizeof(ImplicitProducerKVP) * newCapacity));
+					if (raw == nullptr) {
+						// Allocation failed
+						implicitProducerHashCount.fetch_add(-1, std::memory_order_relaxed);
+						implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
+						return nullptr;
+					}
+					
+					auto newHash = new (raw) ImplicitProducerHash;
+					newHash->capacity = newCapacity;
+					newHash->entries = reinterpret_cast<ImplicitProducerKVP*>(details::align_for<ImplicitProducerKVP>(raw + sizeof(ImplicitProducerHash)));
+					for (size_t i = 0; i != newCapacity; ++i) {
+						new (newHash->entries + i) ImplicitProducerKVP;
+						newHash->entries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed);
+					}
+					newHash->prev = mainHash;
+					implicitProducerHash.store(newHash, std::memory_order_release);
+					implicitProducerHashResizeInProgress.clear(std::memory_order_release);
+					mainHash = newHash;
 				}
-
-				auto newHash = new (raw) ImplicitProducerHash;
-				newHash->capacity = newCapacity;
-				newHash->entries = reinterpret_cast<ImplicitProducerKVP*>(details::align_for<ImplicitProducerKVP>(raw + sizeof(ImplicitProducerHash)));
-				for (size_t i = 0; i != newCapacity; ++i) {
-					new (newHash->entries + i) ImplicitProducerKVP;
-					newHash->entries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed);
+				else {
+					implicitProducerHashResizeInProgress.clear(std::memory_order_release);
 				}
-				newHash->prev = mainHash;
-				implicitProducerHash.store(newHash, std::memory_order_release);
-				implicitProducerHashResizeInProgress.clear(std::memory_order_release);
-				mainHash = newHash;
 			}
-
+			
 			// If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table
 			// to finish being allocated by another thread (and if we just finished allocating above, the condition will
 			// always be true)
 			if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) {
-				auto producer = static_cast<ImplicitProducer*>(recycle_or_create_producer(false));
+				bool recycled;
+				auto producer = static_cast<ImplicitProducer*>(recycle_or_create_producer(false, recycled));
 				if (producer == nullptr) {
+					implicitProducerHashCount.fetch_add(-1, std::memory_order_relaxed);
 					return nullptr;
 				}
-
+				if (recycled) {
+					implicitProducerHashCount.fetch_add(-1, std::memory_order_relaxed);
+				}
+				
 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
 				producer->threadExitListener.callback = &ConcurrentQueue::implicit_producer_thread_exited_callback;
 				producer->threadExitListener.userData = producer;
 				details::ThreadExitNotifier::subscribe(&producer->threadExitListener);
 #endif
-
+				
 				auto index = hashedId;
 				while (true) {
 					index &= mainHash->capacity - 1;
 					auto probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed);
+					
 					auto empty = details::invalid_thread_id;
-					if (probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed)) {
-						mainHash->entries[index].value = producer;
-						break;
-					}
 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
 					auto reusable = details::invalid_thread_id2;
-					if (probedKey == reusable && mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_acquire)) {
-						implicitProducerHashCount.fetch_add(-1, std::memory_order_relaxed);
+					if ((probedKey == empty    && mainHash->entries[index].key.compare_exchange_strong(empty,    id, std::memory_order_relaxed)) ||
+						(probedKey == reusable && mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_acquire))) {
+#else
+					if ((probedKey == empty    && mainHash->entries[index].key.compare_exchange_strong(empty,    id, std::memory_order_relaxed))) {
+#endif
 						mainHash->entries[index].value = producer;
 						break;
 					}
-#endif
 					++index;
 				}
 				return producer;
 			}
-
+			
 			// Hmm, the old hash is quite full and somebody else is busy allocating a new one.
 			// We need to wait for the allocating thread to finish (if it succeeds, we add, if not,
 			// we try to allocate ourselves).
 			mainHash = implicitProducerHash.load(std::memory_order_acquire);
 		}
 	}
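The lookup above follows the linear-probing, CAS-on-key design from the Preshing article cited in the comments: a thread either finds its ID in one of the chained tables or claims an empty slot in the newest table with a compare-and-swap. A reduced standalone sketch of that core idea (fixed capacity, no resizing or recycled IDs, simplified memory ordering; all names are hypothetical):

    #include <atomic>
    #include <cstdint>
    #include <iostream>

    // Tiny fixed-size lock-free map from a non-zero key to a value pointer.
    // Capacity is a power of two and the table must never become completely full.
    struct TinyLockFreeMap {
        static const std::size_t kCapacity = 64;
        struct Entry {
            std::atomic<std::uintptr_t> key;   // 0 means "empty slot"
            void* value;
        };
        Entry entries[kCapacity];

        TinyLockFreeMap() {
            for (auto& e : entries) {
                e.key.store(0, std::memory_order_relaxed);
                e.value = nullptr;
            }
        }

        // Find the slot for `key`, claiming an empty slot with a CAS if needed.
        Entry* get_or_add(std::uintptr_t key) {
            std::size_t index = static_cast<std::size_t>(key * 0x9E3779B97F4A7C15ull);
            while (true) {
                index &= kCapacity - 1;
                std::uintptr_t probed = entries[index].key.load(std::memory_order_relaxed);
                if (probed == key) {
                    return &entries[index];          // already present
                }
                if (probed == 0) {
                    std::uintptr_t expected = 0;
                    if (entries[index].key.compare_exchange_strong(
                            expected, key, std::memory_order_relaxed)) {
                        return &entries[index];      // we claimed this slot
                    }
                    if (expected == key) {
                        return &entries[index];      // racing insert of the same key
                    }
                    // Slot was taken by a different key; keep probing.
                }
                ++index;
            }
        }
    };

    int main() {
        TinyLockFreeMap map;
        int producerA = 0;
        map.get_or_add(42)->value = &producerA;
        std::cout << "same slot: " << (map.get_or_add(42)->value == &producerA) << "\n";
    }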
-
+	
 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
 	void implicit_producer_thread_exited(ImplicitProducer* producer)
 	{
 		// Remove from thread exit listeners
 		details::ThreadExitNotifier::unsubscribe(&producer->threadExitListener);
-
+		
 		// Remove from hash
 #if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
 		debug::DebugLock lock(implicitProdMutex);
 #endif
-		// Only need to remove from main table (as that's the only one that new inserts could go into)
 		auto hash = implicitProducerHash.load(std::memory_order_acquire);
 		assert(hash != nullptr);		// The thread exit listener is only registered if we were added to a hash in the first place
 		auto id = details::thread_id();
-		auto index = details::hash_thread_id(id);
+		auto hashedId = details::hash_thread_id(id);
 		details::thread_id_t probedKey;
-		do {
-			index &= hash->capacity - 1;
-			probedKey = hash->entries[index].key.load(std::memory_order_relaxed);
-			if (probedKey == id) {
-				hash->entries[index].key.store(details::invalid_thread_id2, std::memory_order_release);
-				break;
-			}
-			++index;
-		} while (probedKey != details::invalid_thread_id);		// Can happen if the hash has changed but we weren't put back in it yet
-
+		
+		// We need to traverse all the hashes just in case other threads aren't on the current one yet and are
+		// trying to add an entry thinking there's a free slot (because they reused a producer)
+		for (; hash != nullptr; hash = hash->prev) {
+			auto index = hashedId;
+			do {
+				index &= hash->capacity - 1;
+				probedKey = hash->entries[index].key.load(std::memory_order_relaxed);
+				if (probedKey == id) {
+					hash->entries[index].key.store(details::invalid_thread_id2, std::memory_order_release);
+					break;
+				}
+				++index;
+			} while (probedKey != details::invalid_thread_id);		// Can happen if the hash has changed but we weren't put back in it yet, or if we weren't added to this hash in the first place
+		}
+		
 		// Mark the queue as being recyclable
 		producer->inactive.store(true, std::memory_order_release);
 	}
-
+	
 	static void implicit_producer_thread_exited_callback(void* userData)
 	{
 		auto producer = static_cast<ImplicitProducer*>(userData);
@@ -3233,26 +3398,26 @@ private:
 		queue->implicit_producer_thread_exited(producer);
 	}
 #endif
-
+	
 	//////////////////////////////////
 	// Utility functions
 	//////////////////////////////////
-
+	
 	template<typename U>
 	static inline U* create_array(size_t count)
 	{
 		assert(count > 0);
-		auto p = static_cast<U*>(Traits::malloc(sizeof(U) * count));
+		auto p = static_cast<U*>((Traits::malloc)(sizeof(U) * count));
 		if (p == nullptr) {
 			return nullptr;
 		}
-
+		
 		for (size_t i = 0; i != count; ++i) {
 			new (p + i) U();
 		}
 		return p;
 	}
-
+	
 	template<typename U>
 	static inline void destroy_array(U* p, size_t count)
 	{
@@ -3261,59 +3426,64 @@ private:
 			for (size_t i = count; i != 0; ) {
 				(p + --i)->~U();
 			}
-			Traits::free(p);
+			(Traits::free)(p);
 		}
 	}
-
+	
 	template<typename U>
 	static inline U* create()
 	{
-		auto p = Traits::malloc(sizeof(U));
+		auto p = (Traits::malloc)(sizeof(U));
 		return p != nullptr ? new (p) U : nullptr;
 	}
-
+	
 	template<typename U, typename A1>
 	static inline U* create(A1&& a1)
 	{
-		auto p = Traits::malloc(sizeof(U));
+		auto p = (Traits::malloc)(sizeof(U));
 		return p != nullptr ? new (p) U(std::forward<A1>(a1)) : nullptr;
 	}
-
+	
 	template<typename U>
 	static inline void destroy(U* p)
 	{
 		if (p != nullptr) {
 			p->~U();
 		}
-		Traits::free(p);
+		(Traits::free)(p);
 	}
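Two small patterns recur in the hunks above: allocation calls are written as `(Traits::malloc)(...)`, where the extra parentheses keep a function-like `malloc`/`free` macro (e.g. from a debugging allocator) from expanding at the call site, and objects are built with placement new and torn down with explicit destructor calls. A minimal illustration of both, independent of the queue's real `Traits` type (names hypothetical):

    #include <cstdlib>
    #include <new>
    #include <string>
    #include <utility>

    struct MyTraits {
        static void* malloc(std::size_t size) { return std::malloc(size); }
        static void free(void* ptr) { std::free(ptr); }
    };

    struct Widget {
        std::string name;
        explicit Widget(std::string n) : name(std::move(n)) {}
    };

    int main() {
        // The parentheses around the callee suppress function-like macro expansion,
        // so this line survives even if a `#define malloc(x) ...` is in effect.
        void* raw = (MyTraits::malloc)(sizeof(Widget));
        if (raw == nullptr) return 1;

        Widget* w = new (raw) Widget("constructed in place");   // placement new
        w->~Widget();                                            // explicit destructor call
        (MyTraits::free)(raw);
    }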
 
 private:
 	std::atomic<ProducerBase*> producerListTail;
 	std::atomic<std::uint32_t> producerCount;
-
+	
 	std::atomic<size_t> initialBlockPoolIndex;
 	Block* initialBlockPool;
 	size_t initialBlockPoolSize;
-
+	
 #if !MCDBGQ_USEDEBUGFREELIST
 	FreeList<Block> freeList;
 #else
 	debug::DebugFreeList<Block> freeList;
 #endif
-
+	
 	std::atomic<ImplicitProducerHash*> implicitProducerHash;
 	std::atomic<size_t> implicitProducerHashCount;		// Number of slots logically used
 	ImplicitProducerHash initialImplicitProducerHash;
 	std::array<ImplicitProducerKVP, INITIAL_IMPLICIT_PRODUCER_HASH_SIZE> initialImplicitProducerHashEntries;
 	std::atomic_flag implicitProducerHashResizeInProgress;
-
+	
 	std::atomic<std::uint32_t> nextExplicitConsumerId;
 	std::atomic<std::uint32_t> globalExplicitConsumerOffset;
-
+	
 #if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
 	debug::DebugMutex implicitProdMutex;
 #endif
+	
+#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
+	std::atomic<ExplicitProducer*> explicitProducers;
+	std::atomic<ImplicitProducer*> implicitProducers;
+#endif
 };
 
 
@@ -3327,6 +3497,15 @@ ProducerToken::ProducerToken(ConcurrentQueue<T, Traits>& queue)
 }
 
 template<typename T, typename Traits>
+ProducerToken::ProducerToken(BlockingConcurrentQueue<T, Traits>& queue)
+	: producer(reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue)->recycle_or_create_producer(true))
+{
+	if (producer != nullptr) {
+		producer->token = this;
+	}
+}
+
+template<typename T, typename Traits>
 ConsumerToken::ConsumerToken(ConcurrentQueue<T, Traits>& queue)
 	: itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr)
 {
@@ -3335,6 +3514,14 @@ ConsumerToken::ConsumerToken(ConcurrentQueue<T, Traits>& queue)
 }
 
 template<typename T, typename Traits>
+ConsumerToken::ConsumerToken(BlockingConcurrentQueue<T, Traits>& queue)
+	: itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr)
+{
+	initialOffset = reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
+	lastKnownGlobalOffset = -1;
+}
+
+template<typename T, typename Traits>
 inline void swap(ConcurrentQueue<T, Traits>& a, ConcurrentQueue<T, Traits>& b) MOODYCAMEL_NOEXCEPT
 {
 	a.swap(b);
diff --git a/include/cuckoohash_config.h b/include/cuckoohash_config.h
deleted file mode 100644
index 6c9fe0c..0000000
--- a/include/cuckoohash_config.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*! \file */
-
-#ifndef _CUCKOOHASH_CONFIG_H
-#define _CUCKOOHASH_CONFIG_H
-
-//! SLOT_PER_BUCKET is the maximum number of keys per bucket
-const size_t SLOT_PER_BUCKET = 4;
-
-//! DEFAULT_SIZE is the default number of elements in an empty hash
-//! table
-const size_t DEFAULT_SIZE = (1U << 16) * SLOT_PER_BUCKET;
-
-//! set LIBCUCKOO_DEBUG to 1 to enable debug output
-#define LIBCUCKOO_DEBUG 0
-
-#endif // _CUCKOOHASH_CONFIG_H
diff --git a/include/cuckoohash_config.hh b/include/cuckoohash_config.hh
new file mode 100644
index 0000000..fa6ca4d
--- /dev/null
+++ b/include/cuckoohash_config.hh
@@ -0,0 +1,28 @@
+/** \file */
+
+#ifndef _CUCKOOHASH_CONFIG_HH
+#define _CUCKOOHASH_CONFIG_HH
+
+#include <cstddef>
+
+//! The default maximum number of keys per bucket
+const size_t DEFAULT_SLOT_PER_BUCKET = 4;
+
+//! The default number of elements in an empty hash table
+const size_t DEFAULT_SIZE = (1U << 16) * DEFAULT_SLOT_PER_BUCKET;
+
+//! The default minimum load factor that the table allows for automatic
+//! expansion. It must be a number between 0.0 and 1.0. The table will throw
+//! libcuckoo_load_factor_too_low if the load factor falls below this value
+//! during an automatic expansion.
+const double DEFAULT_MINIMUM_LOAD_FACTOR = 0.05;
+
+//! An alias for the value that sets no limit on the maximum hashpower. If this
+//! value is set as the maximum hashpower limit, there will be no limit. Since 0
+//! is the only hashpower that can never occur, it should stay at 0.
+const size_t NO_MAXIMUM_HASHPOWER = 0;
+
+//! set LIBCUCKOO_DEBUG to 1 to enable debug output
+#define LIBCUCKOO_DEBUG 0
+
+#endif // _CUCKOOHASH_CONFIG_HH
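These defaults feed the three-argument `cuckoohash_map` constructor that this patch introduces further down (initial capacity, minimum load factor, maximum hashpower). A small usage sketch, assuming the patched `cuckoohash_map.hh` and its companion headers are on the include path and that the new `DefaultHasher` handles `std::string` keys:

    #include <iostream>
    #include <string>

    #include "cuckoohash_map.hh"

    int main() {
        // Reserve room for ~1M entries, keep the default minimum load factor,
        // and cap growth at hashpower 20 (i.e. at most 2^20 buckets).
        cuckoohash_map<std::string, int> table(
            1 << 20, DEFAULT_MINIMUM_LOAD_FACTOR, /* mhp = */ 20);

        table.insert("reads", 1);

        int count = 0;
        if (table.find("reads", count)) {
            std::cout << "reads = " << count << "\n";                 // prints 1
        }
        std::cout << "min load factor = " << table.minimum_load_factor() << "\n";
        std::cout << "max hashpower   = " << table.maximum_hashpower() << "\n";
    }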
diff --git a/include/cuckoohash_map.hh b/include/cuckoohash_map.hh
index f60262a..3ef2db4 100644
--- a/include/cuckoohash_map.hh
+++ b/include/cuckoohash_map.hh
@@ -1,3 +1,5 @@
+/** \file */
+
 #ifndef _CUCKOOHASH_MAP_HH
 #define _CUCKOOHASH_MAP_HH
 
@@ -12,6 +14,7 @@
 #include <cstdlib>
 #include <cstring>
 #include <functional>
+#include <iterator>
 #include <limits>
 #include <list>
 #include <memory>
@@ -24,24 +27,39 @@
 #include <utility>
 #include <vector>
 
-#include "cuckoohash_config.h"
-#include "cuckoohash_util.h"
+#include "cuckoohash_config.hh"
+#include "cuckoohash_util.hh"
+#include "default_hasher.hh"
 
 //! cuckoohash_map is the hash table class.
-template <class Key, class T, class Hash = std::hash<Key>,
-          class Pred = std::equal_to<Key> >
+template < class Key,
+           class T,
+           class Hash = DefaultHasher<Key>,
+           class Pred = std::equal_to<Key>,
+           class Alloc = std::allocator<std::pair<const Key, T>>,
+           size_t SLOT_PER_BUCKET = DEFAULT_SLOT_PER_BUCKET
+           >
 class cuckoohash_map {
 public:
     //! key_type is the type of keys.
-    typedef Key               key_type;
+    typedef Key                     key_type;
     //! value_type is the type of key-value pairs.
     typedef std::pair<const Key, T> value_type;
     //! mapped_type is the type of values.
-    typedef T                 mapped_type;
+    typedef T                       mapped_type;
     //! hasher is the type of the hash function.
-    typedef Hash              hasher;
+    typedef Hash                    hasher;
     //! key_equal is the type of the equality predicate.
-    typedef Pred              key_equal;
+    typedef Pred                    key_equal;
+    //! allocator_type is the type of the allocator
+    typedef Alloc                   allocator_type;
+
+    //! slot_per_bucket is the number of items each bucket in the table can hold
+    static const size_t slot_per_bucket = SLOT_PER_BUCKET;
+
+    //! For any update operations, the callable passed in must be convertible to
+    //! the following type
+    typedef std::function<void(mapped_type&)> updater_type;
 
     //! Class returned by operator[] which wraps an entry in the hash table.
     //! Note that this reference type behaves somewhat differently from an STL
@@ -93,32 +111,36 @@ public:
 
     private:
         // private constructor which initializes the owner and key
-        reference(cuckoohash_map& owner, const key_type& key)
-            : owner_(owner), key_(key) {}
+        reference(
+            cuckoohash_map<Key, T, Hash, Pred, Alloc, slot_per_bucket>& owner,
+            const key_type& key) : owner_(owner), key_(key) {}
 
         // reference to the hash map instance
-        cuckoohash_map& owner_;
+        cuckoohash_map<Key, T, Hash, Pred, Alloc, slot_per_bucket>& owner_;
         // the referenced key
         const key_type& key_;
 
         // cuckoohash_map needs to call the private constructor
-        friend class cuckoohash_map;
+        friend class cuckoohash_map<Key, T, Hash, Pred, Alloc, slot_per_bucket>;
     };
 
     typedef const mapped_type const_reference;
 
+    typedef char partial_t;
+
 private:
     // Constants used internally
 
-    // true if the key is small and simple, which means using partial keys would
-    // probably slow us down
+    // true if the key is small and simple, which means using partial keys for
+    // lookup would probably slow us down
     static const bool is_simple =
         std::is_pod<key_type>::value && sizeof(key_type) <= 8;
 
+    // We enable certain methods only if the mapped_type is copy-assignable
     static const bool value_copy_assignable = std::is_copy_assignable<
         mapped_type>::value;
 
-    // number of locks in the locks_ array
+    // number of locks in the locks array
     static const size_t kNumLocks = 1 << 16;
 
     // number of cores on the machine
@@ -151,114 +173,85 @@ private:
     } __attribute__((aligned(64)));
 
     typedef enum {
-        ok = 0,
-        failure = 1,
-        failure_key_not_found = 2,
-        failure_key_duplicated = 3,
-        failure_space_not_enough = 4,
-        failure_function_not_supported = 5,
-        failure_table_full = 6,
-        failure_under_expansion = 7,
+        ok,
+        failure,
+        failure_key_not_found,
+        failure_key_duplicated,
+        failure_table_full,
+        failure_under_expansion,
     } cuckoo_status;
 
-    typedef char partial_t;
-    // Two partial key containers. One for when we're actually using partial
-    // keys and another that mocks partial keys for when the type is simple. The
-    // bucket will derive the correct class depending on whether the type is
-    // simple or not.
-    class RealPartialContainer {
-        std::array<partial_t, SLOT_PER_BUCKET> partials_;
+    // The Bucket type holds slot_per_bucket partial keys, key-value pairs, and
+    // an occupied bitset, which indicates whether the slot at the given bit
+    // index is in the table or not. It uses aligned_storage arrays to store the
+    // keys and values to allow constructing and destroying key-value pairs in
+    // place.
+    class Bucket {
+    private:
+        std::array<partial_t, slot_per_bucket> partials_;
+        std::bitset<slot_per_bucket> occupied_;
+        std::array<typename std::aligned_storage<
+                       sizeof(value_type), alignof(value_type)>::type,
+                   slot_per_bucket> kvpairs_;
+
     public:
-        const partial_t& partial(int ind) const {
+        const partial_t& partial(size_t ind) const {
             return partials_[ind];
         }
-        partial_t& partial(int ind) {
+
+        partial_t& partial(size_t ind) {
             return partials_[ind];
         }
-    };
 
-    class FakePartialContainer {
-    public:
-        // These methods should never be called, so we raise an exception if
-        // they are.
-        const partial_t& partial(int) const {
-            throw std::logic_error(
-                "FakePartialContainer::partial should never be called");
+        const value_type& kvpair(size_t ind) const {
+            return *static_cast<const value_type*>(
+                static_cast<const void*>(&kvpairs_[ind]));
         }
-        partial_t& partial(int) {
-            throw std::logic_error(
-                "FakePartialContainer::partial should never be called");
-        }
-    };
-
-    // The Bucket type holds SLOT_PER_BUCKET keys and values, and a occupied
-    // bitset, which indicates whether the slot at the given bit index is in
-    // the table or not. It uses aligned_storage arrays to store the keys and
-    // values to allow constructing and destroying key-value pairs in place.
-    class Bucket : public std::conditional<is_simple, FakePartialContainer,
-                                           RealPartialContainer>::type {
-    private:
-        std::array<typename std::aligned_storage<
-                       sizeof(key_type), alignof(key_type)>::type,
-                   SLOT_PER_BUCKET> keys_;
-        std::array<typename std::aligned_storage<
-                       sizeof(mapped_type), alignof(mapped_type)>::type,
-                   SLOT_PER_BUCKET> vals_;
-        std::bitset<SLOT_PER_BUCKET> occupied_;
-
-        // key_allocator is the allocator used to construct keys
-        static std::allocator<key_type> key_allocator;
 
-        // value_allocator is the allocator to construct values
-        static std::allocator<mapped_type> value_allocator;
-
-    public:
-        bool occupied(int ind) const {
-            return occupied_.test(ind);
+        value_type& kvpair(size_t ind) {
+            return *static_cast<value_type*>(
+                static_cast<void*>(&kvpairs_[ind]));
         }
 
-        const key_type& key(int ind) const {
-            return *static_cast<const key_type*>(
-                static_cast<const void*>(&keys_[ind]));
+        bool occupied(size_t ind) const {
+            return occupied_.test(ind);
         }
 
-        key_type& key(int ind) {
-            return *static_cast<key_type*>(static_cast<void*>(&keys_[ind]));
+        const key_type& key(size_t ind) const {
+            return kvpair(ind).first;
         }
 
-        const mapped_type& val(int ind) const {
-            return *static_cast<const mapped_type*>(
-                static_cast<const void*>(&vals_[ind]));
+        const mapped_type& val(size_t ind) const {
+            return kvpair(ind).second;
         }
 
-        mapped_type& val(int ind) {
-            return *static_cast<mapped_type*>(static_cast<void*>(&vals_[ind]));
+        mapped_type& val(size_t ind) {
+            return kvpair(ind).second;
         }
 
-        template <class V>
-        void setKV(size_t ind, const key_type& k, V v) {
+        template <class... Args>
+        void setKV(size_t ind, Args&&... args) {
+            static allocator_type pair_allocator;
             occupied_.set(ind);
-            key_allocator.construct(&key(ind), k);
-            value_allocator.construct(&val(ind), std::forward<V>(v));
+            pair_allocator.construct(&kvpair(ind), std::forward<Args>(args)...);
         }
 
         void eraseKV(size_t ind) {
             occupied_.reset(ind);
-            key_allocator.destroy(&key(ind));
-            value_allocator.destroy(&val(ind));
+            (&kvpair(ind))->~value_type();
         }
 
-        Bucket() {
-            occupied_.reset();
-        }
-
-        ~Bucket() {
-            for (size_t i = 0; i < SLOT_PER_BUCKET; ++i) {
+        void clear() {
+            for (size_t i = 0; i < slot_per_bucket; ++i) {
                 if (occupied(i)) {
                     eraseKV(i);
                 }
             }
         }
+
+        ~Bucket() {
+            clear();
+        }
     };
 
     // cacheint is a cache-aligned atomic integer type.
@@ -266,122 +259,84 @@ private:
         std::atomic<size_t> num;
         cacheint(): num(0) {}
         cacheint(size_t x): num(x) {}
+        cacheint(const cacheint& x): num(x.num.load()) {}
         cacheint(cacheint&& x): num(x.num.load()) {}
+        cacheint& operator=(const cacheint& x) {
+            num = x.num.load();
+            return *this;
+        }
+        cacheint& operator=(const cacheint&& x) {
+            num = x.num.load();
+            return *this;
+        }
     } __attribute__((aligned(64)));
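`cacheint` pads each counter out to its own 64-byte cache line so that different cores bumping their own insert/delete counters do not false-share. The same idea in a standalone sketch, with one counter per worker thread and a summed read (names hypothetical):

    #include <atomic>
    #include <cstddef>
    #include <iostream>
    #include <thread>
    #include <vector>

    struct alignas(64) PaddedCounter {
        std::atomic<std::size_t> value{0};
    };

    int main() {
        constexpr std::size_t kThreads = 4;
        PaddedCounter counters[kThreads];                // one cache line per counter

        std::vector<std::thread> workers;
        for (std::size_t t = 0; t < kThreads; ++t) {
            workers.emplace_back([&counters, t] {
                for (int i = 0; i < 100000; ++i)
                    counters[t].value.fetch_add(1, std::memory_order_relaxed);
            });
        }
        for (auto& w : workers) w.join();

        std::size_t total = 0;
        for (const auto& c : counters) total += c.value.load();
        std::cout << "total = " << total << "\n";        // 400000
    }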
 
-    // An alias for the type of lock we are using
-    typedef spinlock locktype;
 
-    // TableInfo contains the entire state of the hashtable. We allocate one
-    // TableInfo pointer per hash table and store all of the table memory in it,
-    // so that all the data can be atomically swapped during expansion.
-    struct TableInfo {
-        // 2**hashpower is the number of buckets
-        size_t hashpower_;
+    // 2**hashpower is the number of buckets. This cannot be changed unless all
+    // the locks are taken on the table. Since it is still read and written by
+    // multiple threads not necessarily synchronized by a lock, we keep it
+    // atomic
+    std::atomic<size_t> hashpower_;
 
-        // vector of buckets
-        std::vector<Bucket> buckets_;
+    // Helper methods to read and write hashpower_ with the correct memory
+    // barriers
+    size_t get_hashpower() const {
+        return hashpower_.load(std::memory_order_acquire);
+    }
 
-        // array of locks
-        std::array<locktype, kNumLocks> locks_;
+    void set_hashpower(size_t val) {
+        hashpower_.store(val, std::memory_order_release);
+    }
 
-        // per-core counters for the number of inserts and deletes
-        std::vector<cacheint> num_inserts, num_deletes;
+    // vector of buckets. The size or memory location of the buckets cannot be
+    // changed unless all the locks are taken on the table. Thus, it is only safe
+    // to access the buckets_ vector when you have at least one lock held.
+    typedef std::vector<
+        Bucket, typename allocator_type::template rebind<Bucket>::other>
+    buckets_t;
+    buckets_t buckets_;
 
-        // The constructor allocates the memory for the table. It allocates one
-        // cacheint for each core in num_inserts and num_deletes.
-        TableInfo(const size_t hashpower)
-            : hashpower_(hashpower), buckets_(hashsize(hashpower_)),
-              num_inserts(kNumCores()), num_deletes(kNumCores()) {}
+    // array of locks. Marked mutable, so that const methods can take locks.
+    // Even though it's a vector, it should not ever change in size after the
+    // initial allocation.
+    typedef std::vector<spinlock> locks_t;
+    mutable locks_t locks_;
 
-        ~TableInfo() {}
-    };
+    // per-core counters for the number of inserts and deletes
+    std::vector<
+        cacheint, typename allocator_type::template rebind<cacheint>::other>
+    num_inserts_, num_deletes_;
 
-    // This is a hazard pointer, used to indicate which version of the TableInfo
-    // is currently being used in the thread. Since cuckoohash_map operations
-    // can run simultaneously in different threads, this variable is thread
-    // local. Note that this variable can be safely shared between different
-    // cuckoohash_map instances, since multiple operations cannot occur
-    // simultaneously in one thread. The hazard pointer variable points to a
-    // pointer inside a global list of pointers, that each map checks before
-    // deleting any old TableInfo pointers.
-    static __thread TableInfo** hazard_pointer;
-
-    // A GlobalHazardPointerList stores a list of pointers that cannot be
-    // deleted by an expansion thread. Each thread gets its own node in the
-    // list, whose data pointer it can modify without contention.
-    class GlobalHazardPointerList {
-        std::list<TableInfo*> hp_;
-        std::mutex lock_;
-    public:
-        // new_hazard_pointer creates and returns a new hazard pointer for a
-        // thread.
-        TableInfo** new_hazard_pointer() {
-            std::unique_lock<std::mutex> ul(lock_);
-            hp_.emplace_back(nullptr);
-            return &hp_.back();
-        }
-
-        // delete_unused scans the list of hazard pointers, deleting any
-        // pointers in old_pointers that aren't in this list. If it does delete
-        // a pointer in old_pointers, it deletes that node from the list.
-        void delete_unused(std::list<std::unique_ptr<TableInfo>>&
-                           old_pointers) {
-            std::unique_lock<std::mutex> ul(lock_);
-            old_pointers.remove_if(
-                [this](const std::unique_ptr<TableInfo>& ptr) {
-                    return std::find(hp_.begin(), hp_.end(), ptr.get()) ==
-                        hp_.end();
-                });
-        }
-    };
-
-    // As long as the thread_local hazard_pointer is static, which means each
-    // template instantiation of a cuckoohash_map class gets its own per-thread
-    // hazard pointer, then each template instantiation of a cuckoohash_map
-    // class can get its own global_hazard_pointers list, since different
-    // template instantiations won't interfere with each other.
-    static GlobalHazardPointerList global_hazard_pointers;
+    // stores the minimum load factor allowed for automatic expansions. Whenever
+    // an automatic expansion is triggered (during an insertion where cuckoo
+    // hashing fails, for example), we check the load factor against this
+    // double, and throw an exception if it's lower than this value. It can be
+    // used to signal when the hash function is bad or the input adversarial.
+    std::atomic<double> minimum_load_factor_;
 
-    // check_hazard_pointer should be called before any public method that loads
-    // a table snapshot. It checks that the thread local hazard pointer pointer
-    // is not null, and gets a new pointer if it is null.
-    static inline void check_hazard_pointer() {
-        if (hazard_pointer == nullptr) {
-            hazard_pointer = global_hazard_pointers.new_hazard_pointer();
-        }
-    }
-
-    // Once a function is finished with a version of the table, it will want to
-    // unset the hazard pointer it set so that it can be freed if it needs to.
-    // This is an object which, upon destruction, will unset the hazard pointer.
-    class HazardPointerUnsetter {
-    public:
-        ~HazardPointerUnsetter() {
-            *hazard_pointer = nullptr;
-        }
-    };
+    // stores the maximum hashpower allowed for any expansions. If set to
+    // NO_MAXIMUM_HASHPOWER, this limit will be disregarded.
+    std::atomic<size_t> maximum_hashpower_;
 
-    // counterid stores the per-thread counter index of each thread.
-    static __thread int counterid;
+    // get_counterid returns the counterid for the current thread.
+    static inline int get_counterid() {
+        // counterid stores the per-thread counter index of each thread. Each
+        // counter value corresponds to a core on the machine.
+        static __thread int counterid = -1;
 
-    // check_counterid checks if the counterid has already been determined. If
-    // not, it assigns a counterid to the current thread by picking a random
-    // core. This should be called at the beginning of any function that changes
-    // the number of elements in the table.
-    static inline void check_counterid() {
         if (counterid < 0) {
             counterid = rand() % kNumCores();
         }
+        return counterid;
     }
 
     // reserve_calc takes in a parameter specifying a certain number of slots
     // for a table and returns the smallest hashpower that will hold n elements.
     static size_t reserve_calc(size_t n) {
-        double nhd = ceil(log2((double)n / (double)SLOT_PER_BUCKET));
-        size_t new_hashpower = (size_t) (nhd <= 0 ? 1.0 : nhd);
-        assert(n <= hashsize(new_hashpower) * SLOT_PER_BUCKET);
-        return new_hashpower;
+        double nhd = ceil(log2((double)n / (double)slot_per_bucket));
+        size_t new_hp = (size_t) (nhd <= 0 ? 1.0 : nhd);
+        assert(n <= hashsize(new_hp) * slot_per_bucket);
+        return new_hp;
     }
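`reserve_calc` rounds the request up to the smallest hashpower whose `2^hp * slot_per_bucket` slots can hold `n` items. A standalone sketch with one worked case: n = 100000 and 4 slots per bucket give hp = 15, i.e. 32768 buckets and 131072 slots.

    #include <cassert>
    #include <cmath>
    #include <cstddef>
    #include <iostream>

    static const std::size_t kSlotPerBucket = 4;

    // Smallest hashpower hp such that (1 << hp) * kSlotPerBucket >= n (with hp >= 1).
    std::size_t reserve_calc(std::size_t n) {
        double nhd = std::ceil(std::log2(static_cast<double>(n) /
                                         static_cast<double>(kSlotPerBucket)));
        std::size_t hp = static_cast<std::size_t>(nhd <= 0 ? 1.0 : nhd);
        assert(n <= (std::size_t{1} << hp) * kSlotPerBucket);
        return hp;
    }

    int main() {
        std::size_t hp = reserve_calc(100000);
        std::cout << "hashpower = " << hp                                   // 15
                  << ", buckets = " << (std::size_t{1} << hp)               // 32768
                  << ", slots = " << (std::size_t{1} << hp) * kSlotPerBucket
                  << "\n";                                                  // 131072
    }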
 
     // hashfn returns an instance of the hash function
@@ -397,88 +352,125 @@ private:
     }
 
 public:
-    //! The constructor creates a new hash table with enough space for \p n
-    //! elements. If the constructor fails, it will throw an exception.
-    explicit cuckoohash_map(size_t n = DEFAULT_SIZE) {
-        cuckoo_init(reserve_calc(n));
-    }
-
-    //! The destructor explicitly deletes the current table info.
-    ~cuckoohash_map() {
-        TableInfo* ti = table_info.load();
-        if (ti != nullptr) {
-            delete ti;
-        }
+    /**
+     * Creates a new cuckoohash_map instance
+     *
+     * @param n the number of elements to reserve space for initially
+     * @param mlf the minimum load factor that the table allows for
+     * automatic expansion.
+     * @param mhp the maximum hashpower that the table can take on (pass in 0
+     * for no limit)
+     * @throw std::invalid_argument if the given minimum load factor is invalid,
+     * or if the initial space exceeds the maximum hashpower
+     */
+    cuckoohash_map(size_t n = DEFAULT_SIZE,
+                   double mlf = DEFAULT_MINIMUM_LOAD_FACTOR,
+                   size_t mhp = NO_MAXIMUM_HASHPOWER)
+        : locks_(kNumLocks) {
+        minimum_load_factor(mlf);
+        maximum_hashpower(mhp);
+        size_t hp = reserve_calc(n);
+        if (mhp != NO_MAXIMUM_HASHPOWER && hp > mhp) {
+            throw std::invalid_argument(
+                "hashpower for initial size " + std::to_string(hp) +
+                " is greater than the maximum hashpower");
+        }
+        set_hashpower(hp);
+        buckets_.resize(hashsize(hp));
+        num_inserts_.resize(kNumCores(), 0);
+        num_deletes_.resize(kNumCores(), 0);
     }
 
     //! clear removes all the elements in the hash table, calling their
     //! destructors.
-    void clear() {
-        check_hazard_pointer();
-        TableInfo* ti = snapshot_and_lock_all();
-        assert(ti == table_info.load());
-        AllUnlocker au(ti);
-        HazardPointerUnsetter hpu;
-        cuckoo_clear(ti);
+    void clear() noexcept {
+        auto unlocker = snapshot_and_lock_all();
+        cuckoo_clear();
     }
 
     //! size returns the number of items currently in the hash table. Since it
     //! doesn't lock the table, elements can be inserted during the computation,
     //! so the result may not necessarily be exact.
-    size_t size() const {
-        check_hazard_pointer();
-        const TableInfo* ti = snapshot_table_nolock();
-        HazardPointerUnsetter hpu;
-        const size_t s = cuckoo_size(ti);
-        return s;
+    size_t size() const noexcept {
+        return cuckoo_size();
     }
 
     //! empty returns true if the table is empty.
-    bool empty() const {
+    bool empty() const noexcept {
         return size() == 0;
     }
 
     //! hashpower returns the hashpower of the table, which is
     //! log<SUB>2</SUB>(the number of buckets).
-    size_t hashpower() const {
-        check_hazard_pointer();
-        TableInfo* ti = snapshot_table_nolock();
-        HazardPointerUnsetter hpu;
-        const size_t hashpower = ti->hashpower_;
-        return hashpower;
+    size_t hashpower() const noexcept {
+        return get_hashpower();
     }
 
     //! bucket_count returns the number of buckets in the table.
-    size_t bucket_count() const {
-        check_hazard_pointer();
-        TableInfo* ti = snapshot_table_nolock();
-        HazardPointerUnsetter hpu;
-        size_t buckets = hashsize(ti->hashpower_);
-        return buckets;
+    size_t bucket_count() const noexcept {
+        return hashsize(get_hashpower());
     }
 
     //! load_factor returns the ratio of the number of items in the table to the
     //! total number of available slots in the table.
-    double load_factor() const {
-        check_hazard_pointer();
-        const TableInfo* ti = snapshot_table_nolock();
-        HazardPointerUnsetter hpu;
-        return cuckoo_loadfactor(ti);
+    double load_factor() const noexcept {
+        return cuckoo_loadfactor(get_hashpower());
+    }
+
+    /**
+     * Sets the minimum load factor allowed for automatic expansions. If an
+     * expansion is needed when the load factor of the table is lower than this
+     * threshold, the libcuckoo_load_factor_too_low exception is thrown.
+     *
+     * @param mlf the load factor to set the minimum to
+     * @throw std::invalid_argument if the given load factor is less than 0.0
+     * or greater than 1.0
+     */
+    void minimum_load_factor(const double mlf) {
+        if (mlf < 0.0) {
+            throw std::invalid_argument(
+                "load factor " + std::to_string(mlf) + " cannot be "
+                " less than 0");
+        } else if (mlf > 1.0) {
+            throw std::invalid_argument(
+                "load factor " + std::to_string(mlf) + " cannot be "
+                " greater than 1");
+        }
+        minimum_load_factor_ = mlf;
+    }
+
+    /**
+     * @return the minimum load factor of the table
+     */
+    double minimum_load_factor() noexcept {
+        return minimum_load_factor_;
+    }
+
+    /**
+     * Sets the maximum hashpower the table can be. If set to \ref
+     * NO_MAXIMUM_HASHPOWER, there will be no limit on the hashpower.
+     *
+     * @param mhp the hashpower to set the maximum to
+     */
+    void maximum_hashpower(size_t mhp) noexcept {
+        maximum_hashpower_ = mhp;
+    }
+
+    /**
+     * @return the maximum hashpower of the table
+     */
+    size_t maximum_hashpower() noexcept {
+        return maximum_hashpower_;
     }
 
     //! find searches through the table for \p key, and stores the associated
     //! value it finds in \p val.
     ENABLE_IF(, value_copy_assignable, bool)
     find(const key_type& key, mapped_type& val) const {
-        check_hazard_pointer();
         size_t hv = hashed_key(key);
-        TableInfo* ti;
-        size_t i1, i2;
-        std::tie(ti, i1, i2) = snapshot_and_lock_two(hv);
-        HazardPointerUnsetter hpu;
-
-        const cuckoo_status st = cuckoo_find(key, val, hv, ti, i1, i2);
-        unlock_two(ti, i1, i2);
+        auto b = snapshot_and_lock_two(hv);
+        const cuckoo_status st = cuckoo_find(key, val, hv, b.first, b.second);
+        unlock_two(b.first, b.second);
         return (st == ok);
     }
 
@@ -499,53 +491,39 @@ public:
     //! contains searches through the table for \p key, and returns true if it
     //! finds it in the table, and false otherwise.
     bool contains(const key_type& key) const {
-        check_hazard_pointer();
         size_t hv = hashed_key(key);
-        TableInfo* ti;
-        size_t i1, i2;
-        std::tie(ti, i1, i2) = snapshot_and_lock_two(hv);
-        HazardPointerUnsetter hpu;
-
-        const bool result = cuckoo_contains(key, hv, ti, i1, i2);
-        unlock_two(ti, i1, i2);
+        auto b = snapshot_and_lock_two(hv);
+        const bool result = cuckoo_contains(key, hv, b.first, b.second);
+        unlock_two(b.first, b.second);
         return result;
     }
 
-    //! insert puts the given key-value pair into the table. It first checks
-    //! that \p key isn't already in the table, since the table doesn't support
-    //! duplicate keys. If the table is out of space, insert will automatically
-    //! expand until it can succeed. Note that expansion can throw an exception,
-    //! which insert will propagate. If \p key is already in the table, it
-    //! returns false, otherwise it returns true.
+    /**
+     * Puts the given key-value pair into the table. If the key cannot be placed
+     * in the table, it may be automatically expanded to fit more items.
+     *
+     * @param key the key to insert into the table
+     * @param val the value to insert
+     * @return true if the insertion succeeded, false if there was a duplicate
+     * key
+     * @throw libcuckoo_load_factor_too_low if the load factor is below the
+     * minimum_load_factor threshold, if expansion is required
+     * @throw libcuckoo_maximum_hashpower_exceeded if expansion is required
+     * beyond the maximum hash power, if one was set
+     */
     template <class V>
-    typename std::enable_if<std::is_convertible<V, const mapped_type&>::value,
-                            bool>::type
-    insert(const key_type& key, V val) {
-        check_hazard_pointer();
-        check_counterid();
-        size_t hv = hashed_key(key);
-        TableInfo* ti;
-        size_t i1, i2;
-        std::tie(ti, i1, i2) = snapshot_and_lock_two(hv);
-        HazardPointerUnsetter hpu;
-        return cuckoo_insert_loop(key, std::forward<V>(val),
-                                  hv, ti, i1, i2);
+    bool insert(const key_type& key, V&& val) {
+        return cuckoo_insert_loop(key, std::forward<V>(val), hashed_key(key));
     }
 
     //! erase removes \p key and its associated value from the table, calling
     //! their destructors. If \p key is not there, it returns false, otherwise
     //! it returns true.
     bool erase(const key_type& key) {
-        check_hazard_pointer();
-        check_counterid();
         size_t hv = hashed_key(key);
-        TableInfo* ti;
-        size_t i1, i2;
-        std::tie(ti, i1, i2) = snapshot_and_lock_two(hv);
-        HazardPointerUnsetter hpu;
-
-        const cuckoo_status st = cuckoo_delete(key, hv, ti, i1, i2);
-        unlock_two(ti, i1, i2);
+        auto b = snapshot_and_lock_two(hv);
+        const cuckoo_status st = cuckoo_delete(key, hv, b.first, b.second);
+        unlock_two(b.first, b.second);
         return (st == ok);
     }
 
@@ -553,15 +531,11 @@ public:
     //! not there, it returns false, otherwise it returns true.
     ENABLE_IF(, value_copy_assignable, bool)
     update(const key_type& key, const mapped_type& val) {
-        check_hazard_pointer();
         size_t hv = hashed_key(key);
-        TableInfo* ti;
-        size_t i1, i2;
-        std::tie(ti, i1, i2) = snapshot_and_lock_two(hv);
-        HazardPointerUnsetter hpu;
-
-        const cuckoo_status st = cuckoo_update(key, val, hv, ti, i1, i2);
-        unlock_two(ti, i1, i2);
+        auto b = snapshot_and_lock_two(hv);
+        const cuckoo_status st = cuckoo_update(key, val, hv, b.first,
+                                               b.second);
+        unlock_two(b.first, b.second);
         return (st == ok);
     }
 
@@ -570,16 +544,14 @@ public:
     //! modify the argument as desired, returning nothing. If \p key is not
     //! there, it returns false, otherwise it returns true.
     template <typename Updater>
-    bool update_fn(const key_type& key, Updater fn) {
-        check_hazard_pointer();
+    typename std::enable_if<
+        std::is_convertible<Updater, updater_type>::value,
+        bool>::type update_fn(const key_type& key, Updater fn) {
         size_t hv = hashed_key(key);
-        TableInfo* ti;
-        size_t i1, i2;
-        std::tie(ti, i1, i2) = snapshot_and_lock_two(hv);
-        HazardPointerUnsetter hpu;
-
-        const cuckoo_status st = cuckoo_update_fn(key, fn, hv, ti, i1, i2);
-        unlock_two(ti, i1, i2);
+        auto b = snapshot_and_lock_two(hv);
+        const cuckoo_status st = cuckoo_update_fn(key, fn, hv, b.first,
+                                                  b.second);
+        unlock_two(b.first, b.second);
         return (st == ok);
     }
 
@@ -588,77 +560,85 @@ public:
     //! table, then it runs an insert with \p key and \p val. It will always
     //! succeed, since if the update fails and the insert finds the key already
     //! inserted, it can retry the update.
-    template <typename Updater>
-    void upsert(const key_type& key, Updater fn, const mapped_type& val) {
-        check_hazard_pointer();
-        check_counterid();
+    template <typename Updater, typename V>
+    typename std::enable_if<
+        std::is_convertible<Updater, updater_type>::value,
+        void>::type upsert(const key_type& key, Updater fn, V val) {
         size_t hv = hashed_key(key);
-        TableInfo* ti;
-        size_t i1, i2;
-
-        bool res;
+        cuckoo_status st;
         do {
-            std::tie(ti, i1, i2) = snapshot_and_lock_two(hv);
-            HazardPointerUnsetter hpu;
-            const cuckoo_status st = cuckoo_update_fn(key, fn, hv, ti, i1, i2);
+            auto b = snapshot_and_lock_two(hv);
+            size_t hp = get_hashpower();
+            st = cuckoo_update_fn(key, fn, hv, b.first, b.second);
             if (st == ok) {
-                unlock_two(ti, i1, i2);
-                return;
+                unlock_two(b.first, b.second);
+                break;
             }
 
-            // We run an insert, since the update failed
-            res = cuckoo_insert_loop(key, val, hv, ti, i1, i2);
-
-            // The only valid reason for res being false is if insert
-            // encountered a duplicate key after releasing the locks and
-            // performing cuckoo hashing. In this case, we retry the entire
-            // upsert operation.
-        } while (!res);
-        return;
-    }
-
-    //! rehash will size the table using a hashpower of \p n. Note that the
-    //! number of buckets in the table will be 2<SUP>\p n</SUP> after expansion,
-    //! so the table will have 2<SUP>\p n</SUP> × \ref SLOT_PER_BUCKET
-    //! slots to store items in. If \p n is not larger than the current
-    //! hashpower, then the function does nothing. It returns true if the table
-    //! expansion succeeded, and false otherwise. rehash can throw an exception
-    //! if the expansion fails to allocate enough memory for the larger table.
+            // We run an insert, since the update failed. Since we already have
+            // the locks, we don't run cuckoo_insert_loop immediately, to avoid
+            // releasing and re-grabbing the locks. Recall that the locks will
+            // be released at the end of this call to cuckoo_insert.
+            st = cuckoo_insert(key, std::forward<V>(val), hv,
+                               b.first, b.second);
+            if (st == failure_table_full) {
+                cuckoo_expand_simple(hp + 1, true);
+                // Retry until the insert doesn't fail due to expansion.
+                if (cuckoo_insert_loop(key, val, hv)) {
+                    break;
+                }
+                // The only valid reason for failure is a duplicate key. In this
+                // case, we retry the entire upsert operation.
+            }
+        } while (st != ok);
+    }
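With the retry-and-expand logic folded into `upsert`, callers get a one-call read-modify-write: apply the updater if the key exists, otherwise insert the provided value. A small usage sketch (same header assumptions as above):

    #include <iostream>
    #include <string>
    #include <vector>

    #include "cuckoohash_map.hh"

    int main() {
        cuckoohash_map<std::string, int> counts;
        std::vector<std::string> words = {"salmon", "index", "salmon", "quant", "salmon"};

        for (const auto& w : words) {
            // Increment the count if the key exists, otherwise insert it with 1.
            counts.upsert(w, [](int& v) { ++v; }, 1);
        }

        int n = 0;
        if (counts.find("salmon", n)) {
            std::cout << "salmon: " << n << "\n";   // prints 3
        }
    }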
+
+    /**
+     * Resizes the table to the given hashpower. If this hashpower is not larger
+     * than the current hashpower, then it decreases the hashpower to the
+     * maximum of the specified value and the smallest hashpower that can hold
+     * all the elements currently in the table.
+     *
+     * @param n the hashpower to set for the table
+     * @return true if the table changed size, false otherwise
+     * @throw libcuckoo_maximum_hashpower_exceeded if the specified hashpower is
+     * greater than the maximum, if one was set
+     */
     bool rehash(size_t n) {
-        check_hazard_pointer();
-        TableInfo* ti = snapshot_table_nolock();
-        HazardPointerUnsetter hpu;
-        if (n <= ti->hashpower_) {
+        size_t hp = get_hashpower();
+        if (n == hp) {
             return false;
         }
-        const cuckoo_status st = cuckoo_expand_simple(n);
-        return (st == ok);
+        return cuckoo_expand_simple(n, n > hp) == ok;
     }
 
-    //! reserve will size the table to have enough slots for at least \p n
-    //! elements. If the table can already hold that many elements, the function
-    //! has no effect. Otherwise, the function will expand the table to a
-    //! hashpower sufficient to hold \p n elements. It will return true if there
-    //! was an expansion, and false otherwise. reserve can throw an exception if
-    //! the expansion fails to allocate enough memory for the larger table.
+    /**
+     * Reserve enough space in the table for the given number of elements. If
+     * the table can already hold that many elements, the function will shrink
+     * the table to the smallest hashpower that can hold the maximum of the
+     * specified amount and the current table size.
+     *
+     * @param n the number of elements to reserve space for
+     * @return true if the size of the table changed, false otherwise
+     * @throw libcuckoo_maximum_hashpower_exceeded if the specified hashpower is
+     * greater than the maximum, if one was set
+     */
     bool reserve(size_t n) {
-        check_hazard_pointer();
-        TableInfo* ti = snapshot_table_nolock();
-        HazardPointerUnsetter hpu;
-        if (n <= hashsize(ti->hashpower_) * SLOT_PER_BUCKET) {
+        size_t hp = get_hashpower();
+        size_t new_hp = reserve_calc(n);
+        if (new_hp == hp) {
             return false;
         }
-        const cuckoo_status st = cuckoo_expand_simple(reserve_calc(n));
-        return (st == ok);
+        return cuckoo_expand_simple(new_hp, new_hp > hp) == ok;
     }
 
     //! hash_function returns the hash function object used by the table.
-    hasher hash_function() const {
+    hasher hash_function() const noexcept {
         return hashfn();
     }
 
     //! key_eq returns the equality predicate object used by the table.
-    key_equal key_eq() const {
+    key_equal key_eq() const noexcept {
         return eqfn();
     }
 
@@ -677,217 +657,199 @@ public:
     }
 
 private:
-    std::atomic<TableInfo*> table_info;
 
-    // old_table_infos holds pointers to old TableInfos that were replaced
-    // during expansion. This keeps the memory alive for any leftover
-    // operations, until they are deleted by the global hazard pointer manager.
-    std::list<std::unique_ptr<TableInfo>> old_table_infos;
+    // This exception is thrown whenever we try to lock a bucket, but the
+    // hashpower is not what was expected
+    class hashpower_changed {};
+
+    // After taking a lock on the table for the given bucket, this function will
+    // check the hashpower to make sure it is the same as what it was before the
+    // lock was taken. If it isn't, it unlocks the bucket and throws a
+    // hashpower_changed exception.
+    inline void check_hashpower(const size_t hp, const size_t lock) const {
+        if (get_hashpower() != hp) {
+            locks_[lock].unlock();
+            LIBCUCKOO_DBG("hashpower changed\n");
+            throw hashpower_changed();
+        }
+    }
 
-    // lock locks the given bucket index.
-    static inline void lock(TableInfo* ti, const size_t i) {
-        ti->locks_[lock_ind(i)].lock();
+    // locks the given bucket index.
+    //
+    // throws hashpower_changed if it changed after taking the lock.
+    inline void lock_one(const size_t hp, size_t i) const {
+        i = lock_ind(i);
+        locks_[i].lock();
+        check_hashpower(hp, i);
     }
 
-    // unlock unlocks the given bucket index.
-    static inline void unlock(TableInfo* ti, const size_t i) {
-        ti->locks_[lock_ind(i)].unlock();
+    // unlocks the given bucket index.
+    inline void unlock_one(const size_t i) const {
+        locks_[lock_ind(i)].unlock();
     }
 
-    // lock_two locks the two bucket indexes, always locking the earlier index
-    // first to avoid deadlock. If the two indexes are the same, it just locks
-    // one.
-    static void lock_two(TableInfo* ti, size_t i1, size_t i2) {
+    // locks the two bucket indexes, always locking the earlier index first to
+    // avoid deadlock. If the two indexes are the same, it just locks one.
+    //
+    // throws hashpower_changed if it changed after taking the lock.
+    void lock_two(const size_t hp, size_t i1, size_t i2) const {
         i1 = lock_ind(i1);
         i2 = lock_ind(i2);
-        if (i1 < i2) {
-            ti->locks_[i1].lock();
-            ti->locks_[i2].lock();
-        } else if (i2 < i1) {
-            ti->locks_[i2].lock();
-            ti->locks_[i1].lock();
-        } else {
-            ti->locks_[i1].lock();
+        if (i2 < i1) {
+            std::swap(i1, i2);
+        }
+        locks_[i1].lock();
+        check_hashpower(hp, i1);
+        if (i2 != i1) {
+            locks_[i2].lock();
         }
     }
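`lock_two` always acquires the lower lock index first, the usual ordering rule that makes a pair of striped locks deadlock-free. A standalone sketch of the same rule over a stripe of `std::mutex` objects (hypothetical names; the real table uses its own spinlock type):

    #include <array>
    #include <cstddef>
    #include <iostream>
    #include <mutex>
    #include <thread>
    #include <utility>

    constexpr std::size_t kNumLocks = 16;                 // must be a power of two
    std::array<std::mutex, kNumLocks> locks;

    std::size_t lock_ind(std::size_t bucket) { return bucket & (kNumLocks - 1); }

    // Lock the stripes covering both buckets, lower index first, to avoid deadlock.
    void lock_two(std::size_t b1, std::size_t b2) {
        std::size_t i1 = lock_ind(b1), i2 = lock_ind(b2);
        if (i2 < i1) std::swap(i1, i2);
        locks[i1].lock();
        if (i2 != i1) locks[i2].lock();
    }

    void unlock_two(std::size_t b1, std::size_t b2) {
        std::size_t i1 = lock_ind(b1), i2 = lock_ind(b2);
        locks[i1].unlock();
        if (i2 != i1) locks[i2].unlock();
    }

    int main() {
        // Two threads locking the same pair of buckets in opposite argument order
        // cannot deadlock, because both acquire the lower stripe first.
        std::thread a([] { lock_two(3, 200); unlock_two(3, 200); });
        std::thread b([] { lock_two(200, 3); unlock_two(200, 3); });
        a.join();
        b.join();
        std::cout << "done\n";
    }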
 
     // unlock_two unlocks both of the given bucket indexes, or only one if they
     // are equal. Order doesn't matter here.
-    static void unlock_two(TableInfo* ti, size_t i1, size_t i2) {
+    void unlock_two(size_t i1, size_t i2) const {
         i1 = lock_ind(i1);
         i2 = lock_ind(i2);
-        ti->locks_[i1].unlock();
+        locks_[i1].unlock();
         if (i1 != i2) {
-            ti->locks_[i2].unlock();
+            locks_[i2].unlock();
         }
     }
 
     // lock_three locks the three bucket indexes in numerical order.
-    static void lock_three(TableInfo* ti, size_t i1,
-                           size_t i2, size_t i3) {
+    //
+    // throws hashpower_changed if it changed after taking the lock.
+    void lock_three(const size_t hp, size_t i1, size_t i2, size_t i3) const {
         i1 = lock_ind(i1);
         i2 = lock_ind(i2);
         i3 = lock_ind(i3);
         // If any are the same, we just run lock_two
         if (i1 == i2) {
-            lock_two(ti, i1, i3);
+            lock_two(hp, i1, i3);
         } else if (i2 == i3) {
-            lock_two(ti, i1, i3);
+            lock_two(hp, i1, i3);
         } else if (i1 == i3) {
-            lock_two(ti, i1, i2);
+            lock_two(hp, i1, i2);
         } else {
-            if (i1 < i2) {
-                if (i2 < i3) {
-                    ti->locks_[i1].lock();
-                    ti->locks_[i2].lock();
-                    ti->locks_[i3].lock();
-                } else if (i1 < i3) {
-                    ti->locks_[i1].lock();
-                    ti->locks_[i3].lock();
-                    ti->locks_[i2].lock();
-                } else {
-                    ti->locks_[i3].lock();
-                    ti->locks_[i1].lock();
-                    ti->locks_[i2].lock();
-                }
-            } else if (i2 < i3) {
-                if (i1 < i3) {
-                    ti->locks_[i2].lock();
-                    ti->locks_[i1].lock();
-                    ti->locks_[i3].lock();
-                } else {
-                    ti->locks_[i2].lock();
-                    ti->locks_[i3].lock();
-                    ti->locks_[i1].lock();
-                }
-            } else {
-                ti->locks_[i3].lock();
-                ti->locks_[i2].lock();
-                ti->locks_[i1].lock();
+            if (i2 < i1) {
+                std::swap(i1, i2);
+            }
+            if (i3 < i2) {
+                std::swap(i2, i3);
             }
+            // Now i3 is the largest, but i2 could now be less than i1
+            if (i2 < i1) {
+                std::swap(i1, i2);
+            }
+            locks_[i1].lock();
+            check_hashpower(hp, i1);
+            locks_[i2].lock();
+            locks_[i3].lock();
         }
     }
 
     // unlock_three unlocks the three given buckets
-    static void unlock_three(TableInfo* ti, size_t i1,
-                             size_t i2, size_t i3) {
+    void unlock_three(size_t i1, size_t i2, size_t i3) const {
         i1 = lock_ind(i1);
         i2 = lock_ind(i2);
         i3 = lock_ind(i3);
-        ti->locks_[i1].unlock();
+        locks_[i1].unlock();
         if (i2 != i1) {
-            ti->locks_[i2].unlock();
+            locks_[i2].unlock();
         }
         if (i3 != i1 && i3 != i2) {
-            ti->locks_[i3].unlock();
+            locks_[i3].unlock();
         }
     }
 
-    // snapshot_table_nolock loads the table info pointer and sets the hazard
-    // pointer, whithout locking anything. There is a possibility that after
-    // loading a snapshot and setting the hazard pointer, an expansion runs and
-    // create a new version of the table, leaving the old one for deletion. To
-    // deal with that, we check that the table_info we loaded is the same as the
-    // current one, and if it isn't, we try again. Whenever we check if (ti !=
-    // table_info.load()) after setting the hazard pointer, there is an ABA
-    // issue, where the address of the new table_info equals the address of a
-    // previously deleted one, however it doesn't matter, since we would still
-    // be looking at the most recent table_info in that case.
-    TableInfo* snapshot_table_nolock() const {
-        while (true) {
-            TableInfo* ti = table_info.load();
-            *hazard_pointer = ti;
-            // If the table info has changed in the time we set the hazard
-            // pointer, ti could have been deleted, so try again.
-            if (ti != table_info.load()) {
-                continue;
-            }
-            return ti;
-        }
-    }
-
-    // snapshot_and_lock_two loads the table_info pointer and locks the buckets
-    // associated with the given hash value. It returns the table_info and the
-    // two locked buckets as a tuple. Since the positions of the bucket locks
-    // depends on the number of buckets in the table, the table_info pointer
-    // needs to be grabbed first.
-    std::tuple<TableInfo*, size_t, size_t>
-    snapshot_and_lock_two(const size_t hv) const {
-        TableInfo* ti;
+    // snapshot_and_lock_two locks the buckets associated with the given
+    // hash value, making sure the hashpower doesn't change before the locks are
+    // taken. Thus it ensures that the buckets and locks corresponding to the
+    // hash value will stay correct as long as the locks are held. It returns
+    // the two bucket indices associated with the hash value.
+    std::pair<size_t, size_t>
+    snapshot_and_lock_two(const size_t hv) const noexcept {
         size_t i1, i2;
         while (true) {
-            ti = table_info.load();
-            *hazard_pointer = ti;
-            // If the table info has changed in the time we set the hazard
-            // pointer, ti could have been deleted, so try again.
-            if (ti != table_info.load()) {
-                continue;
-            }
-            i1 = index_hash(ti, hv);
-            i2 = alt_index(ti, hv, i1);
-            lock_two(ti, i1, i2);
-            // Check the table info again
-            if (ti != table_info.load()) {
-                unlock_two(ti, i1, i2);
+            // Store the current hashpower we're using to compute the buckets
+            size_t hp = get_hashpower();
+            i1 = index_hash(hp, hv);
+            i2 = alt_index(hp, partial_key(hv), i1);
+            try {
+                lock_two(hp, i1, i2);
+            } catch (hashpower_changed&) {
+                // The hashpower changed while taking the locks. Try again.
                 continue;
             }
-            return std::make_tuple(ti, i1, i2);
+            return {i1, i2};
         }
     }
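`snapshot_and_lock_two` is an optimistic pattern: read the hashpower, derive the bucket indices from that snapshot, take the locks, and start over if the snapshot went stale in the meantime. The same shape reduced to a version counter and a single mutex (hypothetical names):

    #include <atomic>
    #include <cstddef>
    #include <iostream>
    #include <mutex>

    std::atomic<std::size_t> version{0};   // stands in for hashpower_
    std::mutex table_lock;                 // stands in for one lock stripe

    // Compute something that depends on the snapshot; here a simple mask.
    std::size_t derive_index(std::size_t snapshot, std::size_t hv) {
        return hv & ((std::size_t{1} << snapshot) - 1);
    }

    // Lock, but only keep the lock if the version is still the one we planned with.
    std::size_t snapshot_and_lock(std::size_t hv) {
        while (true) {
            std::size_t snap = version.load(std::memory_order_acquire);
            std::size_t index = derive_index(snap, hv);
            table_lock.lock();
            if (version.load(std::memory_order_acquire) != snap) {
                table_lock.unlock();       // stale snapshot: retry from the top
                continue;
            }
            return index;                  // caller must unlock table_lock
        }
    }

    int main() {
        version.store(4);
        std::size_t idx = snapshot_and_lock(0x2Fu);   // 0x2F & 0xF == 0xF
        std::cout << "index = " << idx << "\n";
        table_lock.unlock();
    }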
 
-    // AllUnlocker is an object which releases all the locks on the given table
-    // info when it's destructor is called.
+    // A resource manager which releases all the locks upon destruction. It can
+    // only be moved, not copied.
     class AllUnlocker {
-        TableInfo* ti_;
+    private:
+        // If nullptr, do nothing
+        locks_t* locks_;
     public:
-        AllUnlocker(TableInfo* ti): ti_(ti) {}
-        ~AllUnlocker() {
-            if (ti_ != nullptr) {
-                for (size_t i = 0; i < kNumLocks; ++i) {
-                    ti_->locks_[i].unlock();
+        AllUnlocker(locks_t* locks): locks_(locks) {}
+
+        AllUnlocker(const AllUnlocker&) = delete;
+        AllUnlocker(AllUnlocker&& au) : locks_(au.locks_) {
+            au.locks_ = nullptr;
+        }
+
+        AllUnlocker& operator=(const AllUnlocker&) = delete;
+        AllUnlocker& operator=(AllUnlocker&& au) {
+            locks_ = au.locks_;
+            au.locks_ = nullptr;
+            return *this;
+        }
+
+        void deactivate() {
+            locks_ = nullptr;
+        }
+
+        void release() {
+            if (locks_) {
+                for (auto& lock : *locks_) {
+                    lock.unlock();
                 }
+                deactivate();
             }
         }
+
+        ~AllUnlocker() {
+            release();
+        }
     };
 
-    // snapshot_and_lock_all is similar to snapshot_and_lock_two, except that it
-    // takes all the locks in the table.
-    TableInfo* snapshot_and_lock_all() const {
-        while (true) {
-            TableInfo* ti = table_info.load();
-            *hazard_pointer = ti;
-            // If the table info has changed, ti could have been deleted, so try
-            // again
-            if (ti != table_info.load()) {
-                continue;
-            }
-            for (size_t i = 0; i < kNumLocks; ++i) {
-                ti->locks_[i].lock();
-            }
-            // If the table info has changed, unlock the locks and try again.
-            if (ti != table_info.load()) {
-                AllUnlocker au(ti);
-                continue;
-            }
-            return ti;
+    // snapshot_and_lock_all takes all the locks, and returns a deleter object
+    // that releases the locks upon destruction. Note that after taking all the
+    // locks, it is okay to change the buckets_ vector and the hashpower_, since
+    // no other threads should be accessing the buckets.
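+    //
+    // For example, cuckoo_expand_simple below simply does
+    //
+    //     auto unlocker = snapshot_and_lock_all();
+    //
+    // and relies on the returned AllUnlocker to release every lock when it
+    // goes out of scope.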
+    AllUnlocker snapshot_and_lock_all() const noexcept {
+        for (auto& lock : locks_) {
+            lock.lock();
         }
+        return AllUnlocker(&locks_);
     }
 
-    // lock_ind converts an index into buckets_ to an index into locks_.
+    // lock_ind converts an index into buckets to an index into locks.
     static inline size_t lock_ind(const size_t bucket_ind) {
         return bucket_ind & (kNumLocks - 1);
     }
 
     // hashsize returns the number of buckets corresponding to a given
     // hashpower.
-    static inline size_t hashsize(const size_t hashpower) {
-        return 1U << hashpower;
+    static inline size_t hashsize(const size_t hp) {
+        return 1U << hp;
     }
 
     // hashmask returns the bitmask for the buckets array corresponding to a
     // given hashpower.
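+    //
+    // For example, hp == 4 gives hashsize(hp) == 16 buckets and
+    // hashmask(hp) == 0xf.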
-    static inline size_t hashmask(const size_t hashpower) {
-        return hashsize(hashpower) - 1;
+    static inline size_t hashmask(const size_t hp) {
+        return hashsize(hp) - 1;
     }
 
     // hashed_key hashes the given key.
@@ -897,36 +859,31 @@ private:
 
     // index_hash returns the first possible bucket that the given hashed key
     // could be.
-    static inline size_t index_hash(const TableInfo* ti, const size_t hv) {
-        return hv & hashmask(ti->hashpower_);
+    static inline size_t index_hash(const size_t hp, const size_t hv) {
+        return hv & hashmask(hp);
     }
 
     // alt_index returns the other possible bucket that the given hashed key
     // could be. It takes the first possible bucket as a parameter. Note that
     // this function will return the first possible bucket if index is the
-    // second possible bucket, so alt_index(ti, hv, alt_index(ti, hv,
+    // second possible bucket, so alt_index(hp, partial, alt_index(hp, partial,
-    // index_hash(ti, hv))) == index_hash(ti, hv).
+    // index_hash(hp, hv))) == index_hash(hp, hv).
-    static inline size_t alt_index(
-        const TableInfo* ti, const size_t hv, const size_t index) {
-        // ensure tag is nonzero for the multiply
-        const size_t tag = (hv >> ti->hashpower_) + 1;
-        // 0x5bd1e995 is the hash constant from MurmurHash2
-        return (index ^ (tag * 0x5bd1e995)) & hashmask(ti->hashpower_);
+    static inline size_t alt_index(const size_t hp, const partial_t partial,
+                                   const size_t index) {
+        // ensure tag is nonzero for the multiply.
+        const partial_t nonzero_tag = (partial >> 1 << 1) + 1;
+        // 0xc6a4a7935bd1e995 is the hash constant from 64-bit MurmurHash2
+        const size_t hash_of_tag = nonzero_tag * 0xc6a4a7935bd1e995;
+        return (index ^ hash_of_tag) & hashmask(hp);
     }
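+    // For example (a sketch, with hv the hashed key and hp the current
+    // hashpower), applying alt_index twice XORs the index with the same
+    // hash_of_tag and therefore lands back on the first bucket:
+    //
+    //     size_t i1 = index_hash(hp, hv);
+    //     size_t i2 = alt_index(hp, partial_key(hv), i1);
+    //     assert(alt_index(hp, partial_key(hv), i2) == i1);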
 
     // partial_key returns a partial_t representing the upper sizeof(partial_t)
-    // bytes of the hashed key. This is used for partial-key cuckoohashing. If
-    // the key type is POD and small, we don't use partial keys, so we just
-    // return 0.
-    ENABLE_IF(static inline, is_simple, partial_t)
-        partial_key(const size_t hv) {
+    // bytes of the hashed key. This is used for partial-key cuckoohashing, and
+    // for finding the alternate bucket that a key hashes to.
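+    //
+    // For example, with an 8-byte size_t hash and a one-byte partial_t (an
+    // assumption made only for illustration), partial_key(hv) is (hv >> 56),
+    // i.e. the top byte of the hash.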
+    static inline partial_t partial_key(const size_t hv) {
         return (partial_t)(hv >> ((sizeof(size_t)-sizeof(partial_t)) * 8));
     }
 
-    ENABLE_IF(static inline, !is_simple, partial_t) partial_key(const size_t&) {
-        return 0;
-    }
-
     // A constexpr version of pow that we can use for static_asserts
     static constexpr size_t const_pow(size_t a, size_t b) {
         return (b == 0) ? 1 : a * const_pow(a, b - 1);
@@ -940,18 +897,21 @@ private:
         size_t bucket;
         size_t slot;
         key_type key;
+        partial_t partial;
     }  CuckooRecord;
 
+    typedef std::array<CuckooRecord, MAX_BFS_PATH_LEN> CuckooRecords;
+
     // b_slot holds the information for a BFS path through the table
     struct b_slot {
         // The bucket of the last item in the path
         size_t bucket;
         // a compressed representation of the slots for each of the buckets in
-        // the path. pathcode is sort of like a base-SLOT_PER_BUCKET number, and
+        // the path. pathcode is sort of like a base-slot_per_bucket number, and
         // we need to hold at most MAX_BFS_PATH_LEN slots. Thus we need the
-        // maximum pathcode to be at least SLOT_PER_BUCKET^(MAX_BFS_PATH_LEN)
+        // maximum pathcode to be at least slot_per_bucket^(MAX_BFS_PATH_LEN)
         size_t pathcode;
-        static_assert(const_pow(SLOT_PER_BUCKET, MAX_BFS_PATH_LEN) <
+        static_assert(const_pow(slot_per_bucket, MAX_BFS_PATH_LEN) <
                       std::numeric_limits<decltype(pathcode)>::max(),
                       "pathcode may not be large enough to encode a cuckoo"
                       " path");
@@ -973,14 +933,9 @@ private:
 
     // b_queue is the queue used to store b_slots for BFS cuckoo hashing.
     class b_queue {
-        // The maximum size of the BFS queue. Unless it's less than
-        // SLOT_PER_BUCKET^MAX_BFS_PATH_LEN, it won't really mean anything. If
-        // it's a power of 2, then we can quickly wrap around to the beginning
-        // of the array, so we do that.
+        // The maximum size of the BFS queue. Note that unless it's less than
+        // SLOT_PER_BUCKET^MAX_BFS_PATH_LEN, it won't really mean anything.
         static const size_t MAX_CUCKOO_COUNT = 512;
-        static_assert(const_pow(SLOT_PER_BUCKET, MAX_BFS_PATH_LEN) >=
-                      MAX_CUCKOO_COUNT, "MAX_CUCKOO_COUNT value is too large"
-                      " to be useful");
         static_assert((MAX_CUCKOO_COUNT & (MAX_CUCKOO_COUNT - 1)) == 0,
                       "MAX_CUCKOO_COUNT should be a power of 2");
         // A circular array of b_slots
@@ -1025,7 +980,10 @@ private:
     // starts with the i1 and i2 buckets, and, until it finds a bucket with an
     // empty slot, adds each slot of the bucket in the b_slot. If the queue runs
     // out of space, it fails.
-    static b_slot slot_search(TableInfo* ti, const size_t i1, const size_t i2) {
+    //
+    // throws hashpower_changed if it changed during the search
+    b_slot slot_search(const size_t hp, const size_t i1,
+                       const size_t i2) {
         b_queue q;
         // The initial pathcode informs cuckoopath_search which bucket the path
         // starts on
@@ -1034,15 +992,15 @@ private:
         while (!q.full() && !q.empty()) {
             b_slot x = q.dequeue();
             // Picks a (sort-of) random slot to start from
-            size_t starting_slot = x.pathcode % SLOT_PER_BUCKET;
-            for (size_t i = 0; i < SLOT_PER_BUCKET && !q.full();
+            size_t starting_slot = x.pathcode % slot_per_bucket;
+            for (size_t i = 0; i < slot_per_bucket && !q.full();
                  ++i) {
-                size_t slot = (starting_slot + i) % SLOT_PER_BUCKET;
-                lock(ti, x.bucket);
-                if (!ti->buckets_[x.bucket].occupied(slot)) {
+                size_t slot = (starting_slot + i) % slot_per_bucket;
+                lock_one(hp, x.bucket);
+                if (!buckets_[x.bucket].occupied(slot)) {
                     // We can terminate the search here
-                    x.pathcode = x.pathcode * SLOT_PER_BUCKET + slot;
-                    unlock(ti, x.bucket);
+                    x.pathcode = x.pathcode * slot_per_bucket + slot;
+                    unlock_one(x.bucket);
                     return x;
                 }
 
@@ -1050,11 +1008,11 @@ private:
                 // create a new b_slot item, that represents the bucket we would
                 // have come from if we kicked out the item at this slot.
                 if (x.depth < MAX_BFS_PATH_LEN - 1) {
-                    const size_t hv = hashed_key(
-                        ti->buckets_[x.bucket].key(slot));
-                    unlock(ti, x.bucket);
-                    b_slot y(alt_index(ti, hv, x.bucket),
-                             x.pathcode * SLOT_PER_BUCKET + slot, x.depth+1);
+                    const partial_t partial =
+                        buckets_[x.bucket].partial(slot);
+                    unlock_one(x.bucket);
+                    b_slot y(alt_index(hp, partial, x.bucket),
+                             x.pathcode * slot_per_bucket + slot, x.depth+1);
                     q.enqueue(y);
                 }
             }
@@ -1070,67 +1028,73 @@ private:
     // the buckets it searches, the data can change between this function and
     // cuckoopath_move. Thus cuckoopath_move checks that the data matches the
     // cuckoo path before changing it.
-    static int cuckoopath_search(TableInfo* ti, CuckooRecord* cuckoo_path,
-                                 const size_t i1, const size_t i2) {
-        b_slot x = slot_search(ti, i1, i2);
+    //
+    // throws hashpower_changed if it changed during the search
+    int cuckoopath_search(const size_t hp,
+                          CuckooRecords& cuckoo_path,
+                          const size_t i1, const size_t i2) {
+        b_slot x = slot_search(hp, i1, i2);
         if (x.depth == -1) {
             return -1;
         }
         // Fill in the cuckoo path slots from the end to the beginning
         for (int i = x.depth; i >= 0; i--) {
-            cuckoo_path[i].slot = x.pathcode % SLOT_PER_BUCKET;
-            x.pathcode /= SLOT_PER_BUCKET;
+            cuckoo_path[i].slot = x.pathcode % slot_per_bucket;
+            x.pathcode /= slot_per_bucket;
         }
         // Fill in the cuckoo_path buckets and keys from the beginning to the
         // end, using the final pathcode to figure out which bucket the path
         // starts on. Since data could have been modified between slot_search
         // and the computation of the cuckoo path, this could be an invalid
         // cuckoo_path.
-        CuckooRecord* curr = cuckoo_path;
+        CuckooRecord& first = cuckoo_path[0];
         if (x.pathcode == 0) {
-            curr->bucket = i1;
-            lock(ti, curr->bucket);
-            if (!ti->buckets_[curr->bucket].occupied(curr->slot)) {
+            first.bucket = i1;
+            lock_one(hp, first.bucket);
+            if (!buckets_[first.bucket].occupied(first.slot)) {
                 // We can terminate here
-                unlock(ti, curr->bucket);
+                unlock_one(first.bucket);
                 return 0;
             }
-            curr->key = ti->buckets_[curr->bucket].key(curr->slot);
-            unlock(ti, curr->bucket);
+            first.partial = buckets_[first.bucket].partial(first.slot);
+            first.key = buckets_[first.bucket].key(first.slot);
+            unlock_one(first.bucket);
         } else {
             assert(x.pathcode == 1);
-            curr->bucket = i2;
-            lock(ti, curr->bucket);
-            if (!ti->buckets_[curr->bucket].occupied(curr->slot)) {
+            first.bucket = i2;
+            lock_one(hp, first.bucket);
+            if (!buckets_[first.bucket].occupied(first.slot)) {
                 // We can terminate here
-                unlock(ti, curr->bucket);
+                unlock_one(first.bucket);
                 return 0;
             }
-            curr->key = ti->buckets_[curr->bucket].key(curr->slot);
-            unlock(ti, curr->bucket);
+            first.partial = buckets_[first.bucket].partial(first.slot);
+            first.key = buckets_[first.bucket].key(first.slot);
+            unlock_one(first.bucket);
         }
         for (int i = 1; i <= x.depth; ++i) {
-            CuckooRecord* prev = curr++;
-            const size_t prevhv = hashed_key(prev->key);
-            assert(prev->bucket == index_hash(ti, prevhv) ||
-                   prev->bucket == alt_index(ti, prevhv, index_hash(ti,
-                                                                    prevhv)));
+            CuckooRecord& curr = cuckoo_path[i];
+            CuckooRecord& prev = cuckoo_path[i-1];
+            const size_t prevhv = hashed_key(prev.key);
+            assert(prev.bucket == index_hash(hp, prevhv) ||
+                   prev.bucket == alt_index(hp, prev.partial,
+                                            index_hash(hp, prevhv)));
             // We get the bucket that this slot is on by computing the alternate
             // index of the previous bucket
-            curr->bucket = alt_index(ti, prevhv, prev->bucket);
-            lock(ti, curr->bucket);
-            if (!ti->buckets_[curr->bucket].occupied(curr->slot)) {
+            curr.bucket = alt_index(hp, prev.partial, prev.bucket);
+            lock_one(hp, curr.bucket);
+            if (!buckets_[curr.bucket].occupied(curr.slot)) {
                 // We can terminate here
-                unlock(ti, curr->bucket);
+                unlock_one(curr.bucket);
                 return i;
             }
-            curr->key = ti->buckets_[curr->bucket].key(curr->slot);
-            unlock(ti, curr->bucket);
+            curr.partial = buckets_[curr.bucket].partial(curr.slot);
+            curr.key = buckets_[curr.bucket].key(curr.slot);
+            unlock_one(curr.bucket);
         }
         return x.depth;
     }
 
-
     // cuckoopath_move moves keys along the given cuckoo path in order to make
     // an empty slot in one of the buckets in cuckoo_insert. Before the start of
     // this function, the two insert-locked buckets were unlocked in run_cuckoo.
@@ -1138,9 +1102,12 @@ private:
     // the last bucket it looks at (which is either i1 or i2 in run_cuckoo)
     // remains locked. If the function is unsuccessful, then both insert-locked
     // buckets will be unlocked.
-    static bool cuckoopath_move(
-        TableInfo* ti, CuckooRecord* cuckoo_path, size_t depth,
-        const size_t i1, const size_t i2) {
+    //
+    // throws hashpower_changed if it changed during the move
+    bool cuckoopath_move(const size_t hp,
+                         CuckooRecords& cuckoo_path, size_t depth,
+                         const size_t i1, const size_t i2) {
         if (depth == 0) {
             // There is a chance that depth == 0, when try_add_to_bucket sees i1
             // and i2 as full and cuckoopath_search finds one empty. In this
@@ -1150,22 +1117,22 @@ private:
             // locks and return true.
             const size_t bucket = cuckoo_path[0].bucket;
             assert(bucket == i1 || bucket == i2);
-            lock_two(ti, i1, i2);
-            if (!ti->buckets_[bucket].occupied(cuckoo_path[0].slot)) {
+            lock_two(hp, i1, i2);
+            if (!buckets_[bucket].occupied(cuckoo_path[0].slot)) {
                 return true;
             } else {
-                unlock_two(ti, i1, i2);
+                unlock_two(i1, i2);
                 return false;
             }
         }
 
         while (depth > 0) {
-            CuckooRecord* from = cuckoo_path + depth - 1;
-            CuckooRecord* to   = cuckoo_path + depth;
-            size_t fb = from->bucket;
-            size_t fs = from->slot;
-            size_t tb = to->bucket;
-            size_t ts = to->slot;
+            CuckooRecord& from = cuckoo_path[depth-1];
+            CuckooRecord& to   = cuckoo_path[depth];
+            size_t fb = from.bucket;
+            size_t fs = from.slot;
+            size_t tb = to.bucket;
+            size_t ts = to.slot;
 
             size_t ob = 0;
             if (depth == 1) {
@@ -1173,9 +1140,9 @@ private:
                 // lock both of them along with the slot we are swapping to,
                 // since at the end of this function, i1 and i2 must be locked.
                 ob = (fb == i1) ? i2 : i1;
-                lock_three(ti, fb, tb, ob);
+                lock_three(hp, fb, tb, ob);
             } else {
-                lock_two(ti, fb, tb);
+                lock_two(hp, fb, tb);
             }
 
             // We plan to kick out fs, but let's check if it is still there;
@@ -1183,33 +1150,31 @@ private:
             // that happened, just... try again. Also the slot we are filling in
             // may have already been filled in by another thread, or the slot we
             // are moving from may be empty, both of which invalidate the swap.
-            if (!eqfn()(ti->buckets_[fb].key(fs), from->key) ||
-                ti->buckets_[tb].occupied(ts) ||
-                !ti->buckets_[fb].occupied(fs)) {
+            if (!eqfn()(buckets_[fb].key(fs), from.key) ||
+                buckets_[tb].occupied(ts) ||
+                !buckets_[fb].occupied(fs)) {
                 if (depth == 1) {
-                    unlock_three(ti, fb, tb, ob);
+                    unlock_three(fb, tb, ob);
                 } else {
-                    unlock_two(ti, fb, tb);
+                    unlock_two(fb, tb);
                 }
                 return false;
             }
 
-            if (!is_simple) {
-                ti->buckets_[tb].partial(ts) = ti->buckets_[fb].partial(fs);
-            }
-            ti->buckets_[tb].setKV(ts, ti->buckets_[fb].key(fs),
-                                   std::move(ti->buckets_[fb].val(fs)));
-            ti->buckets_[fb].eraseKV(fs);
+            buckets_[tb].partial(ts) = buckets_[fb].partial(fs);
+            buckets_[tb].setKV(ts, buckets_[fb].key(fs),
+                               std::move(buckets_[fb].val(fs)));
+            buckets_[fb].eraseKV(fs);
             if (depth == 1) {
                 // Don't unlock fb or ob, since they are needed in
                 // cuckoo_insert. Only unlock tb if it doesn't unlock the same
                 // bucket as fb or ob.
                 if (lock_ind(tb) != lock_ind(fb) &&
                     lock_ind(tb) != lock_ind(ob)) {
-                    unlock(ti, tb);
+                    unlock_one(tb);
                 }
             } else {
-                unlock_two(ti, fb, tb);
+                unlock_two(fb, tb);
             }
             depth--;
         }
@@ -1217,17 +1182,15 @@ private:
     }
 
     // run_cuckoo performs cuckoo hashing on the table in an attempt to free up
-    // a slot on either i1 or i2. On success, the bucket and slot that was freed
-    // up is stored in insert_bucket and insert_slot. In order to perform the
-    // search and the swaps, it has to unlock both i1 and i2, which can lead to
-    // certain concurrency issues, the details of which are explained in the
-    // function. If run_cuckoo returns ok (success), then the slot it freed up
-    // is still locked. Otherwise it is unlocked.
-    cuckoo_status run_cuckoo(TableInfo* ti, const size_t i1, const size_t i2,
+    // a slot on either i1 or i2, which are assumed to be locked before the
+    // start. On success, the bucket and slot that were freed up are stored in
+    // insert_bucket and insert_slot. In order to perform the search and the
+    // swaps, it has to unlock both i1 and i2, which can lead to certain
+    // concurrency issues, the details of which are explained in the function.
+    // If run_cuckoo returns ok (success), then the slot it freed up is still
+    // locked. Otherwise it is unlocked.
+    cuckoo_status run_cuckoo(const size_t i1, const size_t i2,
                              size_t &insert_bucket, size_t &insert_slot) {
-
-        CuckooRecord cuckoo_path[MAX_BFS_PATH_LEN];
-
         // We must unlock i1 and i2 here, so that cuckoopath_search and
         // cuckoopath_move can lock buckets as desired without deadlock.
         // cuckoopath_move has to look at either i1 or i2 as its last slot, and
@@ -1241,59 +1204,55 @@ private:
         // duplication. To check for this, we search i1 and i2 for the key we
         // are trying to insert before doing so (this is done in cuckoo_insert,
         // and requires that both i1 and i2 are locked). Another problem is that
-        // an expansion runs and changes table_info, meaning the cuckoopath_move
-        // and cuckoo_insert would have operated on an old version of the table,
-        // so the insert would be invalid. For this, we check that ti ==
-        // table_info.load() after cuckoopath_move, signaling to the outer
-        // insert to try again if the comparison fails.
-        unlock_two(ti, i1, i2);
-
+        // an expansion runs and changes the hashpower, meaning the buckets may
+        // not be valid anymore. In this case, the cuckoopath functions will
+        // have thrown a hashpower_changed exception, which we catch and handle
+        // here.
+        size_t hp = get_hashpower();
+        unlock_two(i1, i2);
+
+        CuckooRecords cuckoo_path;
         bool done = false;
-        while (!done) {
-            int depth = cuckoopath_search(ti, cuckoo_path, i1, i2);
-            if (depth < 0) {
-                break;
-            }
+        try {
+            while (!done) {
+                int depth = cuckoopath_search(hp, cuckoo_path, i1, i2);
+                if (depth < 0) {
+                    break;
+                }
 
-            if (cuckoopath_move(ti, cuckoo_path, depth, i1, i2)) {
-                insert_bucket = cuckoo_path[0].bucket;
-                insert_slot = cuckoo_path[0].slot;
-                assert(insert_bucket == i1 || insert_bucket == i2);
-                assert(!ti->locks_[lock_ind(i1)].try_lock());
-                assert(!ti->locks_[lock_ind(i2)].try_lock());
-                assert(!ti->buckets_[insert_bucket].occupied(insert_slot));
-                done = true;
-                break;
+                if (cuckoopath_move(hp, cuckoo_path, depth, i1, i2)) {
+                    insert_bucket = cuckoo_path[0].bucket;
+                    insert_slot = cuckoo_path[0].slot;
+                    assert(insert_bucket == i1 || insert_bucket == i2);
+                    assert(!locks_[lock_ind(i1)].try_lock());
+                    assert(!locks_[lock_ind(i2)].try_lock());
+                    assert(!buckets_[insert_bucket].occupied(insert_slot));
+                    done = true;
+                    break;
+                }
             }
-        }
-
-        if (!done) {
-            return failure;
-        } else if (ti != table_info.load()) {
-            // Unlock i1 and i2 and signal to cuckoo_insert to try again. Since
-            // we set the hazard pointer to be ti, this check isn't susceptible
-            // to an ABA issue, since a new pointer can't have the same address
-            // as ti.
-            unlock_two(ti, i1, i2);
+        } catch (hashpower_changed&) {
+            // The hashpower changed while we were trying to cuckoo, which means
+            // we want to retry. i1 and i2 should not be locked in this case.
             return failure_under_expansion;
         }
-        return ok;
+        return done ? ok : failure;
     }
 
     // try_read_from_bucket will search the bucket for the given key and store
     // the associated value if it finds it.
-    ENABLE_IF(static, value_copy_assignable, bool) try_read_from_bucket(
-        const TableInfo* ti, const partial_t partial,
-        const key_type &key, mapped_type &val, const size_t i) {
-        for (size_t j = 0; j < SLOT_PER_BUCKET; ++j) {
-            if (!ti->buckets_[i].occupied(j)) {
+    ENABLE_IF(, value_copy_assignable, bool)
+    try_read_from_bucket(const partial_t partial, const key_type &key,
+                         mapped_type &val, const size_t i) const {
+        for (size_t j = 0; j < slot_per_bucket; ++j) {
+            if (!buckets_[i].occupied(j)) {
                 continue;
             }
-            if (!is_simple && partial != ti->buckets_[i].partial(j)) {
+            if (!is_simple && partial != buckets_[i].partial(j)) {
                 continue;
             }
-            if (eqfn()(key, ti->buckets_[i].key(j))) {
-                val = ti->buckets_[i].val(j);
+            if (eqfn()(key, buckets_[i].key(j))) {
+                val = buckets_[i].val(j);
                 return true;
             }
         }
@@ -1302,17 +1261,16 @@ private:
 
     // check_in_bucket will search the bucket for the given key and return true
     // if the key is in the bucket, and false if it isn't.
-    static bool check_in_bucket(
-        const TableInfo* ti, const partial_t partial,
-        const key_type &key, const size_t i) {
-        for (size_t j = 0; j < SLOT_PER_BUCKET; ++j) {
-            if (!ti->buckets_[i].occupied(j)) {
+    bool check_in_bucket(const partial_t partial, const key_type &key,
+                         const size_t i) const {
+        for (size_t j = 0; j < slot_per_bucket; ++j) {
+            if (!buckets_[i].occupied(j)) {
                 continue;
             }
-            if (!is_simple && partial != ti->buckets_[i].partial(j)) {
+            if (!is_simple && partial != buckets_[i].partial(j)) {
                 continue;
             }
-            if (eqfn()(key, ti->buckets_[i].key(j))) {
+            if (eqfn()(key, buckets_[i].key(j))) {
                 return true;
             }
         }
@@ -1321,32 +1279,29 @@ private:
 
     // add_to_bucket will insert the given key-value pair into the slot.
     template <class V>
-    static void add_to_bucket(TableInfo* ti, const partial_t partial,
-                              const key_type &key, V val,
-                              const size_t i, const size_t j) {
-        assert(!ti->buckets_[i].occupied(j));
-        if (!is_simple) {
-            ti->buckets_[i].partial(j) = partial;
-        }
-        ti->buckets_[i].setKV(j, key, std::forward<V>(val));
-        ti->num_inserts[counterid].num.fetch_add(1, std::memory_order_relaxed);
+    void add_to_bucket(const partial_t partial, const key_type &key,
+                       V&& val, const size_t i, const size_t j) {
+        assert(!buckets_[i].occupied(j));
+        buckets_[i].partial(j) = partial;
+        buckets_[i].setKV(j, key, std::forward<V>(val));
+        num_inserts_[get_counterid()].num.fetch_add(
+            1, std::memory_order_relaxed);
     }
 
     // try_find_insert_bucket will search the bucket and store the index of an
     // empty slot if it finds one, or -1 if it doesn't. Regardless, it will
     // search the entire bucket and return false if it finds the key already in
     // the table (duplicate key error) and true otherwise.
-    static bool try_find_insert_bucket(
-        TableInfo* ti, const partial_t partial,
-        const key_type &key, const size_t i, int& j) {
+    bool try_find_insert_bucket(const partial_t partial, const key_type &key,
+                                const size_t i, int& j) const {
         j = -1;
         bool found_empty = false;
-        for (size_t k = 0; k < SLOT_PER_BUCKET; ++k) {
-            if (ti->buckets_[i].occupied(k)) {
-                if (!is_simple && partial != ti->buckets_[i].partial(k)) {
+        for (size_t k = 0; k < slot_per_bucket; ++k) {
+            if (buckets_[i].occupied(k)) {
+                if (!is_simple && partial != buckets_[i].partial(k)) {
                     continue;
                 }
-                if (eqfn()(key, ti->buckets_[i].key(k))) {
+                if (eqfn()(key, buckets_[i].key(k))) {
                     return false;
                 }
             } else {
@@ -1361,18 +1316,18 @@ private:
 
     // try_del_from_bucket will search the bucket for the given key, and set the
     // slot of the key to empty if it finds it.
-    static bool try_del_from_bucket(TableInfo* ti, const partial_t partial,
-                                    const key_type &key, const size_t i) {
-        for (size_t j = 0; j < SLOT_PER_BUCKET; ++j) {
-            if (!ti->buckets_[i].occupied(j)) {
+    bool try_del_from_bucket(const partial_t partial,
+                             const key_type &key, const size_t i) {
+        for (size_t j = 0; j < slot_per_bucket; ++j) {
+            if (!buckets_[i].occupied(j)) {
                 continue;
             }
-            if (!is_simple && ti->buckets_[i].partial(j) != partial) {
+            if (!is_simple && buckets_[i].partial(j) != partial) {
                 continue;
             }
-            if (eqfn()(ti->buckets_[i].key(j), key)) {
-                ti->buckets_[i].eraseKV(j);
-                ti->num_deletes[counterid].num.fetch_add(
+            if (eqfn()(buckets_[i].key(j), key)) {
+                buckets_[i].eraseKV(j);
+                num_deletes_[get_counterid()].num.fetch_add(
                     1, std::memory_order_relaxed);
                 return true;
             }
@@ -1382,18 +1337,18 @@ private:
 
     // try_update_bucket will search the bucket for the given key and change its
     // associated value if it finds it.
-    ENABLE_IF(static, value_copy_assignable, bool) try_update_bucket(
-        TableInfo* ti, const partial_t partial,
-        const key_type &key, const mapped_type &value, const size_t i) {
-        for (size_t j = 0; j < SLOT_PER_BUCKET; ++j) {
-            if (!ti->buckets_[i].occupied(j)) {
+    ENABLE_IF(, value_copy_assignable, bool)
+    try_update_bucket(const partial_t partial, const key_type &key,
+                      const mapped_type &value, const size_t i) {
+        for (size_t j = 0; j < slot_per_bucket; ++j) {
+            if (!buckets_[i].occupied(j)) {
                 continue;
             }
-            if (!is_simple && ti->buckets_[i].partial(j) != partial) {
+            if (!is_simple && buckets_[i].partial(j) != partial) {
                 continue;
             }
-            if (eqfn()(ti->buckets_[i].key(j), key)) {
-                ti->buckets_[i].val(j) = value;
+            if (eqfn()(buckets_[i].key(j), key)) {
+                buckets_[i].val(j) = value;
                 return true;
             }
         }
@@ -1403,18 +1358,17 @@ private:
     // try_update_bucket_fn will search the bucket for the given key and change
     // its associated value with the given function if it finds it.
     template <typename Updater>
-    static bool try_update_bucket_fn(
-        TableInfo* ti, const partial_t partial,
-        const key_type &key, Updater fn, const size_t i) {
-        for (size_t j = 0; j < SLOT_PER_BUCKET; ++j) {
-            if (!ti->buckets_[i].occupied(j)) {
+    bool try_update_bucket_fn(const partial_t partial, const key_type &key,
+                              Updater fn, const size_t i) {
+        for (size_t j = 0; j < slot_per_bucket; ++j) {
+            if (!buckets_[i].occupied(j)) {
                 continue;
             }
-            if (!is_simple && ti->buckets_[i].partial(j) != partial) {
+            if (!is_simple && buckets_[i].partial(j) != partial) {
                 continue;
             }
-            if (eqfn()(ti->buckets_[i].key(j), key)) {
-                fn(ti->buckets_[i].val(j));
+            if (eqfn()(buckets_[i].key(j), key)) {
+                fn(buckets_[i].val(j));
                 return true;
             }
         }
@@ -1424,15 +1378,14 @@ private:
     // cuckoo_find searches the table for the given key and value, storing the
     // value in the val if it finds the key. It expects the locks to be taken
     // and released outside the function.
-    ENABLE_IF(static, value_copy_assignable, cuckoo_status)
-        cuckoo_find(const key_type& key, mapped_type& val,
-                    const size_t hv, const TableInfo* ti,
-                    const size_t i1, const size_t i2) {
+    ENABLE_IF(, value_copy_assignable, cuckoo_status)
+    cuckoo_find(const key_type& key, mapped_type& val,
+                const size_t hv, const size_t i1, const size_t i2) const {
         const partial_t partial = partial_key(hv);
-        if (try_read_from_bucket(ti, partial, key, val, i1)) {
+        if (try_read_from_bucket(partial, key, val, i1)) {
             return ok;
         }
-        if (try_read_from_bucket(ti, partial, key, val, i2)) {
+        if (try_read_from_bucket(partial, key, val, i2)) {
             return ok;
         }
         return failure_key_not_found;
@@ -1441,14 +1394,13 @@ private:
     // cuckoo_contains searches the table for the given key, returning true if
     // it's in the table and false otherwise. It expects the locks to be taken
     // and released outside the function.
-    static bool cuckoo_contains(const key_type& key,
-                                const size_t hv, const TableInfo* ti,
-                                const size_t i1, const size_t i2) {
+    bool cuckoo_contains(const key_type& key, const size_t hv,
+                         const size_t i1, const size_t i2) const {
         const partial_t partial = partial_key(hv);
-        if (check_in_bucket(ti, partial, key, i1)) {
+        if (check_in_bucket(partial, key, i1)) {
             return true;
         }
-        if (check_in_bucket(ti, partial, key, i2)) {
+        if (check_in_bucket(partial, key, i2)) {
             return true;
         }
         return false;
@@ -1462,91 +1414,94 @@ private:
     // hashing presents multiple concurrency issues, which are explained in the
     // function.
     template <class V>
-    cuckoo_status cuckoo_insert(const key_type &key, V val,
-                                const size_t hv, TableInfo* ti,
+    cuckoo_status cuckoo_insert(const key_type &key, V&& val, const size_t hv,
                                 const size_t i1, const size_t i2) {
         int res1, res2;
         const partial_t partial = partial_key(hv);
-        if (!try_find_insert_bucket(ti, partial, key, i1, res1)) {
-            unlock_two(ti, i1, i2);
+        if (!try_find_insert_bucket(partial, key, i1, res1)) {
+            unlock_two(i1, i2);
             return failure_key_duplicated;
         }
-        if (!try_find_insert_bucket(ti, partial, key, i2, res2)) {
-            unlock_two(ti, i1, i2);
+        if (!try_find_insert_bucket(partial, key, i2, res2)) {
+            unlock_two(i1, i2);
             return failure_key_duplicated;
         }
         if (res1 != -1) {
-            add_to_bucket(ti, partial, key, std::forward<V>(val), i1, res1);
-            unlock_two(ti, i1, i2);
+            add_to_bucket(partial, key, std::forward<V>(val), i1, res1);
+            unlock_two(i1, i2);
             return ok;
         }
         if (res2 != -1) {
-            add_to_bucket(ti, partial, key, std::forward<V>(val), i2, res2);
-            unlock_two(ti, i1, i2);
+            add_to_bucket(partial, key, std::forward<V>(val), i2, res2);
+            unlock_two(i1, i2);
             return ok;
         }
 
         // we are unlucky, so let's perform cuckoo hashing
         size_t insert_bucket = 0;
         size_t insert_slot = 0;
-        cuckoo_status st = run_cuckoo(ti, i1, i2, insert_bucket, insert_slot);
+        cuckoo_status st = run_cuckoo(i1, i2, insert_bucket, insert_slot);
         if (st == failure_under_expansion) {
             // The run_cuckoo operation operated on an old version of the table,
             // so we have to try again. We signal to the calling insert method
             // to try again by returning failure_under_expansion.
             return failure_under_expansion;
         } else if (st == ok) {
-            assert(!ti->locks_[lock_ind(i1)].try_lock());
-            assert(!ti->locks_[lock_ind(i2)].try_lock());
-            assert(!ti->buckets_[insert_bucket].occupied(insert_slot));
-            assert(insert_bucket == index_hash(ti, hv) ||
-                   insert_bucket == alt_index(ti, hv, index_hash(ti, hv)));
+            assert(!locks_[lock_ind(i1)].try_lock());
+            assert(!locks_[lock_ind(i2)].try_lock());
+            assert(!buckets_[insert_bucket].occupied(insert_slot));
+            assert(insert_bucket == index_hash(get_hashpower(), hv) ||
+                   insert_bucket == alt_index(get_hashpower(), partial,
+                                              index_hash(get_hashpower(), hv)));
             // Since we unlocked the buckets during run_cuckoo, another insert
             // could have inserted the same key into either i1 or i2, so we
             // check for that before doing the insert.
-            if (cuckoo_contains(key, hv, ti, i1, i2)) {
-                unlock_two(ti, i1, i2);
+            if (cuckoo_contains(key, hv, i1, i2)) {
+                unlock_two(i1, i2);
                 return failure_key_duplicated;
             }
-            add_to_bucket(ti, partial, key, std::forward<V>(val),
+            add_to_bucket(partial, key, std::forward<V>(val),
                           insert_bucket, insert_slot);
-            unlock_two(ti, i1, i2);
+            unlock_two(i1, i2);
             return ok;
         }
         assert(st == failure);
         LIBCUCKOO_DBG("hash table is full (hashpower = %zu, hash_items = %zu,"
                       "load factor = %.2f), need to increase hashpower\n",
-                      ti->hashpower_, cuckoo_size(ti), cuckoo_loadfactor(ti));
+                      get_hashpower(), cuckoo_size(),
+                      cuckoo_loadfactor(get_hashpower()));
         return failure_table_full;
     }
 
-    // We run cuckoo_insert in a loop until it succeeds in insert and upsert, so
-    // we pulled out the loop to avoid duplicating it. This should be called
-    // directly after snapshot_and_lock_two, and by the end of the function, the
-    // hazard pointer will have been unset.
+    /**
+     * We run cuckoo_insert in a loop until it succeeds in insert and upsert, so
+     * we pulled out the loop to avoid duplicating logic.
+     *
+     * @param key the key to insert
+     * @param val the value to insert
+     * @param hv the hash value of the key
+     * @return true if the insert succeeded, false if there was a duplicate key
+     * @throw libcuckoo_load_factor_too_low if expansion is necessary, but the
+     * load factor of the table is below the threshold
+     */
     template <class V>
-    bool cuckoo_insert_loop(const key_type& key, V val,
-                            size_t hv, TableInfo* ti, size_t i1, size_t i2) {
-        cuckoo_status st = cuckoo_insert(key, std::forward<V>(val),
-                                         hv, ti, i1, i2);
-        while (st != ok) {
-            // If the insert failed with failure_key_duplicated, it returns here
+    bool cuckoo_insert_loop(const key_type& key, V&& val, size_t hv) {
+        cuckoo_status st;
+        do {
+            auto b = snapshot_and_lock_two(hv);
+            size_t hp = get_hashpower();
+            st = cuckoo_insert(key, std::forward<V>(val), hv,
+                               b.first, b.second);
             if (st == failure_key_duplicated) {
                 return false;
-            }
-            // If it failed with failure_under_expansion, the insert operated on
-            // an old version of the table, so we just try again. If it's
-            // failure_table_full, we have to expand the table before trying
-            // again.
-            if (st == failure_table_full) {
-                if (cuckoo_expand_simple(ti->hashpower_+1) ==
-                    failure_under_expansion) {
-                    LIBCUCKOO_DBG("expansion is on-going\n");
+            } else if (st == failure_table_full) {
+                if (cuckoo_loadfactor(hp) < minimum_load_factor()) {
+                    throw libcuckoo_load_factor_too_low(minimum_load_factor());
                 }
+                // Expand the table and try again
+                cuckoo_expand_simple(hp + 1, true);
             }
-            std::tie(ti, i1, i2) = snapshot_and_lock_two(hv);
-            st = cuckoo_insert(key, std::forward<V>(val), hv, ti, i1, i2);
-        }
+        } while (st != ok);
         return true;
     }
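+    // The load-factor and hashpower limits surface to callers as exceptions.
+    // A minimal sketch, assuming a cuckoohash_map<int, int> named "table"
+    // (the identifiers are illustrative only):
+    //
+    //     try {
+    //         table.insert(key, value);
+    //     } catch (libcuckoo_load_factor_too_low&) {
+    //         // the table is too sparsely filled to justify expansion
+    //     } catch (libcuckoo_maximum_hashpower_exceeded&) {
+    //         // expanding would exceed the configured maximum hashpower
+    //     }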
 
@@ -1554,13 +1509,12 @@ private:
     // that key to empty if it finds it. It expects the locks to be taken and
     // released outside the function.
     cuckoo_status cuckoo_delete(const key_type &key, const size_t hv,
-                                TableInfo* ti, const size_t i1,
-                                const size_t i2) {
+                                const size_t i1, const size_t i2) {
         const partial_t partial = partial_key(hv);
-        if (try_del_from_bucket(ti, partial, key, i1)) {
+        if (try_del_from_bucket(partial, key, i1)) {
             return ok;
         }
-        if (try_del_from_bucket(ti, partial, key, i2)) {
+        if (try_del_from_bucket(partial, key, i2)) {
             return ok;
         }
         return failure_key_not_found;
@@ -1570,14 +1524,13 @@ private:
     // if it finds it. It expects the locks to be taken and released outside the
     // function.
     ENABLE_IF(, value_copy_assignable, cuckoo_status)
-    cuckoo_update(const key_type &key, const mapped_type &val,
-                                const size_t hv, TableInfo* ti,
-                                const size_t i1, const size_t i2) {
+    cuckoo_update(const key_type &key, const mapped_type &val, const size_t hv,
+                  const size_t i1, const size_t i2) {
         const partial_t partial = partial_key(hv);
-        if (try_update_bucket(ti, partial, key, val, i1)) {
+        if (try_update_bucket(partial, key, val, i1)) {
             return ok;
         }
-        if (try_update_bucket(ti, partial, key, val, i2)) {
+        if (try_update_bucket(partial, key, val, i2)) {
             return ok;
         }
         return failure_key_not_found;
@@ -1589,557 +1542,420 @@ private:
     // outside the function.
     template <typename Updater>
     cuckoo_status cuckoo_update_fn(const key_type &key, Updater fn,
-                                   const size_t hv, TableInfo* ti,
+                                   const size_t hv,
                                    const size_t i1, const size_t i2) {
         const partial_t partial = partial_key(hv);
-        if (try_update_bucket_fn(ti, partial, key, fn, i1)) {
+        if (try_update_bucket_fn(partial, key, fn, i1)) {
             return ok;
         }
-        if (try_update_bucket_fn(ti, partial, key, fn, i2)) {
+        if (try_update_bucket_fn(partial, key, fn, i2)) {
             return ok;
         }
         return failure_key_not_found;
     }
 
-    // cuckoo_init initializes the hashtable, given an initial hashpower as the
-    // argument.
-    cuckoo_status cuckoo_init(const size_t hashpower) {
-        table_info.store(new TableInfo(hashpower));
-        cuckoo_clear(table_info.load());
-        return ok;
-    }
-
     // cuckoo_clear empties the table, calling the destructors of all the
     // elements it removes from the table. It assumes the locks are taken as
     // necessary.
-    cuckoo_status cuckoo_clear(TableInfo* ti) {
-        const size_t num_buckets = ti->buckets_.size();
-        ti->buckets_.clear();
-        ti->buckets_.resize(num_buckets);
-        for (size_t i = 0; i < ti->num_inserts.size(); ++i) {
-            ti->num_inserts[i].num.store(0);
-            ti->num_deletes[i].num.store(0);
+    cuckoo_status cuckoo_clear() noexcept {
+        for (Bucket& b : buckets_) {
+            b.clear();
+        }
+        for (size_t i = 0; i < num_inserts_.size(); ++i) {
+            num_inserts_[i].num.store(0);
+            num_deletes_[i].num.store(0);
         }
         return ok;
     }
 
     // cuckoo_size returns the number of elements in the given table.
-    size_t cuckoo_size(const TableInfo* ti) const {
+    size_t cuckoo_size() const noexcept {
         size_t inserts = 0;
         size_t deletes = 0;
-        for (size_t i = 0; i < ti->num_inserts.size(); ++i) {
-            inserts += ti->num_inserts[i].num.load();
-            deletes += ti->num_deletes[i].num.load();
+        for (size_t i = 0; i < num_inserts_.size(); ++i) {
+            inserts += num_inserts_[i].num.load();
+            deletes += num_deletes_[i].num.load();
         }
         return inserts-deletes;
     }
 
     // cuckoo_loadfactor returns the load factor of the given table.
-    double cuckoo_loadfactor(const TableInfo* ti) const {
-        return static_cast<double>(cuckoo_size(ti)) / SLOT_PER_BUCKET /
-            hashsize(ti->hashpower_);
+    double cuckoo_loadfactor(const size_t hp) const noexcept {
+        return (static_cast<double>(cuckoo_size()) / slot_per_bucket /
+                hashsize(hp));
     }
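+    // For example, with slot_per_bucket == 4, hp == 10 (1024 buckets), and
+    // 3000 stored elements, the load factor is 3000 / 4 / 1024, roughly 0.73.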
 
     // insert_into_table is a helper function used by cuckoo_expand_simple to
     // fill up the new table.
     static void insert_into_table(
-        cuckoohash_map<Key, T, Hash, Pred>& new_map, const TableInfo* old_ti,
-        size_t i, size_t end) {
-        for (;i < end; ++i) {
-            for (size_t j = 0; j < SLOT_PER_BUCKET; ++j) {
-                if (old_ti->buckets_[i].occupied(j)) {
+        cuckoohash_map<Key, T, Hash, Pred, Alloc, slot_per_bucket>& new_map,
+        buckets_t& buckets, size_t i, size_t end) {
+        for (; i < end; ++i) {
+            for (size_t j = 0; j < slot_per_bucket; ++j) {
+                if (buckets[i].occupied(j)) {
                     new_map.insert(
-                        old_ti->buckets_[i].key(j),
-                        std::move((mapped_type&)old_ti->buckets_[i].val(j)));
+                        buckets[i].key(j),
+                        std::move((mapped_type&)buckets[i].val(j)));
                 }
             }
         }
     }
 
-    // cuckoo_expand_simple is a simpler version of expansion than
-    // cuckoo_expand, which will double the size of the existing hash table. It
-    // needs to take all the bucket locks, since no other operations can change
-    // the table during expansion. If some other thread is holding the expansion
-    // thread at the time, then it will return failure_under_expansion.
-    cuckoo_status cuckoo_expand_simple(size_t n) {
-        TableInfo* ti = snapshot_and_lock_all();
-        assert(ti == table_info.load());
-        AllUnlocker au(ti);
-        HazardPointerUnsetter hpu;
-        if (n <= ti->hashpower_) {
+    // cuckoo_expand_simple will resize the table to at least the given
+    // new_hashpower. If is_expansion is true, new_hashpower must be greater
+    // than the current hashpower of the table. If it's false, then new_hashpower
+    // must be less. When we're shrinking the table, if the current table
+    // contains more elements than can be held by new_hashpower, the resulting
+    // hashpower will be greater than new_hashpower. It needs to take all the
+    // bucket locks, since no other operations can change the table during
+    // expansion. Throws libcuckoo_maximum_hashpower_exceeded if we're expanding
+    // beyond the maximum hashpower, and we have an actual limit.
+    cuckoo_status cuckoo_expand_simple(size_t new_hp,
+                                       bool is_expansion) {
+        size_t mhp = maximum_hashpower();
+        if (mhp != NO_MAXIMUM_HASHPOWER && new_hp > mhp) {
+            throw libcuckoo_maximum_hashpower_exceeded(new_hp);
+        }
+        auto unlocker = snapshot_and_lock_all();
+        const size_t hp = get_hashpower();
+        if ((is_expansion && new_hp <= hp) ||
+            (!is_expansion && new_hp >= hp)) {
             // Most likely another expansion ran before this one could grab the
             // locks
+            LIBCUCKOO_DBG("another expansion is on-going\n");
             return failure_under_expansion;
         }
 
-        // Creates a new hash table with hashpower n and adds all the
-        // elements from the old buckets
-        cuckoohash_map<Key, T, Hash, Pred> new_map(hashsize(n) * SLOT_PER_BUCKET);
+        // Creates a new hash table with hashpower new_hp and adds all
+        // the elements from the old buckets
+        cuckoohash_map<Key, T, Hash, Pred, Alloc, slot_per_bucket> new_map(
+            hashsize(new_hp) * slot_per_bucket);
         const size_t threadnum = kNumCores();
-        const size_t buckets_per_thread =
-            hashsize(ti->hashpower_) / threadnum;
+        const size_t buckets_per_thread = (
+            (hashsize(hp) + threadnum - 1) / threadnum);
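+        // For example, 16 old buckets split across 3 threads gives
+        // buckets_per_thread == (16 + 2) / 3 == 6, so the insertion threads
+        // cover the ranges [0, 6), [6, 12), and [12, 16).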
         std::vector<std::thread> insertion_threads(threadnum);
-        for (size_t i = 0; i < threadnum-1; ++i) {
+        for (size_t i = 0; i < threadnum; ++i) {
             insertion_threads[i] = std::thread(
-                insert_into_table, std::ref(new_map),
-                ti, i*buckets_per_thread, (i+1)*buckets_per_thread);
+                insert_into_table, std::ref(new_map), std::ref(buckets_),
+                i*buckets_per_thread, std::min((i+1)*buckets_per_thread,
+                                               hashsize(hp)));
         }
-        insertion_threads[threadnum-1] = std::thread(
-            insert_into_table, std::ref(new_map), ti,
-            (threadnum-1)*buckets_per_thread, hashsize(ti->hashpower_));
         for (size_t i = 0; i < threadnum; ++i) {
             insertion_threads[i].join();
         }
-        // Sets this table_info to new_map's. It then sets new_map's
-        // table_info to nullptr, so that it doesn't get deleted when
-        // new_map goes out of scope
-        table_info.store(new_map.table_info.load());
-        new_map.table_info.store(nullptr);
 
-        // Rather than deleting ti now, we store it in old_table_infos. We then
-        // run a delete_unused routine to delete all the old table pointers.
-        old_table_infos.push_back(std::move(std::unique_ptr<TableInfo>(ti)));
-        global_hazard_pointers.delete_unused(old_table_infos);
+        // Swap the current buckets vector with new_map's and set the hashpower.
+        // This is okay, because we have all the locks, so nobody else should be
+        // reading from the buckets array. Then the old buckets array will be
+        // deleted when new_map is deleted. All the locks should be released by
+        // the unlocker as well.
+        std::swap(buckets_, new_map.buckets_);
+        set_hashpower(new_map.hashpower_);
         return ok;
     }
 
-    // Iterator definitions
-    friend class const_iterator;
-    friend class iterator;
-
 public:
-    //! A const_iterator is an iterator through the table that is thread safe.
-    //! For the duration of its existence, it takes all the locks on the table
-    //! it is given, thereby ensuring that no other threads can modify the table
-    //! while the iterator is in use. Note that this also means that only one
-    //! iterator can be active on a table at one time and furthermore that all
-    //! operations on the table, except the \ref size, \ref empty, \ref
-    //! hashpower, \ref bucket_count, and \ref load_factor methods, will stall
-    //! until the iterator loses its lock. For this reason, we suggest using the
-    //! \ref snapshot_table method if possible, since it is less error-prone.
-    //! The iterator allows movement forward and backward through the table as
-    //! well as dereferencing items in the table. It maintains the invariant
-    //! that the iterator is either an end iterator (which points past the end
-    //! of the table), or points to a filled slot. As soon as the iterator
-    //! looses its lock on the table, all dereference and movement operations
-    //! will throw an exception.
-    class const_iterator {
-        // The constructor locks the entire table, retrying until
-        // snapshot_and_lock_all succeeds. Then it calculates end_pos and
-        // begin_pos and sets index and slot to the beginning or end of the
-        // table, based on the boolean argument. We keep this constructor
-        // private (but expose it to the cuckoohash_map class), since we don't
-        // want users calling it.
-        const_iterator(const cuckoohash_map<Key, T, Hash, Pred>& hm,
-                       bool is_end) : hm_(hm) {
-            cuckoohash_map<Key, T, Hash, Pred>::check_hazard_pointer();
-            ti_ = hm_.snapshot_and_lock_all();
-            assert(ti_ == hm_.table_info.load());
-
-            has_table_lock = true;
-
-            index_ = slot_ = 0;
-
-            set_end(end_pos.first, end_pos.second);
-            set_begin(begin_pos.first, begin_pos.second);
-            if (is_end) {
-                index_ = end_pos.first;
-                slot_ = end_pos.second;
-            } else {
-                index_ = begin_pos.first;
-                slot_ = begin_pos.second;
-            }
-        }
+    //! A locked_table is an ownership wrapper around a \ref cuckoohash_map
+    //! table instance. When given a table instance, it takes all the locks on
+    //! the table, blocking all outside operations on the table. Because the
+    //! locked_table has unique ownership of the table, it can provide a set of
+    //! operations on the table that aren't possible in a concurrent context.
+    //! Right now, this includes the ability to construct STL-compatible
+    //! iterators on the table. When the locked_table is destroyed (or the \ref
+    //! release method is called), it will release all locks on the table. This
+    //! will invalidate all existing iterators.
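+    //!
+    //! A minimal usage sketch (assuming the map exposes a lock_table() method
+    //! returning a locked_table, and a cuckoohash_map<int, int> named "table";
+    //! the identifiers are illustrative):
+    //!
+    //!     auto lt = table.lock_table();
+    //!     assert(lt.has_table_lock());
+    //!     // ... operate on the table exclusively, e.g. iterate over it ...
+    //!     lt.release();  // idempotent; drops all bucket locks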
+    class locked_table {
+        // A manager for all the locks we took on the table.
+        AllUnlocker unlocker_;
+        // A reference to the buckets owned by the table
+        std::reference_wrapper<buckets_t> buckets_;
+        // A boolean shared to all iterators, indicating whether the
+        // locked_table has ownership of the hashtable or not.
+        std::shared_ptr<bool> has_table_lock_;
 
-        friend class cuckoohash_map<Key, T, Hash, Pred>;
+        // The constructor locks the entire table, retrying until
+        // snapshot_and_lock_all succeeds. We keep this constructor private (but
+        // expose it to the cuckoohash_map class), since we don't want users
+        // calling it.
+        locked_table(cuckoohash_map<Key, T, Hash, Pred, Alloc,
+                     SLOT_PER_BUCKET>& hm)
+            : unlocker_(std::move(hm.snapshot_and_lock_all())),
+              buckets_(hm.buckets_),
+              has_table_lock_(new bool(true)) {}
 
     public:
-        //! This is an rvalue-reference constructor that takes the lock from \p
-        //! it and copies its state. To create an iterator from scratch, call
-        //! the \ref cbegin or \ref cend methods of cuckoohash_map.
-        const_iterator(const_iterator&& it)
-            : hm_(it.hm_) {
-            if (this == &it) {
-                return;
-            }
-            memcpy(this, &it, sizeof(const_iterator));
-            it.has_table_lock = false;
+        locked_table(locked_table&& lt)
+            : unlocker_(std::move(lt.unlocker_)),
+              buckets_(std::move(lt.buckets_)),
+              has_table_lock_(std::move(lt.has_table_lock_)) {}
+
+        locked_table& operator=(locked_table&& lt) {
+            release();
+            unlocker_ = std::move(lt.unlocker_);
+            buckets_ = std::move(lt.buckets_);
+            has_table_lock_ = std::move(lt.has_table_lock_);
+            return *this;
         }
 
-        //! The assignment operator behaves identically to the rvalue-reference
-        //! constructor.
-        const_iterator* operator=(const_iterator&& it) {
-            if (this == &it) {
-                return this;
-            }
-            memcpy(this, &it, sizeof(const_iterator));
-            it.has_table_lock = false;
-            return this;
+        //! Returns true if the locked table still has ownership of the
+        //! hashtable, false otherwise.
+        bool has_table_lock() const noexcept {
+            return has_table_lock_ && *has_table_lock_;
         }
 
         //! release unlocks the table, thereby freeing it up for other
-        //! operations, but also invalidating all future operations with this
-        //! iterator.
-        void release() {
-            if (has_table_lock) {
-                AllUnlocker au(ti_);
-                cuckoohash_map<Key, T, Hash, Pred>::HazardPointerUnsetter hpu;
-                has_table_lock = false;
+        //! operations, but also invalidating all iterators and future
+        //! operations with this table. It is idempotent.
+        void release() noexcept {
+            if (has_table_lock()) {
+                unlocker_.release();
+                *has_table_lock_ = false;
             }
         }
 
-        //! The destructor simply calls \ref release.
-        ~const_iterator() {
+        ~locked_table() {
             release();
         }
 
-        //! is_end returns true if the iterator is at end_pos, which means it is
-        //! past the end of the table.
-        bool is_end() const {
-            return (index_ == end_pos.first && slot_ == end_pos.second);
-        }
-
-        //! is_begin returns true if the iterator is at begin_pos, which means
-        //! it is at the first item in the table.
-        bool is_begin() const {
-            return (index_ == begin_pos.first && slot_ == begin_pos.second);
-        }
-
-    protected:
-        // For the arrow dereference operator, we return a pointer to a
-        // lightweight pair consisting of const references to the key and value
-        // under the iterator.
-        typedef std::pair<const Key&, const T&> ref_pair;
-        // Since we can't initialize a ref_pair before knowing what it points
-        // to, we use std::aligned_storage to reserve uninitialized space for
-        // the object, which we then construct with placement new. Since this
-        // isn't really part of the logical iterator state, we make it mutable.
-        mutable typename std::aligned_storage<sizeof(ref_pair),
-                                              alignof(ref_pair)>::type data;
-
-    public:
-        //! The dereference operator returns a value_type copied from the
-        //! key-value pair under the iterator.
-        value_type operator*() const {
-            check_lock();
-            if (is_end()) {
-                throw end_dereference;
-            }
-            assert(ti_->buckets_[index_].occupied(slot_));
-            return {ti_->buckets_[index_].key(slot_),
-                    ti_->buckets_[index_].val(slot_)};
-        }
-
-        //! The arrow dereference operator returns a pointer to an internal
-        //! std::pair which contains const references to the key and value
-        //! under the iterator.
-        ref_pair* operator->() const {
-            check_lock();
-            if (is_end()) {
-                throw end_dereference;
-            }
-            assert(ti_->buckets_[index_].occupied(slot_));
-            ref_pair* data_ptr =
-                static_cast<ref_pair*>(static_cast<void*>(&data));
-            new (data_ptr) ref_pair(ti_->buckets_[index_].key(slot_),
-                                    ti_->buckets_[index_].val(slot_));
-            return data_ptr;
-        }
-
-        //! The prefix increment operator moves the iterator forwards to the
-        //! next nonempty slot. If it reaches the end of the table, it becomes
-        //! an end iterator. It throws an exception if the iterator is already
-        //! at the end of the table.
-        const_iterator* operator++() {
-            check_lock();
-            if (is_end()) {
-                throw end_increment;
+    private:
+        //! A templated iterator whose implementation works for both const and
+        //! non-const iterators. It is an STL-style BidirectionalIterator that
+        //! can be used to iterate over a locked table.
+        template <bool IS_CONST>
+        class templated_iterator :
+            public std::iterator<std::bidirectional_iterator_tag, value_type> {
+
+            // The buckets locked and owned by the locked table being iterated
+            // over.
+            std::reference_wrapper<
+                typename std::conditional<
+                IS_CONST, const buckets_t, buckets_t>::type> buckets_;
+
+            // The shared boolean indicating whether the iterator points to a
+            // still-locked table or not. It should never be nullptr.
+            std::shared_ptr<bool> has_table_lock_;
+
+            // The bucket index of the item being pointed to. For implementation
+            // convenience, we let it take on negative values.
+            intmax_t index_;
+            // The slot in the bucket of the item being pointed to. For
+            // implementation convenience, we let it take on negative values.
+            intmax_t slot_;
+
+        public:
+            //! Return true if the iterators are from the same locked table and
+            //! location, false otherwise. This will return false if either of
+            //! the iterators has lost ownership of its table.
+            template <bool OTHER_CONST>
+            bool operator==(const templated_iterator<OTHER_CONST>&
+                            it) const noexcept {
+                return (*has_table_lock_ && *it.has_table_lock_
+                        && &buckets_.get() == &it.buckets_.get()
+                        && index_ == it.index_ && slot_ == it.slot_);
             }
-            forward_filled_slot(index_, slot_);
-            return this;
-        }
 
-        //! The postfix increment operator behaves identically to the prefix
-        //! increment operator.
-        const_iterator* operator++(int) {
-            check_lock();
-            if (is_end()) {
-                throw end_increment;
-            }
-            forward_filled_slot(index_, slot_);
-            return this;
-        }
-
-        //! The prefix decrement operator moves the iterator backwards to the
-        //! previous nonempty slot. If we aren't at the beginning, then the
-        //! backward_filled_slot operation should not fail. If we are, it throws
-        //! an exception.
-        const_iterator* operator--() {
-            check_lock();
-            if (is_begin()) {
-                throw begin_decrement;
+            //! Equivalent to !operator==(it)
+            template <bool OTHER_CONST>
+            bool operator!=(const templated_iterator<OTHER_CONST>&
+                            it) const noexcept {
+                return !(operator==(it));
             }
-            backward_filled_slot(index_, slot_);
-            return this;
-        }
 
-        //! The postfix decrement operator behaves identically to the prefix
-        //! decrement operator.
-        const_iterator* operator--(int) {
-            check_lock();
-            if (is_begin()) {
-                throw begin_decrement;
+            //! Return the key-value pair pointed to by the iterator. Behavior
+            //! is undefined if the iterator is at the end.
+            const value_type& operator*() const {
+                check_iterator();
+                return buckets_.get()[index_].kvpair(slot_);
             }
-            backward_filled_slot(index_, slot_);
-            return this;
-        }
-
-    protected:
-        // A pointer to the associated hashmap
-        const cuckoohash_map<Key, T, Hash, Pred>& hm_;
-
-        // The hashmap's table info
-        typename cuckoohash_map<Key, T, Hash, Pred>::TableInfo* ti_;
 
-        // Indicates whether the iterator has the table lock
-        bool has_table_lock;
+            //! Returns a mutable reference to the current key-value pair
+            //! pointed to by the iterator. Behavior is undefined if the
+            //! iterator is at the end.
+            ENABLE_IF(, !IS_CONST, value_type&) operator*() {
+                check_iterator();
+                return buckets_.get()[index_].kvpair(slot_);
+            }
 
-        // Stores the bucket and slot of the end iterator, which is one past the
-        // end of the table. It is initialized during the iterator's
-        // constructor.
-        std::pair<size_t, size_t> end_pos;
+            //! Return a pointer to the immutable key-value pair pointed to by
+            //! the iterator. Behavior is undefined if the iterator is at the
+            //! end.
+            const value_type* operator->() const {
+                check_iterator();
+                return &buckets_.get()[index_].kvpair(slot_);
+            }
 
-        // Stores the bucket and slot of the begin iterator, which is the first
-        // filled position in the table. It is initialized during the iterator's
-        // constructor. If the table is empty, it points past the end of the
-        // table, to the same position as end_pos.
-        std::pair<size_t, size_t> begin_pos;
+            //! Returns a mutable pointer to the current key-value pair pointed
+            //! to by the iterator. Behavior is undefined if the iterator is at
+            //! the end.
+            ENABLE_IF(, !IS_CONST, value_type*) operator->() {
+                check_iterator();
+                return &buckets_.get()[index_].kvpair(slot_);
+            }
 
-        // The bucket index of the item being pointed to
-        size_t index_;
 
-        // The slot in the bucket of the item being pointed to
-        size_t slot_;
+            //! Advance the iterator to the next item in the table, or to the
+            //! end of the table. Returns the iterator at its new position.
+            //! Behavior is undefined if the iterator is at the end.
+            templated_iterator& operator++() {
+                // Move forward until we get to a slot that is occupied, or we
+                // get to the end
+                check_iterator();
+                for (; (size_t)index_ < buckets_.get().size(); ++index_) {
+                    while ((size_t)++slot_ < SLOT_PER_BUCKET) {
+                        if (buckets_.get()[index_].occupied(slot_)) {
+                            return *this;
+                        }
+                    }
+                    slot_ = -1;
+                }
+                // We're at the end, so set index_ and slot_ to the end position
+                std::tie(index_, slot_) = end_pos(buckets_.get());
+                return *this;
+            }
 
-        // set_end sets the given index and slot to one past the last position
-        // in the table.
-        void set_end(size_t& index, size_t& slot) {
-            index = hm_.bucket_count();
-            slot = 0;
-        }
+            //! Advance the iterator to the next item in the table, or to the
+            //! end of the table. Returns the iterator at its old position.
+            //! Behavior is undefined if the iterator is at the end.
+            templated_iterator operator++(int) {
+                templated_iterator old(*this);
+                ++(*this);
+                return old;
+            }
 
-        // set_begin sets the given pair to the position of the first element in
-        // the table.
-        void set_begin(size_t& index, size_t& slot) {
-            if (hm_.empty()) {
-                set_end(index, slot);
-            } else {
-                index = slot = 0;
-                // There must be a filled slot somewhere in the table
-                if (!ti_->buckets_[index].occupied(slot)) {
-                    forward_filled_slot(index, slot);
-                    assert(!is_end());
+            //! Move the iterator back to the previous item in the table.
+            //! Returns the iterator at its new position. Behavior is undefined
+            //! if the iterator is at the beginning.
+            templated_iterator& operator--() {
+                // Move backward until we get to the beginning. If we try to
+                // move before that, we stop.
+                check_iterator();
+                for (; index_ >= 0; --index_) {
+                    while (--slot_ >= 0) {
+                        if (buckets_.get()[index_].occupied(slot_)) {
+                            return *this;
+                        }
+                    }
+                    slot_ = SLOT_PER_BUCKET;
                 }
+                // Either we iterated before begin(), which means we're in
+                // undefined territory, or we iterated from the end of the table
+                // back, which means the table is empty. Either way, setting the
+                // index_ and slot_ to end_pos() is okay.
+                std::tie(index_, slot_) = end_pos(buckets_.get());
+                return *this;
             }
-        }
 
-        // forward_slot moves the given index and slot to the next available
-        // slot in the forwards direction. It returns true if it successfully
-        // advances, and false if it has reached the end of the table, in which
-        // case it sets index and slot to end_pos.
-        bool forward_slot(size_t& index, size_t& slot) {
-            if (slot < SLOT_PER_BUCKET-1) {
-                ++slot;
-                return true;
-            } else if (index < hm_.bucket_count()-1) {
-                ++index;
-                slot = 0;
-                return true;
-            } else {
-                set_end(index, slot);
-                return false;
+            //! Move the iterator back to the previous item in the table.
+            //! Returns the iterator at its old position. Behavior is undefined
+            //! if the iterator is at the beginning.
+            templated_iterator operator--(int) {
+                templated_iterator old(*this);
+                --(*this);
+                return old;
             }
-        }
 
-        // backward_slot moves index and slot to the next available slot in the
-        // backwards direction. It returns true if it successfully advances, and
-        // false if it has reached the beginning of the table, setting the index
-        // and slot back to begin_pos.
-        bool backward_slot(size_t& index, size_t& slot) {
-            if (slot > 0) {
-                --slot;
-                return true;
-            } else if (index > 0) {
-                --index;
-                slot = SLOT_PER_BUCKET-1;
-                return true;
-            } else {
-                set_begin(index, slot);
-                return false;
+        private:
+            static const std::pair<intmax_t, intmax_t> end_pos(
+                const buckets_t& buckets) {
+                // When index_ == buckets.size() and slot_ == 0, we're at the
+                // end of the table. When index_ and slot_ point to the data
+                // with the lowest bucket and slot, we're at the beginning of
+                // the table. If there is nothing in the table, index_ ==
+                // buckets.size() and slot_ == 0 also means we're at the
+                // beginning of the table (so begin() == end()).
+                return {buckets.size(), 0};
             }
-        }
 
-        // forward_filled_slot moves index and slot to the next filled slot.
-        bool forward_filled_slot(size_t& index, size_t& slot) {
-            bool res = forward_slot(index, slot);
-            if (!res) {
-                return false;
-            }
-            while (!ti_->buckets_[index].occupied(slot)) {
-                res = forward_slot(index, slot);
-                if (!res) {
-                    return false;
+            // The private constructor is used by locked_table to create
+            // iterators from scratch. If the given index_-slot_ pair is at the
+            // end of the table, or that spot is occupied, stay. Otherwise, step
+            // forward to the next data item, or to the end of the table.
+            templated_iterator(
+                typename decltype(buckets_)::type& buckets,
+                std::shared_ptr<bool> has_table_lock, size_t index, size_t slot)
+                : buckets_(buckets), has_table_lock_(has_table_lock),
+                  index_(index), slot_(slot) {
+                if (std::make_pair(index_, slot_) != end_pos(buckets) &&
+                    !buckets[index_].occupied(slot_)) {
+                    operator++();
                 }
             }
-            return true;
-        }
 
-        // backward_filled_slot moves index and slot to the previous filled
-        // slot.
-        bool backward_filled_slot(size_t& index, size_t& slot) {
-            bool res = backward_slot(index, slot);
-            if (!res) {
-                return false;
-            }
-            while (!ti_->buckets_[index].occupied(slot)) {
-                res = backward_slot(index, slot);
-                if (!res) {
-                    return false;
+            // Throws an exception if the iterator has been invalidated because
+            // the locked_table lost ownership of the table info.
+            void check_iterator() const {
+                if (!(*has_table_lock_)) {
+                    throw std::runtime_error("Iterator has been invalidated");
                 }
             }
-            return true;
-        }
 
+            friend class cuckoohash_map<Key, T, Hash, Pred,
+                                        Alloc, SLOT_PER_BUCKET>;
+        };
 
-        // check_lock throws an exception if the iterator doesn't have a
-        // lock.
-        void check_lock() const {
-            if (!has_table_lock) {
-                throw std::runtime_error(
-                    "Iterator does not have a lock on the table");
-            }
-        }
+    public:
+        typedef templated_iterator<true> const_iterator;
+        typedef templated_iterator<false> iterator;
 
-        // Other error messages
-        static const std::out_of_range end_dereference;
-        static const std::out_of_range end_increment;
-        static const std::out_of_range begin_decrement;
-    };
+        //! begin returns an iterator to the beginning of the table
+        iterator begin() {
+            check_table();
+            return iterator(buckets_.get(), has_table_lock_, 0, 0);
+        }
 
+        //! begin returns a const_iterator to the beginning of the table
+        const_iterator begin() const {
+            check_table();
+            return const_iterator(buckets_.get(), has_table_lock_, 0, 0);
+        }
 
-    //! An iterator supports the same operations as the const_iterator and
-    //! provides an additional \ref set_value method to allow changing values in
-    //! the table.
-    class iterator : public const_iterator {
-        // This constructor does the same thing as the private const_iterator
-        // one.
-        iterator(cuckoohash_map<Key, T, Hash, Pred>& hm, bool is_end)
-            : const_iterator(hm, is_end) {}
+        //! cbegin returns a const_iterator to the beginning of the table
+        const_iterator cbegin() const {
+            return begin();
+        }
 
-        friend class cuckoohash_map<Key, T, Hash, Pred>;
+        //! end returns an iterator to the end of the table
+        iterator end() {
+            check_table();
+            const auto end_pos = const_iterator::end_pos(buckets_.get());
+            return iterator(buckets_.get(), has_table_lock_,
+                            end_pos.first, end_pos.second);
+        }
 
-    public:
-        //! This constructor is identical to the rvalue-reference constructor of
-        //! const_iterator.
-        iterator(iterator&& it)
-            : const_iterator(std::move(it)) {}
-
-        //! This constructor allows converting from a const_iterator to an
-        //! iterator.
-        iterator(const_iterator&& it)
-            : const_iterator(std::move(it)) {}
-
-        // The assignment operator behaves identically to the rvalue-reference
-        // constructor.
-        iterator* operator=(iterator&& it) {
-            if (this == &it) {
-                return this;
-            }
-            memcpy(this, &it, sizeof(iterator));
-            it.has_table_lock = false;
-            return this;
-        }
-
-        //! set_value sets the value pointed to by the iterator to \p val. This
-        //! involves modifying the hash table itself, but since we have a lock
-        //! on the table, we are okay. We are only changing the value in the
-        //! bucket, so the element will retain its position in the table.
-        void set_value(const mapped_type val) {
-            this->check_lock();
-            if (this->is_end()) {
-                throw this->end_dereference;
-            }
-            assert(this->ti_->buckets_[this->index_].occupied(this->slot_));
-            this->ti_->buckets_[this->index_].val(this->slot_) = val;
+        //! end returns a const_iterator to the end of the table
+        const_iterator end() const {
+            check_table();
+            const auto end_pos = const_iterator::end_pos(buckets_.get());
+            return const_iterator(buckets_.get(), has_table_lock_,
+                                  end_pos.first, end_pos.second);
         }
-    };
 
-// Public iterator functions
-public:
-    //! cbegin returns a const_iterator to the first filled slot in the
-    //! table.
-    const_iterator cbegin() const {
-        return const_iterator(*this, false);
-    }
+        //! cend returns a const_iterator to the end of the table
+        const_iterator cend() const {
+            return end();
+        }
 
-    //! cend returns a const_iterator set past the end of the table.
-    const_iterator cend() const {
-        return const_iterator(*this, true);
-    }
+    private:
+        // Throws an exception if the locked_table has been invalidated because
+        // it lost ownership of the table info.
+        void check_table() const {
+            if (!has_table_lock()) {
+                throw std::runtime_error(
+                    "locked_table lost ownership of table");
+            }
+        }
 
-    //! begin returns an iterator to the first filled slot in the table.
-    iterator begin() {
-        return iterator(*this, false);
-    }
+        friend class cuckoohash_map<Key, T, Hash, Pred, Alloc, SLOT_PER_BUCKET>;
+    };
 
-    //! end returns an iterator set past the end of the table.
-    iterator end() {
-        return iterator(*this, true);
+    //! lock_table constructs a \ref locked_table object that owns all the locks
+    //! in the table. This can be used to iterate through the table.
+    locked_table lock_table() {
+        return locked_table(*this);
     }
 
-    //! snapshot_table allocates a vector and, using a const_iterator stores all
-    //! the elements currently in the table.
-    std::vector<value_type> snapshot_table() const {
-        std::vector<value_type> items;
-        items.reserve(size());
-        for (auto it = cbegin(); !it.is_end(); ++it) {
-            items.push_back(*it);
-        }
-        return items;
-    }
+    // This class is a friend for unit testing
+    friend class UnitTestInternalAccess;
 };
 
-// Initializing the static members
-template <class Key, class T, class Hash, class Pred>
-    __thread typename cuckoohash_map<Key, T, Hash, Pred>::TableInfo**
-    cuckoohash_map<Key, T, Hash, Pred>::hazard_pointer = nullptr;
-
-template <class Key, class T, class Hash, class Pred>
-    __thread int cuckoohash_map<Key, T, Hash, Pred>::counterid = -1;
-
-template <class Key, class T, class Hash, class Pred>
-    typename cuckoohash_map<Key, T, Hash, Pred>::GlobalHazardPointerList
-    cuckoohash_map<Key, T, Hash, Pred>::global_hazard_pointers;
-
-template <class Key, class T, class Hash, class Pred>
-    const std::out_of_range
-    cuckoohash_map<Key, T, Hash, Pred>::const_iterator::end_dereference(
-        "Cannot dereference: iterator points past the end of the table");
-
-template <class Key, class T, class Hash, class Pred>
-    const std::out_of_range
-    cuckoohash_map<Key, T, Hash, Pred>::const_iterator::end_increment(
-        "Cannot increment: iterator points past the end of the table");
-
-template <class Key, class T, class Hash, class Pred>
-    const std::out_of_range
-    cuckoohash_map<Key, T, Hash, Pred>::const_iterator::begin_decrement(
-        "Cannot decrement: iterator points to the beginning of the table");
-
-template <class Key, class T, class Hash, class Pred>
-std::allocator<Key> cuckoohash_map<Key, T, Hash, Pred>::Bucket::key_allocator;
-
-template <class Key, class T, class Hash, class Pred>
-std::allocator<T> cuckoohash_map<Key, T, Hash, Pred>::Bucket::value_allocator;
-
 #endif // _CUCKOOHASH_MAP_HH
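
The locked_table interface above replaces the per-iterator locking scheme
removed in this hunk: a single lock_table() call takes every lock on the map
and returns an ownership object whose STL-style iterators stay valid until it
is released or destroyed. A minimal usage sketch, assuming the default
template parameters and the bool insert(key, value) overload this libcuckoo
version provides (neither appears in this hunk):

    #include <iostream>
    #include <string>
    #include "cuckoohash_map.hh"

    int main() {
        cuckoohash_map<int, std::string> map;   // default Hash/Pred/Alloc assumed
        map.insert(1, "one");                   // insert(key, value) assumed from the full header
        map.insert(2, "two");
        {
            auto lt = map.lock_table();         // takes every lock; concurrent ops block
            for (auto it = lt.begin(); it != lt.end(); ++it) {
                std::cout << it->first << " -> " << it->second << '\n';
                it->second += "!";              // mutation via the non-const iterator
            }
        }                                       // locks released; iterators from lt invalid
        return 0;
    }

Calling lt.release() early has the same effect as the destructor and is
idempotent, as documented above.
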
diff --git a/include/cuckoohash_util.h b/include/cuckoohash_util.h
deleted file mode 100644
index 79a6343..0000000
--- a/include/cuckoohash_util.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef _CUCKOOHASH_UTIL_H
-#define _CUCKOOHASH_UTIL_H
-
-#include <pthread.h>
-#include "cuckoohash_config.h" // for LIBCUCKOO_DEBUG
-
-#if LIBCUCKOO_DEBUG
-#  define LIBCUCKOO_DBG(fmt, args...)                                   \
-     fprintf(stderr, "\x1b[32m""[libcuckoo:%s:%d:%lu] " fmt"" "\x1b[0m", \
-             __FILE__,__LINE__, (unsigned long)pthread_self(), ##args)
-#else
-#  define LIBCUCKOO_DBG(fmt, args...)  do {} while (0)
-#endif
-
-// For enabling certain methods based on a condition. Here's an example.
-// ENABLE_IF(some_cond, type, static, inline) method() {
-//     ...
-// }
-#define ENABLE_IF(preamble, condition, return_type)                     \
-     template <class Bogus=void*>                                       \
-          preamble typename std::enable_if<sizeof(Bogus) &&             \
-          condition, return_type>::type
-#endif // _CUCKOOHASH_UTIL_H
diff --git a/include/cuckoohash_util.hh b/include/cuckoohash_util.hh
new file mode 100644
index 0000000..7fe43d6
--- /dev/null
+++ b/include/cuckoohash_util.hh
@@ -0,0 +1,88 @@
+/** \file */
+
+#ifndef _CUCKOOHASH_UTIL_HH
+#define _CUCKOOHASH_UTIL_HH
+
+#include <exception>
+#include <pthread.h>
+#include "cuckoohash_config.hh" // for LIBCUCKOO_DEBUG
+
+#if LIBCUCKOO_DEBUG
+#  define LIBCUCKOO_DBG(fmt, args...)                                   \
+     fprintf(stderr, "\x1b[32m""[libcuckoo:%s:%d:%lu] " fmt"" "\x1b[0m", \
+             __FILE__,__LINE__, (unsigned long)pthread_self(), ##args)
+#else
+#  define LIBCUCKOO_DBG(fmt, args...)  do {} while (0)
+#endif
+
+// For enabling certain methods based on a condition. Here's an example.
+// ENABLE_IF(static inline, some_cond, return_type) method() {
+//     ...
+// }
+#define ENABLE_IF(preamble, condition, return_type)                     \
+    template <class Bogus=void*>                                        \
+    preamble typename std::enable_if<sizeof(Bogus) &&                   \
+        condition, return_type>::type
+
+/**
+ * Thrown when an automatic expansion is triggered, but the load factor of the
+ * table is below a minimum threshold, which can be set by the \ref
+ * cuckoohash_map::minimum_load_factor method. This can happen if the hash
+ * function does not properly distribute keys, or for certain adversarial
+ * workloads.
+ */
+class libcuckoo_load_factor_too_low : public std::exception {
+public:
+    /**
+     * Constructor
+     *
+     * @param lf the load factor of the table when the exception was thrown
+     */
+    libcuckoo_load_factor_too_low(const double lf)
+        : load_factor_(lf) {}
+
+    virtual const char* what() const noexcept {
+        return "Automatic expansion triggered when load factor was below "
+            "minimum threshold";
+    }
+
+    /**
+     * @return the load factor of the table when the exception was thrown
+     */
+    double load_factor() {
+        return load_factor_;
+    }
+private:
+    const double load_factor_;
+};
+
+/**
+ * Thrown when an expansion is triggered, but the hashpower specified is greater
+ * than the maximum, which can be set with the \ref
+ * cuckoohash_map::maximum_hashpower method.
+ */
+class libcuckoo_maximum_hashpower_exceeded : public std::exception {
+public:
+    /**
+     * Constructor
+     *
+     * @param hp the hash power we were trying to expand to
+     */
+    libcuckoo_maximum_hashpower_exceeded(const size_t hp)
+        : hashpower_(hp) {}
+
+    virtual const char* what() const noexcept {
+        return "Expansion beyond maximum hashpower";
+    }
+
+    /**
+     * @return the hashpower we were trying to expand to
+     */
+    size_t hashpower() {
+        return hashpower_;
+    }
+private:
+    const size_t hashpower_;
+};
+
+#endif // _CUCKOOHASH_UTIL_HH
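
The two exception classes let callers distinguish a refused automatic
expansion (load factor under the configured minimum) from one that would
exceed the configured maximum hashpower. A sketch of how calling code might
react, assuming the cuckoohash_map insert interface referenced in the
comments above; load_factor() and hashpower() are not declared const, so the
exceptions are caught by non-const reference:

    #include <cstdio>
    #include "cuckoohash_map.hh"
    #include "cuckoohash_util.hh"

    void fill(cuckoohash_map<int, int>& map, int n) {
        try {
            for (int i = 0; i < n; ++i) {
                map.insert(i, i * i);       // may trigger an automatic expansion
            }
        } catch (libcuckoo_load_factor_too_low& e) {
            std::fprintf(stderr, "expansion refused at load factor %f\n",
                         e.load_factor());
        } catch (libcuckoo_maximum_hashpower_exceeded& e) {
            std::fprintf(stderr, "would exceed maximum hashpower (%zu)\n",
                         e.hashpower());
        }
    }
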
diff --git a/include/default_hasher.hh b/include/default_hasher.hh
new file mode 100644
index 0000000..48ff996
--- /dev/null
+++ b/include/default_hasher.hh
@@ -0,0 +1,29 @@
+#ifndef _DEFAULT_HASHER_HH
+#define _DEFAULT_HASHER_HH
+
+#include <string>
+#include <type_traits>
+
+/*! DefaultHasher is the default hash class used in the table. It overloads
+ *  operator() for a few types that std::hash handles poorly (namely integers),
+ *  and falls back to std::hash for anything else. */
+template <class Key>
+class DefaultHasher {
+    std::hash<Key> fallback;
+
+public:
+    template <class T = Key>
+    typename std::enable_if<std::is_integral<T>::value, size_t>::type
+    operator()(const Key& k) const {
+        // This constant is found in the CityHash code
+        return k * 0x9ddfea08eb382d69ULL;
+    }
+
+    template <class T = Key>
+    typename std::enable_if<!std::is_integral<T>::value, size_t>::type
+    operator()(const Key& k) const {
+        return fallback(k);
+    }
+};
+
+#endif // _DEFAULT_HASHER_HH
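
A short sketch of how DefaultHasher dispatches: integral keys take the
multiplicative path above, anything else falls back to std::hash. The key
values are arbitrary:

    #include <cstdio>
    #include <string>
    #include "default_hasher.hh"

    int main() {
        DefaultHasher<int> int_hash;
        DefaultHasher<std::string> str_hash;
        std::printf("%zu\n", int_hash(42));         // integral: 42 * 0x9ddfea08eb382d69ULL
        std::printf("%zu\n", str_hash("salmon"));   // non-integral: std::hash<std::string>
        return 0;
    }
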
diff --git a/include/kseq.h b/include/kseq.h
deleted file mode 100644
index 71a8589..0000000
--- a/include/kseq.h
+++ /dev/null
@@ -1,235 +0,0 @@
-/* The MIT License
-
-   Copyright (c) 2008, 2009, 2011 Attractive Chaos <attractor at live.co.uk>
-
-   Permission is hereby granted, free of charge, to any person obtaining
-   a copy of this software and associated documentation files (the
-   "Software"), to deal in the Software without restriction, including
-   without limitation the rights to use, copy, modify, merge, publish,
-   distribute, sublicense, and/or sell copies of the Software, and to
-   permit persons to whom the Software is furnished to do so, subject to
-   the following conditions:
-
-   The above copyright notice and this permission notice shall be
-   included in all copies or substantial portions of the Software.
-
-   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-   SOFTWARE.
-*/
-
-/* Last Modified: 05MAR2012 */
-
-#ifndef AC_KSEQ_H
-#define AC_KSEQ_H
-
-#include <ctype.h>
-#include <string.h>
-#include <stdlib.h>
-
-#define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
-#define KS_SEP_TAB   1 // isspace() && !' '
-#define KS_SEP_LINE  2 // line separator: "\n" (Unix) or "\r\n" (Windows)
-#define KS_SEP_MAX   2
-
-#define __KS_TYPE(type_t)						\
-        typedef struct __kstream_t {				\
-                unsigned char *buf;						\
-                int begin, end, is_eof;					\
-                type_t f;								\
-        } kstream_t;
-
-#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
-#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
-
-#define __KS_BASIC(type_t, __bufsize)								\
-        static inline kstream_t *ks_init(type_t f)						\
-        {																\
-                kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t));	\
-                ks->f = f;													\
-                ks->buf = (unsigned char*)malloc(__bufsize);				\
-                return ks;													\
-        }																\
-        static inline void ks_destroy(kstream_t *ks)					\
-        {																\
-                if (ks) {													\
-                        free(ks->buf);											\
-                        free(ks);												\
-                }															\
-        }
-
-#define __KS_GETC(__read, __bufsize)						\
-        static inline int ks_getc(kstream_t *ks)				\
-        {														\
-                if (ks->is_eof && ks->begin >= ks->end) return -1;	\
-                if (ks->begin >= ks->end) {							\
-                        ks->begin = 0;									\
-                        ks->end = __read(ks->f, ks->buf, __bufsize);	\
-                        if (ks->end < __bufsize) { ks->is_eof = 1; } \
-                        if (ks->end == 0) { return -1; } \
-                }													\
-                return (int)ks->buf[ks->begin++];					\
-        }
-
-#ifndef KSTRING_T
-#define KSTRING_T kstring_t
-typedef struct __kstring_t {
-        size_t l, m;
-        char *s;
-} kstring_t;
-#endif
-
-#ifndef kroundup32
-#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
-#endif
-
-#define __KS_GETUNTIL(__read, __bufsize)								\
-        static int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \
-        {																	\
-                if (dret) *dret = 0;											\
-                str->l = append? str->l : 0;									\
-                if (ks->begin >= ks->end && ks->is_eof) return -1;				\
-                for (;;) {														\
-                        int i;														\
-                        if (ks->begin >= ks->end) {									\
-                                if (!ks->is_eof) {										\
-                                        ks->begin = 0;										\
-                                        ks->end = __read(ks->f, ks->buf, __bufsize);		\
-                                        if (ks->end < __bufsize) { ks->is_eof = 1; } \
-                                        if (ks->end == 0) { break;} \
-                                } else break;											\
-                        }															\
-                        if (delimiter == KS_SEP_LINE) { \
-                                for (i = ks->begin; i < ks->end; ++i) \
-                                        if (ks->buf[i] == '\n') break; \
-                        } else if (delimiter > KS_SEP_MAX) {						\
-                                for (i = ks->begin; i < ks->end; ++i)					\
-                                        if (ks->buf[i] == delimiter) break;					\
-                        } else if (delimiter == KS_SEP_SPACE) {						\
-                                for (i = ks->begin; i < ks->end; ++i)					\
-                                        if (isspace(ks->buf[i])) break;						\
-                        } else if (delimiter == KS_SEP_TAB) {						\
-                                for (i = ks->begin; i < ks->end; ++i)					\
-                                        if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
-                        } else i = 0; /* never come to here! */						\
-                        if (str->m - str->l < (size_t)(i - ks->begin + 1)) {		\
-                                str->m = str->l + (i - ks->begin) + 1;					\
-                                kroundup32(str->m);										\
-                                str->s = (char*)realloc(str->s, str->m);				\
-                        }															\
-                        memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
-                        str->l = str->l + (i - ks->begin);							\
-                        ks->begin = i + 1;											\
-                        if (i < ks->end) {											\
-                                if (dret) *dret = ks->buf[i];							\
-                                break;													\
-                        }															\
-                }																\
-                if (str->s == 0) {												\
-                        str->m = 1;													\
-                        str->s = (char*)calloc(1, 1);								\
-                } else if (delimiter == KS_SEP_LINE && str->l > 1 && str->s[str->l-1] == '\r') --str->l; \
-                str->s[str->l] = '\0';											\
-                return str->l;													\
-        } \
-        static inline int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
-        { return ks_getuntil2(ks, delimiter, str, dret, 0); }
-
-#define KSTREAM_INIT(type_t, __read, __bufsize) \
-        __KS_TYPE(type_t)							\
-        __KS_BASIC(type_t, __bufsize)				\
-        __KS_GETC(__read, __bufsize)				\
-        __KS_GETUNTIL(__read, __bufsize)
-
-#define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0)
-
-#define __KSEQ_BASIC(SCOPE, type_t)										\
-        SCOPE kseq_t *kseq_init(type_t fd)									\
-        {																	\
-                kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t));					\
-                s->f = ks_init(fd);												\
-                return s;														\
-        }																	\
-        SCOPE void kseq_destroy(kseq_t *ks)									\
-        {																	\
-                if (!ks) return;												\
-                free(ks->name.s); free(ks->comment.s); free(ks->seq.s);	free(ks->qual.s); \
-                ks_destroy(ks->f);												\
-                free(ks);														\
-        }
-
-/* Return value:
-   >=0  length of the sequence (normal)
-   -1   end-of-file
-   -2   truncated quality string
- */
-#define __KSEQ_READ(SCOPE) \
-        SCOPE int kseq_read(kseq_t *seq) \
-        { \
-                int c; \
-                kstream_t *ks = seq->f; \
-                if (seq->last_char == 0) { /* then jump to the next header line */ \
-                        while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
-                        if (c == -1) return -1; /* end of file */ \
-                        seq->last_char = c; \
-                } /* else: the first header char has been read in the previous call */ \
-                seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \
-                if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; /* normal exit: EOF */ \
-                if (c != '\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \
-                if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \
-                        seq->seq.m = 256; \
-                        seq->seq.s = (char*)malloc(seq->seq.m); \
-                } \
-                while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
-                        if (c == '\n') continue; /* skip empty lines */ \
-                        seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \
-                        ks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \
-                } \
-                if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */	\
-                if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \
-                        seq->seq.m = seq->seq.l + 2; \
-                        kroundup32(seq->seq.m); /* rounded to the next closest 2^k */ \
-                        seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
-                } \
-                seq->seq.s[seq->seq.l] = 0;	/* null terminated string */ \
-                if (c != '+') return seq->seq.l; /* FASTA */ \
-                if (seq->qual.m < seq->seq.m) {	/* allocate memory for qual in case insufficient */ \
-                        seq->qual.m = seq->seq.m; \
-                        seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
-                } \
-                while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
-                if (c == -1) return -2; /* error: no quality string */ \
-                while (ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); \
-                seq->last_char = 0;	/* we have not come to the next header line */ \
-                if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \
-                return seq->seq.l; \
-        }
-
-#define __KSEQ_TYPE(type_t)						\
-        typedef struct {							\
-                kstring_t name, comment, seq, qual;		\
-                int last_char;							\
-                kstream_t *f;							\
-        } kseq_t;
-
-#define KSEQ_INIT2(SCOPE, type_t, __read)		\
-        KSTREAM_INIT(type_t, __read, 16384)			\
-        __KSEQ_TYPE(type_t)							\
-        __KSEQ_BASIC(SCOPE, type_t)					\
-        __KSEQ_READ(SCOPE)
-
-#define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read)
-
-#define KSEQ_DECLARE(type_t) \
-        __KS_TYPE(type_t) \
-        __KSEQ_TYPE(type_t) \
-        extern kseq_t *kseq_init(type_t fd); \
-        void kseq_destroy(kseq_t *ks); \
-        int kseq_read(kseq_t *seq);
-
-#endif
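
For reference, the kseq.h macros removed above were normally instantiated
against a read function such as zlib's gzread, generating a small FASTA/FASTQ
reader. The pairing below is the customary one rather than anything this
patch prescribes, and the input path is a placeholder:

    #include <cstdio>
    #include <zlib.h>
    #include "kseq.h"

    KSEQ_INIT(gzFile, gzread)   // generates kseq_init/kseq_read/kseq_destroy

    int main(int argc, char** argv) {
        if (argc < 2) return 1;
        gzFile fp = gzopen(argv[1], "r");
        kseq_t* seq = kseq_init(fp);
        int l;
        while ((l = kseq_read(seq)) >= 0) {            // >= 0: sequence length
            std::printf("%s\t%d\n", seq->name.s, l);   // -1: EOF, -2: truncated quality
        }
        kseq_destroy(seq);
        gzclose(fp);
        return 0;
    }
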
diff --git a/include/posix.h b/include/posix.h
deleted file mode 100644
index 70a0db1..0000000
--- a/include/posix.h
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
- A C++ interface to POSIX functions.
-
- Copyright (c) 2014 - 2015, Victor Zverovich
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice, this
-    list of conditions and the following disclaimer.
- 2. Redistributions in binary form must reproduce the above copyright notice,
-    this list of conditions and the following disclaimer in the documentation
-    and/or other materials provided with the distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef FMT_POSIX_H_
-#define FMT_POSIX_H_
-
-#ifdef __MINGW32__
-// Workaround MinGW bug https://sourceforge.net/p/mingw/bugs/2024/.
-# undef __STRICT_ANSI__
-#endif
-
-#include <errno.h>
-#include <fcntl.h>  // for O_RDONLY
-#include <stdio.h>
-
-#include <cstddef>
-
-#include "format.h"
-
-#ifdef FMT_INCLUDE_POSIX_TEST
-# include "test/posix-test.h"
-#endif
-
-#ifndef FMT_POSIX
-# if defined(_WIN32) && !defined(__MINGW32__)
-// Fix warnings about deprecated symbols.
-#  define FMT_POSIX(call) _##call
-# else
-#  define FMT_POSIX(call) call
-# endif
-#endif
-
-// Calls to system functions are wrapped in FMT_SYSTEM for testability.
-#ifdef FMT_SYSTEM
-# define FMT_POSIX_CALL(call) FMT_SYSTEM(call)
-#else
-# define FMT_SYSTEM(call) call
-# ifdef _WIN32
-// Fix warnings about deprecated symbols.
-#  define FMT_POSIX_CALL(call) ::_##call
-# else
-#  define FMT_POSIX_CALL(call) ::call
-# endif
-#endif
-
-#if FMT_GCC_VERSION >= 407
-# define FMT_UNUSED __attribute__((unused))
-#else
-# define FMT_UNUSED
-#endif
-
-#if FMT_USE_STATIC_ASSERT || FMT_HAS_CPP_ATTRIBUTE(cxx_static_assert) || \
-  (FMT_GCC_VERSION >= 403 && FMT_HAS_GXX_CXX11) || _MSC_VER >= 1600
-# define FMT_STATIC_ASSERT(cond, message) static_assert(cond, message)
-#else
-# define FMT_CONCAT_(a, b) FMT_CONCAT(a, b)
-# define FMT_STATIC_ASSERT(cond, message) \
-  typedef int FMT_CONCAT_(Assert, __LINE__)[(cond) ? 1 : -1] FMT_UNUSED
-#endif
-
-// Retries the expression while it evaluates to error_result and errno
-// equals to EINTR.
-#ifndef _WIN32
-# define FMT_RETRY_VAL(result, expression, error_result) \
-  do { \
-    result = (expression); \
-  } while (result == error_result && errno == EINTR)
-#else
-# define FMT_RETRY_VAL(result, expression, error_result) result = (expression)
-#endif
-
-#define FMT_RETRY(result, expression) FMT_RETRY_VAL(result, expression, -1)
-
-namespace fmt {
-
-// An error code.
-class ErrorCode {
- private:
-  int value_;
-
- public:
-  explicit ErrorCode(int value = 0) FMT_NOEXCEPT : value_(value) {}
-
-  int get() const FMT_NOEXCEPT { return value_; }
-};
-
-// A buffered file.
-class BufferedFile {
- private:
-  FILE *file_;
-
-  friend class File;
-
-  explicit BufferedFile(FILE *f) : file_(f) {}
-
- public:
-  // Constructs a BufferedFile object which doesn't represent any file.
-  BufferedFile() FMT_NOEXCEPT : file_(0) {}
-
-  // Destroys the object closing the file it represents if any.
-  ~BufferedFile() FMT_NOEXCEPT;
-
-#if !FMT_USE_RVALUE_REFERENCES
-  // Emulate a move constructor and a move assignment operator if rvalue
-  // references are not supported.
-
- private:
-  // A proxy object to emulate a move constructor.
-  // It is private to make it impossible to call operator Proxy directly.
-  struct Proxy {
-    FILE *file;
-  };
-
-public:
-  // A "move constructor" for moving from a temporary.
-  BufferedFile(Proxy p) FMT_NOEXCEPT : file_(p.file) {}
-
-  // A "move constructor" for moving from an lvalue.
-  BufferedFile(BufferedFile &f) FMT_NOEXCEPT : file_(f.file_) {
-    f.file_ = 0;
-  }
-
-  // A "move assignment operator" for moving from a temporary.
-  BufferedFile &operator=(Proxy p) {
-    close();
-    file_ = p.file;
-    return *this;
-  }
-
-  // A "move assignment operator" for moving from an lvalue.
-  BufferedFile &operator=(BufferedFile &other) {
-    close();
-    file_ = other.file_;
-    other.file_ = 0;
-    return *this;
-  }
-
-  // Returns a proxy object for moving from a temporary:
-  //   BufferedFile file = BufferedFile(...);
-  operator Proxy() FMT_NOEXCEPT {
-    Proxy p = {file_};
-    file_ = 0;
-    return p;
-  }
-
-#else
- private:
-  FMT_DISALLOW_COPY_AND_ASSIGN(BufferedFile);
-
- public:
-  BufferedFile(BufferedFile &&other) FMT_NOEXCEPT : file_(other.file_) {
-    other.file_ = 0;
-  }
-
-  BufferedFile& operator=(BufferedFile &&other) {
-    close();
-    file_ = other.file_;
-    other.file_ = 0;
-    return *this;
-  }
-#endif
-
-  // Opens a file.
-  BufferedFile(fmt::StringRef filename, fmt::StringRef mode);
-
-  // Closes the file.
-  void close();
-
-  // Returns the pointer to a FILE object representing this file.
-  FILE *get() const FMT_NOEXCEPT { return file_; }
-
-  // We place parentheses around fileno to workaround a bug in some versions
-  // of MinGW that define fileno as a macro.
-  int (fileno)() const;
-
-  void print(fmt::StringRef format_str, const ArgList &args) {
-    fmt::print(file_, format_str, args);
-  }
-  FMT_VARIADIC(void, print, fmt::StringRef)
-};
-
-// A file. Closed file is represented by a File object with descriptor -1.
-// Methods that are not declared with FMT_NOEXCEPT may throw
-// fmt::SystemError in case of failure. Note that some errors such as
-// closing the file multiple times will cause a crash on Windows rather
-// than an exception. You can get standard behavior by overriding the
-// invalid parameter handler with _set_invalid_parameter_handler.
-class File {
- private:
-  int fd_;  // File descriptor.
-
-  // Constructs a File object with a given descriptor.
-  explicit File(int fd) : fd_(fd) {}
-
- public:
-  // Possible values for the oflag argument to the constructor.
-  enum {
-    RDONLY = FMT_POSIX(O_RDONLY), // Open for reading only.
-    WRONLY = FMT_POSIX(O_WRONLY), // Open for writing only.
-    RDWR   = FMT_POSIX(O_RDWR)    // Open for reading and writing.
-  };
-
-  // Constructs a File object which doesn't represent any file.
-  File() FMT_NOEXCEPT : fd_(-1) {}
-
-  // Opens a file and constructs a File object representing this file.
-  File(fmt::StringRef path, int oflag);
-
-#if !FMT_USE_RVALUE_REFERENCES
-  // Emulate a move constructor and a move assignment operator if rvalue
-  // references are not supported.
-
- private:
-  // A proxy object to emulate a move constructor.
-  // It is private to make it impossible to call operator Proxy directly.
-  struct Proxy {
-    int fd;
-  };
-
- public:
-  // A "move constructor" for moving from a temporary.
-  File(Proxy p) FMT_NOEXCEPT : fd_(p.fd) {}
-
-  // A "move constructor" for moving from an lvalue.
-  File(File &other) FMT_NOEXCEPT : fd_(other.fd_) {
-    other.fd_ = -1;
-  }
-
-  // A "move assignment operator" for moving from a temporary.
-  File &operator=(Proxy p) {
-    close();
-    fd_ = p.fd;
-    return *this;
-  }
-
-  // A "move assignment operator" for moving from an lvalue.
-  File &operator=(File &other) {
-    close();
-    fd_ = other.fd_;
-    other.fd_ = -1;
-    return *this;
-  }
-
-  // Returns a proxy object for moving from a temporary:
-  //   File file = File(...);
-  operator Proxy() FMT_NOEXCEPT {
-    Proxy p = {fd_};
-    fd_ = -1;
-    return p;
-  }
-
-#else
- private:
-  FMT_DISALLOW_COPY_AND_ASSIGN(File);
-
- public:
-  File(File &&other) FMT_NOEXCEPT : fd_(other.fd_) {
-    other.fd_ = -1;
-  }
-
-  File& operator=(File &&other) {
-    close();
-    fd_ = other.fd_;
-    other.fd_ = -1;
-    return *this;
-  }
-#endif
-
-  // Destroys the object closing the file it represents if any.
-  ~File() FMT_NOEXCEPT;
-
-  // Returns the file descriptor.
-  int descriptor() const FMT_NOEXCEPT { return fd_; }
-
-  // Closes the file.
-  void close();
-
-  // Returns the file size.
-  fmt::LongLong size() const;
-
-  // Attempts to read count bytes from the file into the specified buffer.
-  std::size_t read(void *buffer, std::size_t count);
-
-  // Attempts to write count bytes from the specified buffer to the file.
-  std::size_t write(const void *buffer, std::size_t count);
-
-  // Duplicates a file descriptor with the dup function and returns
-  // the duplicate as a file object.
-  static File dup(int fd);
-
-  // Makes fd be the copy of this file descriptor, closing fd first if
-  // necessary.
-  void dup2(int fd);
-
-  // Makes fd be the copy of this file descriptor, closing fd first if
-  // necessary.
-  void dup2(int fd, ErrorCode &ec) FMT_NOEXCEPT;
-
-  // Creates a pipe setting up read_end and write_end file objects for reading
-  // and writing respectively.
-  static void pipe(File &read_end, File &write_end);
-
-  // Creates a BufferedFile object associated with this file and detaches
-  // this File object from the file.
-  BufferedFile fdopen(const char *mode);
-};
-
-// Returns the memory page size.
-long getpagesize();
-}  // namespace fmt
-
-#if !FMT_USE_RVALUE_REFERENCES
-namespace std {
-// For compatibility with C++98.
-inline fmt::BufferedFile &move(fmt::BufferedFile &f) { return f; }
-inline fmt::File &move(fmt::File &f) { return f; }
-}
-#endif
-
-#endif  // FMT_POSIX_H_
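
The posix.h wrappers removed here exposed RAII handles around raw file
descriptors and FILE* streams. A sketch based only on the declarations above;
the file names are placeholders, and any call not marked FMT_NOEXCEPT may
throw fmt::SystemError:

    #include <cstddef>
    #include "posix.h"

    int main() {
        fmt::File in("data.txt", fmt::File::RDONLY);   // opens or throws fmt::SystemError
        char buf[256];
        std::size_t n = in.read(buf, sizeof(buf));     // may read fewer than 256 bytes

        fmt::BufferedFile out("out.txt", "w");         // wraps a FILE*
        out.print("read {} bytes\n", n);               // fmt-style formatting
        return 0;
    }                                                  // destructors close both files
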
diff --git a/include/tensemble/BaseForest.h b/include/tensemble/BaseForest.h
deleted file mode 100644
index 3a235ec..0000000
--- a/include/tensemble/BaseForest.h
+++ /dev/null
@@ -1,513 +0,0 @@
-/* * * * *
- *  BaseForest.h
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  Copyright (C) 2012, Rongkai Xia, shinekaixia at gmail.com
- *
- * * * * */
-
-#ifndef libTM_Forest_h
-#define libTM_Forest_h
-#include "TreeRegressor.h"
-#include "TreeClassifier.h"
-#include "algorithm"
-#include "EvaluateMetric.h"
-#include "stdlib.h"
-#define FOREST_TREE_MAX_DEPTH 20
-
-boost::mutex RF_mutex;
-boost::mutex random_number_mutex;
-uint RF_n_trees_done;
-
-class BaseForest;
-
-
-class BaseForest {
-    //Base class for random forests
-    
-public:
-    BaseForest();
-    
-    BaseForest(int split_criterion,uint n_trees,uint n_features,uint max_depth,uint min_sample_leaf, REAL max_features_ratio,uint find_split_algorithm,bool bootstrap,bool oob,bool compute_importance,uint random_seed,uint n_jobs,bool verbose);
-    
-    virtual ~BaseForest();
-    
-    virtual int build(REAL** X,REAL* y,uint n_samples)=0;
-    
-    virtual void predict(REAL** X,REAL* pred,uint nSamples,uint nFeatures)=0;
-    
-    REAL* GetImportances();
-    
-    virtual int save_model(const char* filename)=0;
-    
-    bool load_model(const char* filename);
-protected:
-    void save_model(FILE* fp=NULL);
-    bool check_parameters();
-#ifdef DEBUG
-    void print_parameters();
-#endif
-    
-public:
-    Tree** tree;
-    uint n_jobs;
-    uint n_trees;
-    uint n_features;
-    uint n_classes;
-    uint max_depth;
-    uint min_sample_leaf;
-    uint max_features;
-    uint find_split_algorithm;
-    REAL oob_scores;
-    REAL* importances;
-    int bootstrap;
-    int oob;
-    int compute_importance;
-    uint random_seed;
-    int verbose;
-
-    int split_criterion;
-    int *original_classes;// converted classes=>original_classes
-};
-BaseForest::BaseForest(){
-    this->tree=NULL;
-    this->importances=NULL;
-    this->original_classes=NULL;
-}
-BaseForest::BaseForest(int split_criterion,uint n_trees,uint n_features,uint max_depth,uint min_sample_leaf, REAL max_features_ratio,uint find_split_algorithm,bool bootstrap,bool oob,bool compute_importance,uint random_seed,uint n_jobs,bool verbose){
-    
-    RF_n_trees_done=0;
-    int max_thread=boost::thread::hardware_concurrency();
-    if (n_jobs>n_trees) {
-        n_jobs=n_trees;
-    }
-    if (n_jobs>max_thread) {
-        fprintf(stderr, "WARN: Number of thread is exceed the maximum number of hardware concurrency.Now set number of thread = %d\n",max_thread);
-        n_jobs=max_thread;
-    }
-    this->tree=NULL;
-    this->importances=NULL;
-    this->original_classes=NULL;
-    this->split_criterion=split_criterion;
-    this->tree=new Tree*[n_trees];
-    this->n_jobs=n_jobs;
-    this->n_trees=n_trees;
-    this->n_features=n_features;
-    this->find_split_algorithm=find_split_algorithm;
-    this->max_depth=MIN(FOREST_TREE_MAX_DEPTH, max_depth);
-    this->min_sample_leaf=min_sample_leaf;
-    this->bootstrap=bootstrap;
-    //if compute oob scores,must bootstrap=true
-    if (oob && !bootstrap) {
-        fprintf(stderr, "Out of bag estimation only available if bootstrap=True,auto turn out of bag estimation off.\n");
-        oob=false;
-    }
-    this->oob=oob;
-    this->oob_scores=-1;
-    this->compute_importance=compute_importance;
-    this->random_seed=random_seed;
-    this->verbose=verbose;
-#ifdef DEBUG
-    assert(max_features_ratio<=1.0);
-#endif
-    if (max_features_ratio==MTRY_DEFAULT) {
-        this->max_features=sqrt(n_features);
-    }else {
-        this->max_features=MAX(1,MIN(max_features_ratio,1.0)*n_features);
-    }
-    if (compute_importance) {
-        importances=new REAL[n_features];
-    }
-    //initialize all tree to null
-    for (uint i=0; i<n_trees; i++) {
-        this->tree[i]=NULL;
-    }
-}
-BaseForest::~BaseForest(){
-    if (tree) {
-        for (uint i=0; i<n_trees; i++) {
-            if (tree[i]) {
-                delete tree[i];
-                tree[i]=NULL;
-            }
-        }
-        delete []tree;
-        tree=NULL;
-    }
-    if (importances) {
-        delete [] importances;
-        importances=NULL;
-    }
-    if (original_classes) {
-        delete []original_classes;
-        original_classes=NULL;
-    }
-}
-REAL* BaseForest::GetImportances(){
-    return this->importances;
-}
-
-void BaseForest::save_model(FILE *fp){
-    if (!fp) {
-        fprintf(stderr, "Invalid FILE handler for save GBM model.\n");
-        return;
-    }
-    fprintf(fp, "split_criterion %d\n",split_criterion);
-    fprintf(fp, "n_classes %d\n",n_classes);
-    if (split_criterion==CRITERION_ENTROPY || split_criterion==CRITERION_GINI) {
-#ifdef DEBUG
-        assert(n_classes>=2);
-#endif
-        fprintf(fp, "original_classes");
-        for (uint i=0; i<n_classes; i++) {
-            fprintf(fp, " %d ",original_classes[i]);
-        }
-        fprintf(fp, "\n");
-    }else {
-        assert(n_classes==1);
-    }
-    fprintf(fp, "n_jobs %d\n",n_jobs);
-    fprintf(fp, "n_trees %d\n",n_trees);
-    fprintf(fp, "n_features %d\n",n_features);
-    fprintf(fp, "max_depth %d\n",max_depth);
-    fprintf(fp, "min_sample_leaf %d\n",min_sample_leaf);
-    fprintf(fp, "max_features %d\n",max_features);
-    fprintf(fp, "find_split_algorithm %d\n",find_split_algorithm);
-    fprintf(fp, "bootstrap %d\n",bootstrap);
-    fprintf(fp, "oob %d\n",oob);
-    if (oob) {
-        fprintf(fp, "oob_score %lf\n",oob_scores);
-    }
-    fprintf(fp, "compute_importance %d\n",compute_importance);
-    fprintf(fp, "random_seed %d\n",random_seed);
-    fprintf(fp, "verbose %d\n",verbose);
-    
-    fprintf(fp, "trees\n");
-    for (uint i=0; i<n_trees; i++) {
-        Tree *t=tree[i];
-        fprintf(fp, "nodes %d\n",t->numNodes);
-        for (uint k=ROOT; k<=t->numNodes; k++) {
-            fprintf(fp, "%u %.16lg %u %u %u %lg %lg %d ",\
-                    t->nodes[k]->feature_split,t->nodes[k]->value_split,\
-                    t->nodes[k]->nSamples,t->nodes[k]->left_child,\
-                    t->nodes[k]->right_child,t->nodes[k]->ini_error,\
-                    t->nodes[k]->best_error,t->nodes[k]->leaf);
-            if (t->nodes[k]->leaf) {
-                for (uint j=0; j<n_classes; j++) {
-                    fprintf(fp, "%.16lg ",t->nodes[k]->pred[j]);
-                }
-            }
-        }
-        fprintf(fp, "\n");
-    }
-}
-
-bool BaseForest::load_model(const char *filename){
-    int is_leaf;
-    original_classes=NULL;
-    FILE* fp=fopen(filename,"r");
-    if (!fp) {
-        fprintf(stderr, "Error: Cannot open file %s for load RandomForest model.Please check your model file.\n",filename);
-        return false;
-    }
-    int parameter_count=0;
-    char cmd[100];
-    fscanf(fp, "%*s");
-    while (1) {
-        fscanf(fp, "%100s",cmd);
-        if (strcmp(cmd, "split_criterion")==0) {
-            parameter_count++;
-            fscanf(fp, "%d",&split_criterion);
-        }
-        else if(strcmp(cmd, "n_classes")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&n_classes);
-        }
-        else if(strcmp(cmd, "n_jobs")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&n_jobs);
-        }
-        else if(strcmp(cmd, "n_trees")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&n_trees);
-        }
-        else if(strcmp(cmd, "n_features")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&n_features);
-        }
-        else if(strcmp(cmd, "max_depth")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&max_depth);
-        }
-        else if(strcmp(cmd, "min_sample_leaf")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&min_sample_leaf);
-        }
-        else if(strcmp(cmd, "max_features")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&max_features);
-        }
-        else if(strcmp(cmd, "find_split_algorithm")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&find_split_algorithm);
-        }
-        else if(strcmp(cmd, "bootstrap")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&bootstrap);
-        }
-        else if(strcmp(cmd, "oob")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&oob);
-        }
-        else if(strcmp(cmd, "random_seed")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&random_seed);
-        }
-        else if(strcmp(cmd, "verbose")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&verbose);
-        }
-        else if(strcmp(cmd, "oob_score")==0){
-//            parameter_count++;
-            fscanf(fp, "%lf",&oob_scores);
-        }
-        else if(strcmp(cmd, "compute_importance")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&compute_importance);
-        }
-        else if(strcmp(cmd, "original_classes")==0){
-            parameter_count++;
-            original_classes=new int[n_classes];
-            for (uint i=0; i<n_classes; i++) {
-                fscanf(fp, "%d",&original_classes[i]);
-            }
-        }
-        else if(strcmp(cmd,"trees")==0){
-            break;
-        }
-        else {
-            fprintf(stderr, "RandomForest read model failed,unknowed parameter %s.model file %s is corrupted.\n",cmd,filename);
-            return false;
-        }
-    }
-    if (!check_parameters()) {
-        fprintf(stderr, "RandomForest read model failed,model file %s is corrupted.\n", filename);
-        return false;
-    }
-    //allocate trees
-    tree=new Tree*[n_trees];
-    for (uint i=0; i<n_trees; i++) {
-        tree[i]=NULL;
-    }
-    for (uint i=0; i<n_trees; i++) {
-        fscanf(fp, "%100s",cmd);
-        if (strcmp(cmd, "nodes")!=0) {
-            fprintf(stderr, "RandomForest read model failed,model file %s is corrupted.\n",filename);
-            return false;
-        }
-        Tree *t=new Tree(split_criterion, n_classes, n_features, max_features, min_sample_leaf, max_depth,FIND_BEST,random_seed,1);
-        fscanf(fp, "%d\n",&t->numNodes);
-        t->max_nNodes=t->numNodes+1;
-        t->nodes=new TreeNode*[t->max_nNodes];
-        t->nodes[0]=new TreeNode();//dummy node.
-        for (uint k=ROOT; k<=t->numNodes; k++) {
-            t->nodes[k]=new TreeNode();
-            t->nodes[k]->leaf=false;
-            fscanf(fp, "%u %lf %u %u %u %lf %lf %d ",\
-                   &t->nodes[k]->feature_split,&t->nodes[k]->value_split,\
-                   &t->nodes[k]->nSamples,&t->nodes[k]->left_child,\
-                   &t->nodes[k]->right_child,&t->nodes[k]->ini_error,\
-                   &t->nodes[k]->best_error,&is_leaf);
-            if (is_leaf) {
-                t->nodes[k]->leaf=true;
-                t->nodes[k]->pred=new REAL[n_classes];
-                for (uint j=0; j<n_classes; j++) {
-                    fscanf(fp, "%lf ",&t->nodes[k]->pred[j]);
-                }
-            }
-        }
-        tree[i]=t;
-    }
-    fclose(fp);
-    return true;
-}
-
-void RF_build_trees_range(REAL** X,REAL* y,uint n_samples,BaseForest* forest,pair<uint, uint> n_trees_range,uint* oob_count,REAL* oob_prediction,REAL* importances){
-#ifdef DEBUG
-    if (forest->split_criterion==CRITERION_MSE) {
-        assert(forest->n_classes==1);
-    }
-#endif
-    char* str=new char[100];
-    int split_criterion=forest->split_criterion;
-    Tree** tree=forest->tree;
-    uint n_features=forest->n_features;
-    uint max_features=forest->max_features;
-    uint max_depth=forest->max_depth;
-    uint min_sample_leaf=forest->min_sample_leaf;
-    uint find_split_algorithm=forest->find_split_algorithm;
-    uint random_seed=forest->random_seed;
-    uint n_classes=forest->n_classes;
-    uint n_trees_beg=n_trees_range.first;
-    uint n_trees_end=n_trees_range.second;
-    uint verbose=forest->verbose;
-    bool oob=forest->oob;
-    bool compute_importance=forest->compute_importance;
-    bool bootstrap=forest->bootstrap;
-    
-    uint i,j,k;
-    REAL *oob_prediction_tmp,*importance_tmp;
-    REAL** sub_X;
-    REAL* sub_y;
-    bool* mask;//for oob prediction
-    if (oob) {
-        oob_prediction_tmp=new REAL[n_samples*n_classes];
-        for (i=0; i<n_samples*n_classes; i++) {
-            oob_prediction[i]=0;
-        }
-        for (i=0; i<n_samples; i++){
-            oob_count[i]=0;
-        }
-        mask=new bool[n_samples];
-    }
-    if (compute_importance) {
-        importance_tmp=new REAL[n_features];
-        for (j=0; j<n_features; j++) {
-            importances[j]=0;
-        }
-    }
-    if (bootstrap) {
-        sub_X=new REAL* [n_samples];
-        sub_y=new REAL[n_samples];
-    }
-    for (i=n_trees_beg; i<n_trees_end; i++) {
-        boost::mutex::scoped_lock rand_lock(random_number_mutex);
-        uint job_random_seed=randomMT();
-        rand_lock.unlock();
-        if (oob) {
-            for (j=0; j<n_samples; j++) {
-                mask[j]=false;
-            }
-        }
-        if (bootstrap) {
-            for (j=0; j<n_samples; j++) {
-                uint idx=rand_r(&job_random_seed)%n_samples;
-//                uint idx=randomMT()%n_samples;
-                sub_X[j]=X[idx];
-                sub_y[j]=y[idx];
-                if (oob) {
-                    mask[idx]=true;
-                }
-            }
-        }else {
-            sub_X=X;
-            sub_y=y;
-        }
-        //build tree
-        if (split_criterion==CRITERION_MSE) {
-            tree[i]=new TreeRegressor(n_features,\
-                                      max_features,\
-                                      min_sample_leaf,\
-                                      max_depth,\
-                                      find_split_algorithm,\
-                                      job_random_seed,\
-                                      1);
-        }else{
-            tree[i]=new TreeClassifier(split_criterion,\
-                                       n_classes,\
-                                       n_features,\
-                                       max_features,\
-                                       min_sample_leaf,\
-                                       max_depth,\
-                                       find_split_algorithm,\
-                                       job_random_seed,\
-                                       1);
-        }
-        tree[i]->build(sub_X, sub_y, n_samples);
-        if (oob) {
-            tree[i]->predict(X, oob_prediction_tmp, mask, n_samples, n_features);
-            for (j=0; j<n_samples; j++) {
-                if (mask[j]) {
-                    continue;
-                }
-                oob_count[j]++;
-                for (k=0; k<n_classes; k++) {
-                    oob_prediction[j*n_classes+k]+=oob_prediction_tmp[j*n_classes+k];
-                }
-            }
-        }
-        if (compute_importance) {
-            tree[i]->compute_importance(importance_tmp);
-            for (j=0; j<n_features; j++) {
-                importances[j]+=importance_tmp[j];
-            }
-            
-        }
-        
-        //print info
-        boost::mutex::scoped_lock lock(RF_mutex);
-        RF_n_trees_done++;
-        if (verbose) {
-            if (RF_n_trees_done!=1) {
-                for (j=0; j<50; j++) {
-                    fprintf(stderr, "\b");
-                }
-            }
-            str[0]='\0';
-            sprintf(str, "Random forest progress: %d/%d",RF_n_trees_done,forest->n_trees);
-            int str_len=strlen(str);
-            fprintf(stderr, "%s",str);
-            for (j=str_len; j<50; j++) {
-                fprintf(stderr, " ");
-            }
-        }
-        lock.unlock();
-    }
-    if (oob) {
-        delete []mask;
-        delete []oob_prediction_tmp;
-    }
-    if (compute_importance) {
-        delete []importance_tmp;
-    }
-    if (bootstrap) {
-        delete []sub_X;
-        delete []sub_y;
-    }
-    delete []str;
-}
-bool BaseForest::check_parameters(){
-    bool ret=true;
-    if (n_classes<=0 || n_trees<=0 || n_features<=0 || max_features>n_features || n_jobs<1) {
-        ret=false;
-    }
-    if (min_sample_leaf<=0 ||max_depth<2 || ( find_split_algorithm!=FIND_BEST && find_split_algorithm!=FIND_RANDOM) ) {
-        ret=false;
-    }
-    return ret;
-}
-#ifdef DEBUG
-void BaseForest::print_parameters(){
-    fprintf(stderr, "n_classes %d\n",n_classes);
-    fprintf(stderr, "n_jobs %d\n",n_jobs);
-    fprintf(stderr, "n_trees %d\n",n_trees);
-    fprintf(stderr, "n_features %d\n",n_features);
-    fprintf(stderr, "max_depth %d\n",max_depth);
-    fprintf(stderr, "min_sample_leaf %d\n",min_sample_leaf);
-    fprintf(stderr, "max_features %d\n",max_features);
-    fprintf(stderr, "find_split_algorithm %d\n",find_split_algorithm);
-    fprintf(stderr, "bootstrap %d\n",bootstrap);
-    fprintf(stderr, "oob %d\n",oob);
-    if (oob) {
-        fprintf(stderr, "oob_score %lf\n",oob_scores);
-    }
-    fprintf(stderr, "compute_importance %d\n",compute_importance);
-    fprintf(stderr, "random_seed %d\n",random_seed);
-    fprintf(stderr, "verbose %d\n",verbose);
-}
-#endif
-#endif
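
The RF_build_trees_range() routine removed above draws a bootstrap sample for every tree and keeps an in-bag mask, so that out-of-bag (OOB) predictions are accumulated only for the rows a tree never trained on. A minimal, self-contained sketch of that sampling step (it assumes the POSIX rand_r() the removed code also uses; bootstrap_indices and in_bag are hypothetical names, not the upstream API):

    #include <cstdlib>   // rand_r (POSIX), as in the removed RF_build_trees_range
    #include <vector>

    // Draw a bootstrap sample of row indices and record which rows are in-bag,
    // so OOB predictions can later be accumulated only over the left-out rows.
    std::vector<unsigned> bootstrap_indices(unsigned n_samples, unsigned* seed,
                                            std::vector<bool>& in_bag) {
        in_bag.assign(n_samples, false);
        std::vector<unsigned> idx(n_samples);
        for (unsigned j = 0; j < n_samples; ++j) {
            unsigned pick = rand_r(seed) % n_samples;  // sampling with replacement
            idx[j] = pick;
            in_bag[pick] = true;
        }
        return idx;
    }
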
diff --git a/include/tensemble/BaseGBM.h b/include/tensemble/BaseGBM.h
deleted file mode 100644
index 88f9525..0000000
--- a/include/tensemble/BaseGBM.h
+++ /dev/null
@@ -1,426 +0,0 @@
-/* * * * *
- *  BaseGBM.h
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  Copyright (C) 2012, Rongkai Xia, shinekaixia at gmail.com
- *
- * * * * */
-
-#ifndef libTM_BaseGBM_h
-#define libTM_BaseGBM_h
-#include "LossFunction.h"
-#include "Tree.h"
-#include "EvaluateMetric.h"
-#define GBM_TREE_MAX_DEEP 15
-
-/* define loss */
-#define SQUARE_LOSS 0
-#define BINOMIAL_DEVIANCE   1
-#define MULTINOMIAL_DEVIANCE    2
-
-
-class BaseGBM {
-    /* Base gradient boosting machine.
-     * Do not use it directly.
-     */
-    
-public:
-    BaseGBM();
-    
-    BaseGBM(int loss_function,uint n_trees,uint n_features,uint max_depth,uint min_sample_leaf, REAL max_features_ratio,REAL subsample,REAL learning_rate,bool oob,bool compute_importance,uint random_seed,uint n_jobs,int verbose);
-    
-    virtual ~BaseGBM();
-    
-    virtual int build(REAL **X, REAL *y, uint n_samples,
-                      REAL** val_X=NULL,REAL* val_y=NULL,uint n_val_samples=0)=0;    
-    
-    virtual void predict(REAL** X,REAL* pred,uint nSamples,uint nFeatures,uint k_trees)=0;
-    
-    //may be useful for validation
-    virtual void predict_verbose(REAL** X,REAL* y,REAL* pred,uint nSamples,uint nFeatures,\
-                                 uint k_trees)=0;
-    
-    virtual int save_model(const char* filename)=0;
-    
-    bool load_model(const char* filename);
-    
-    REAL* GetImportances();
-    
-protected:
-    void save_model(FILE* fp=NULL);
-    bool check_parameters();
-#ifdef DEBUG
-    void print_parameters();
-#endif
-    
-public:
-    int loss_function;
-    LossFunction* loss;
-    uint n_classes;
-    
-    Tree** tree;
-    uint n_jobs;
-    uint n_trees;
-    uint n_features;
-    uint max_depth;
-    uint min_sample_leaf;
-    uint max_features;
-    REAL subsample;
-    REAL learning_rate;
-    REAL* importances;
-    int oob;
-    int compute_importance;
-    uint random_seed;
-    int verbose;
-    
-    /* for regression, prior_pred has size 1 and original_classes=NULL
-     * for two-class problems, prior_pred is the prior probability of the positive class.
-     */
-    REAL *prior_pred;
-    int *original_classes;// converted classes=>original_classes
-};
-
-BaseGBM::BaseGBM(){
-    tree=NULL;
-    loss=NULL;
-    importances=NULL;
-    prior_pred=NULL;
-    original_classes=NULL;
-}
-
-BaseGBM::BaseGBM(int loss_function,uint n_trees,uint n_features,uint max_depth,uint min_sample_leaf, REAL max_features_ratio,REAL subsample,REAL learning_rate,bool oob,bool compute_importance,uint random_seed,uint n_jobs,int verbose){
-#ifdef DEBUG
-    assert(subsample<=1.0 && subsample>0.0);
-    assert(loss_function==SQUARE_LOSS || loss_function==MULTINOMIAL_DEVIANCE || loss_function==BINOMIAL_DEVIANCE);
-    assert(max_features_ratio<=1.0 && max_features_ratio>0.0);
-#endif
-    if (max_features_ratio==MTRY_DEFAULT) {
-        this->max_features=(uint)sqrt((REAL)n_features);
-    }else {
-        this->max_features=(uint)MAX(1.0,MIN(max_features_ratio,1.0)*n_features);
-    }
-    if (max_depth>GBM_TREE_MAX_DEEP) {
-        max_depth=GBM_TREE_MAX_DEEP;
-    }
-    //subsample rate must be within (0.0, 1.0]
-    if (subsample>1.0 || subsample<=0.0) {
-        subsample=1.0;
-    }
-    int max_thread=boost::thread::hardware_concurrency();
-    if (n_jobs>max_thread) {
-        fprintf(stderr, "WARNNING: Number of thread is exceed the maximum number of hardware concurrency.Now set number of thread = %d\n",max_thread);  
-        n_jobs=max_thread;
-    }
-    //reset number of threads if max_features is too small
-    if (n_jobs>1 && max_features/n_jobs<THREAD_MIN_FEATURES) {
-        fprintf(stderr, "WARNNING: Number of thread is %d,but max_feature=%d.Each thread at least need %d features to find best split for internal nodes in decision tree.Now set number of thread = %d\n",n_jobs,max_features,THREAD_MIN_FEATURES,MAX(1,(uint)max_features/THREAD_MIN_FEATURES));
-        n_jobs=MAX(1,(uint)max_features/THREAD_MIN_FEATURES);
-    }
-    // computing OOB scores requires subsample < 1.0
-    if (oob && subsample>=1.0) {
-        fprintf(stderr, "Out of bag estimation only available if subsample<1.0,auto turn out of bag estimation off.\n");
-        oob=false;
-    }
-    this->loss_function=loss_function;
-    this->n_classes=-1;
-    this->tree=NULL;
-    this->importances=NULL;
-    this->loss=NULL;
-    this->prior_pred=NULL;
-    this->original_classes=NULL;
-    this->n_jobs=n_jobs;
-    this->n_trees=n_trees;
-    this->n_features=n_features;
-    this->max_depth=max_depth;
-    this->min_sample_leaf=min_sample_leaf;
-    this->learning_rate=learning_rate;
-    this->subsample=subsample;
-    this->oob=oob;
-    this->compute_importance=compute_importance;
-    this->random_seed=random_seed;
-    this->verbose=verbose;
-    if (compute_importance) {
-        importances=new REAL[n_features];
-    }
-}
-BaseGBM::~BaseGBM(){
-    if (tree) {
-#ifdef DEBUG
-        if (loss_function!=MULTINOMIAL_DEVIANCE) {
-            assert(n_classes==1);
-        }
-#endif
-        for (uint i=0; i<n_trees*n_classes; i++) {
-            delete tree[i];
-            tree[i]=NULL;
-        }
-        delete []tree;
-        tree=NULL;
-    }
-    if (importances) {
-        delete [] importances;
-        importances=NULL;
-    }
-    if (loss) {
-        delete loss;
-        loss=NULL;
-    }
-    if (prior_pred) {
-        delete []prior_pred;
-        prior_pred=NULL;
-    }
-    if (original_classes) {
-        delete []original_classes;
-        original_classes=NULL;
-    }
-}
-
-void BaseGBM::save_model(FILE* fp){
-    /* SAVE GBM MODEL
-     * FORMAT:
-     * loss_function
-     * n_classes
-     * prior_pred
-     * original_classes(optional)
-     * n_jobs
-     * n_trees
-     * n_features
-     * max_depth
-     * min_sample_leaf
-     * max_features
-     * subsample
-     * learning_rate
-     * oob
-     * compute_importance
-     * random_seed
-     * verbose
-     * tree#1
-     * tree#2
-     * ...
-     * tree#n
-     */
-    if (!fp) {
-        fprintf(stderr, "Invalid FILE handler for save GBM model.\n");
-        return;
-    }
-    fprintf(fp, "loss_function %d\n",loss_function);
-    fprintf(fp, "n_classes %d\n",n_classes);
-    fprintf(fp, "prior_pred ");
-    for (uint i=0; i<n_classes; i++) {
-        fprintf(fp, " %lf",prior_pred[i]);
-    }
-    fprintf(fp, "\n");
-    if (loss_function==BINOMIAL_DEVIANCE) {
-        n_classes=2;
-    }
-    if (loss_function==BINOMIAL_DEVIANCE || loss_function==MULTINOMIAL_DEVIANCE) {
-        fprintf(fp, "original_classes ");
-        for (uint i=0; i<n_classes; i++) {
-            fprintf(fp, " %d",original_classes[i]);
-        }
-        fprintf(fp, "\n");
-    }
-    if (loss_function==BINOMIAL_DEVIANCE) {
-        n_classes=1;
-    }
-    fprintf(fp, "n_jobs %d\n",n_jobs);
-    fprintf(fp, "n_trees %d\n",n_trees);
-    fprintf(fp, "n_features %d\n",n_features);
-    fprintf(fp, "max_depth %d\n",max_depth);
-    fprintf(fp, "min_sample_leaf %d\n",min_sample_leaf);
-    fprintf(fp, "max_features %d\n",max_features);
-    fprintf(fp, "subsample %lf\n",subsample);
-    fprintf(fp, "learning_rate %lf\n",learning_rate);
-    fprintf(fp, "oob %d\n",oob);
-    fprintf(fp, "compute_importance %d\n",compute_importance);
-    fprintf(fp, "random_seed %d\n",random_seed);
-    fprintf(fp, "verbose %d\n",verbose);
-    fprintf(fp, "trees\n");
-    for (uint i=0; i<n_trees; i++) {
-        for (uint j=0; j<n_classes; j++) {
-            Tree *t=tree[i*n_classes+j];
-            fprintf(fp, "nodes %d\n",t->numNodes);
-            for (uint k=ROOT; k<=t->numNodes; k++) {
-                fprintf(fp, "%u %.20lg %u %u %u %lg %lg %d ",\
-                        t->nodes[k]->feature_split,t->nodes[k]->value_split,\
-                        t->nodes[k]->nSamples,t->nodes[k]->left_child,\
-                        t->nodes[k]->right_child,t->nodes[k]->ini_error,\
-                        t->nodes[k]->best_error,t->nodes[k]->leaf);
-                if (t->nodes[k]->leaf) {
-                    fprintf(fp, "%.20lg ",t->nodes[k]->pred[0]);
-                }
-            }
-            fprintf(fp, "\n");
-        }
-    }
-}
-bool BaseGBM::load_model(const char *filename){
-    
-    int is_leaf;
-    FILE* fp=fopen(filename,"r");
-    if (!fp) {
-        fscanf(stderr, "Error: Cannot open file %s for load GBM model.Please check your model file.\n",filename);
-        return false;
-    }
-    int parameter_count=0;
-    char cmd[100];
-    fscanf(fp, "%*s");
-    while (1) {
-        fscanf(fp, "%100s",cmd);
-        if (strcmp(cmd, "loss_function")==0) {
-            parameter_count++;
-            fscanf(fp, "%d",&loss_function);
-        }
-        else if(strcmp(cmd, "n_classes")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&n_classes);
-        }
-        else if(strcmp(cmd, "n_jobs")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&n_jobs);
-        }
-        else if(strcmp(cmd, "n_trees")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&n_trees);
-        }
-        else if(strcmp(cmd, "n_features")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&n_features);
-        }
-        else if(strcmp(cmd, "max_depth")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&max_depth);
-        }
-        else if(strcmp(cmd, "min_sample_leaf")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&min_sample_leaf);
-        }
-        else if(strcmp(cmd, "max_features")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&max_features);
-        }
-        else if(strcmp(cmd, "learning_rate")==0){
-            parameter_count++;
-            fscanf(fp, "%lf",&learning_rate);
-        }
-        else if(strcmp(cmd, "subsample")==0){
-            parameter_count++;
-            fscanf(fp, "%lf",&subsample);
-        }
-        else if(strcmp(cmd, "oob")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&oob);
-        }
-        else if(strcmp(cmd, "random_seed")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&random_seed);
-        }
-        else if(strcmp(cmd, "verbose")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&verbose);
-        }
-        else if(strcmp(cmd, "compute_importance")==0){
-            parameter_count++;
-            fscanf(fp, "%d",&compute_importance);
-        }
-        else if(strcmp(cmd, "original_classes")==0){
-            parameter_count++;
-            original_classes=new int[MAX(2,n_classes)];
-            for (uint i=0; i<MAX(2,n_classes); i++) {
-                fscanf(fp, "%d",&original_classes[i]);
-            }
-        }
-        else if(strcmp(cmd, "prior_pred")==0){
-            parameter_count++;
-            prior_pred=new REAL[n_classes];
-            for (uint i=0; i<n_classes; i++) {
-                fscanf(fp, "%lf",&prior_pred[i]);
-            }
-        }
-        else if(strcmp(cmd,"trees")==0){
-            break;
-        }
-        else {
-            fprintf(stderr, "Error: GBM read model failed,unknowed parameter %s.model file %s is corrupted.\n",cmd,filename);
-            return false;
-        }
-    }
-    if (!check_parameters()) {
-        fprintf(stderr, "Error: GBM read model failed,model file %s is corrupted.\n", filename);
-        return false;
-    }
-    //allocate trees
-    tree=new Tree*[n_trees*n_classes];
-    for (uint i=0; i<n_trees; i++) {
-        tree[i]=NULL;
-    }
-    for (uint i=0; i<n_trees; i++) {
-        for (uint j=0; j<n_classes; j++) {
-            fscanf(fp, "%100s",cmd);
-            if (strcmp(cmd, "nodes")!=0) {
-                fprintf(stderr, "Error: GBM read model failed,model file %s is corrupted.\n",filename);
-                return false;
-            }
-            Tree *t=new Tree(CRITERION_MSE, 1, n_features, max_features, min_sample_leaf, max_depth,FIND_BEST,random_seed,n_jobs);
-            fscanf(fp, "%d\n",&t->numNodes);
-            t->max_nNodes=t->numNodes+1;
-            t->nodes=new TreeNode*[t->max_nNodes];
-            t->nodes[0]=new TreeNode();//dummy node.
-            for (uint k=ROOT; k<=t->numNodes; k++) {
-                t->nodes[k]=new TreeNode();
-                t->nodes[k]->leaf=false;
-                fscanf(fp, "%u %lf %u %u %u %lf %lf %d ",\
-                        &t->nodes[k]->feature_split,&t->nodes[k]->value_split,\
-                        &t->nodes[k]->nSamples,&t->nodes[k]->left_child,\
-                        &t->nodes[k]->right_child,&t->nodes[k]->ini_error,\
-                        &t->nodes[k]->best_error,&is_leaf);
-                if (is_leaf) {
-                    t->nodes[k]->leaf=true;
-                    t->nodes[k]->pred=new REAL;
-                    fscanf(fp, "%lf ",&t->nodes[k]->pred[0]);
-                }
-            }
-            tree[i*n_classes+j]=t;
-        }
-    }
-    fclose(fp);
-    return true;
-}
-REAL* BaseGBM::GetImportances(){
-    return this->importances;
-}
-bool BaseGBM::check_parameters(){
-    bool ret=true;
-    if (n_classes<=0 || n_trees<=0 || n_features<=0 || max_features>n_features || n_jobs<1) {
-        ret=false;
-    }
-    if (min_sample_leaf<=0 ||max_depth<2 || subsample<=0.0 || subsample>1.0 || learning_rate<=0.0 || learning_rate>=1.0) {
-        ret=false;
-    }
-    if (loss_function!=SQUARE_LOSS && loss_function!=BINOMIAL_DEVIANCE && loss_function!=MULTINOMIAL_DEVIANCE) {
-        ret=false;
-    }
-    return ret;
-}
-#ifdef DEBUG
-void BaseGBM::print_parameters(){
-    fprintf(stderr, "n_classes %d\n",n_classes);
-    fprintf(stderr, "n_jobs %d\n",n_jobs);
-    fprintf(stderr, "n_trees %d\n",n_trees);
-    fprintf(stderr, "n_features %d\n",n_features);
-    fprintf(stderr, "max_depth %d\n",max_depth);
-    fprintf(stderr, "min_sample_leaf %d\n",min_sample_leaf);
-    fprintf(stderr, "max_features %d\n",max_features);
-    fprintf(stderr, "loss_function %d\n",loss_function);
-    fprintf(stderr, "learning_rate %lf\n",learning_rate);
-    fprintf(stderr, "subsample %f\n",subsample);
-    fprintf(stderr, "compute_importance %d\n",compute_importance);
-    fprintf(stderr, "random_seed %d\n",random_seed);
-    fprintf(stderr, "verbose %d\n",verbose);
-}
-#endif
-#endif
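
At prediction time, the removed BaseGBM subclasses start from prior_pred and add learning_rate times each boosting round's tree output (see the validation-set update in GBMClassifier::build below). A minimal sketch of that accumulation for a single class (gbm_score is a hypothetical helper name, not the upstream API):

    #include <vector>

    // Staged GBM score for one class: the prior plus the shrunken output of every round.
    double gbm_score(double prior, double learning_rate,
                     const std::vector<double>& per_round_tree_outputs) {
        double score = prior;
        for (double t : per_round_tree_outputs)
            score += learning_rate * t;   // shrinkage applied per boosting round
        return score;
    }
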
diff --git a/include/tensemble/ClassificationCriterion.h b/include/tensemble/ClassificationCriterion.h
deleted file mode 100644
index 885e137..0000000
--- a/include/tensemble/ClassificationCriterion.h
+++ /dev/null
@@ -1,201 +0,0 @@
-/* * * * *
- *  ClassificationCriterion.h 
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  Copyright (C) 2012, Rongkai Xia, shinekaixia at gmail.com
- *
- * * * * */
-
-#ifndef libTM_ClassificationCriterion_h
-#define libTM_ClassificationCriterion_h
-#include "Criterion.h"
-#include "cmath"
-class ClassificationCriterion:public Criterion{
-    //Abstract criterion for classification.
-public:
-    ClassificationCriterion(uint n_classes);
-    virtual ~ClassificationCriterion();
-    virtual void init(REAL *y,uint* sample_ind,uint s_ind_beg,uint s_ind_end,uint nSamples);
-    virtual void estimate(REAL* pred);
-    virtual void reset();
-    virtual REAL eval()=0;
-    virtual int update (REAL *y,uint* loc,uint a,uint b);
-    virtual int update_next(REAL *y,uint a);
-public:
-    uint *label_count_init;
-    uint *label_count_left;
-    uint *label_count_right;
-};
-ClassificationCriterion::ClassificationCriterion(uint n_classes){
-#ifdef DEBUG
-    assert(n_classes>=2);
-#endif
-    this->n_classes=n_classes;
-    label_count_init=new uint[n_classes];
-    label_count_left=new uint[n_classes];
-    label_count_right=new uint[n_classes];
-}
-ClassificationCriterion::~ClassificationCriterion(){
-    delete []label_count_left;
-    delete []label_count_right;
-    delete []label_count_init;
-}
-void ClassificationCriterion::init(REAL *y,uint* sample_ind,uint s_ind_beg,uint s_ind_end,uint nSamples){
-    /* Initialise the criterion class; assume all samples
-     * are in the right branch.
-     * Put every sample y[idx] whose idx lies in sample_ind[s_ind_beg..s_ind_end)
-     * into the right branch.
-     * sample_ind stores the real indices into y.
-     * Each y value must be non-negative and less than n_classes.
-     */
-    this->nSamples=nSamples;
-    this->nLeft=0;
-    for (uint i=0; i<n_classes; i++) {
-        label_count_left[i]=0;
-        label_count_right[i]=0;
-        label_count_init[i]=0;
-    }
-#ifdef DEBUG
-    uint count=0;
-#endif
-    for (uint i=s_ind_beg; i<s_ind_end; i++) {
-        uint idx=sample_ind[i];
-#ifdef DEBUG
-            assert(y[i]>=0 && y[i]<n_classes);
-            count++;
-#endif
-        uint category=(uint)y[idx];
-        label_count_init[category]++;
-    }
-#ifdef DEBUG
-    assert(count==nSamples);
-#endif
-    reset();
-}
-void ClassificationCriterion::reset(){
-    nLeft=0;
-    for (uint i=0; i<n_classes; i++) {
-        label_count_left[i]=0;
-        label_count_right[i]=label_count_init[i];
-    }
-}
-void ClassificationCriterion::estimate(REAL* pred){
-    for (uint i=0; i<n_classes; i++) {
-        pred[i]=label_count_init[i];
-    }
-}
-
-//==================FIX ME===========================
-int ClassificationCriterion::update(REAL *y, uint *loc, uint a, uint b){
-    /*Update the criterion for each value in interval [a,b) and [b,end),
-     [a,b) in the left side, [b,end) in the right side (where a and b
-     are indices into loc).
-     */
-#ifdef DEBUG
-    assert(a>=0);
-    assert(b<=nSamples);
-#endif
-    for (uint i=a; i<b; i++) {
-        uint idx=loc[i];
-        uint category=y[idx];
-        label_count_left[category]++;
-        label_count_right[category]--;
-        nLeft++;
-    }
-    return nLeft;
-
-}
-//===================================================
-
-
-int ClassificationCriterion::update_next(REAL *y, uint a){
-    /* put sample y[a] into the left side and update the criterion */
-#ifdef DEBUG
-    assert(a>=0);
-#endif
-    uint category=y[a];
-#ifdef DEBUG
-    assert(category>=0 && category<n_classes);
-#endif
-    label_count_left[category]++;
-    label_count_right[category]--;
-    nLeft++;
-    return nLeft;
-
-}
-
-class Entropy:public ClassificationCriterion {
-    //Cross-entropy splitting criterion.
-    
-public:
-    Entropy(uint n_classes);
-    
-    REAL eval();
-};
-
-Entropy::Entropy(uint n_classes):ClassificationCriterion(n_classes){
-    
-}
-
-REAL Entropy::eval(){
-    REAL l_entropy,r_entropy;
-    l_entropy=r_entropy=0;
-    uint nRight=nSamples-nLeft;
-    for (uint i=0; i<n_classes; i++) {
-        if (nLeft>0 && label_count_left[i]>0) {
-            l_entropy-=(1.0*label_count_left[i]/nLeft)*log((REAL)1.0*label_count_left[i]/nLeft);
-        }
-        if (nRight>0 && label_count_right[i]>0) {
-            r_entropy-=(1.0*label_count_right[i]/nRight)*log((REAL)1.0*label_count_right[i]/nRight);
-        }
-    }
-    l_entropy*=1.0*nLeft/nSamples;
-    r_entropy*=1.0*nRight/nSamples;
-    return l_entropy+r_entropy;
-}
-
-class Gini:public ClassificationCriterion {
-    //Gini splitting criterion
-    
-public:
-    Gini(uint n_classes);
-    REAL eval();
-};
-
-Gini::Gini(uint n_classes):ClassificationCriterion(n_classes){
-    
-}
-REAL Gini::eval(){
-    REAL l_score,r_score;
-    uint nRight=nSamples-nLeft;
-    l_score=(REAL)nLeft;
-    r_score=(REAL)nRight;
-    for (uint i=0; i<n_classes; i++) {
-        //Caution:avoid overflow.
-        if (label_count_left[i]>0) {
-            l_score-=(REAL)label_count_left[i]/nLeft*label_count_left[i];
-        }
-        if (label_count_right[i]>0) {
-            r_score-=(REAL)label_count_right[i]/nRight*label_count_right[i];
-        }
-    }
-#ifdef DEBUG
-    assert(l_score>=0);
-    assert(r_score>=0);
-#endif
-//    if (nLeft!=0) {
-//        l_score/=nLeft;
-//    }
-//    if (nRight!=0) {
-//        r_score/=nRight;
-//    }    
-    l_score/=nSamples;
-    r_score/=nSamples;
-    return l_score+r_score;
-}
-
-#endif
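
Entropy::eval() and Gini::eval() above return the sample-weighted sum of the left and right child impurities. For reference, a standalone sketch (not upstream code) of the two per-node impurities written out directly over a vector of class counts:

    #include <cmath>
    #include <vector>

    // Gini impurity: 1 - sum_i p_i^2
    double gini(const std::vector<unsigned>& counts) {
        double n = 0, sum_sq = 0;
        for (unsigned c : counts) n += c;
        if (n == 0) return 0.0;
        for (unsigned c : counts) sum_sq += (double)c * c;
        return 1.0 - sum_sq / (n * n);
    }

    // Cross entropy: -sum_i p_i log p_i
    double entropy(const std::vector<unsigned>& counts) {
        double n = 0, h = 0;
        for (unsigned c : counts) n += c;
        for (unsigned c : counts)
            if (c > 0) h -= (c / n) * std::log(c / n);
        return h;
    }
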
diff --git a/include/tensemble/Criterion.h b/include/tensemble/Criterion.h
deleted file mode 100644
index b1f13d7..0000000
--- a/include/tensemble/Criterion.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/* * * * *
- *  Criterion.h 
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  Copyright (C) 2012, Rongkai Xia, shinekaixia at gmail.com
- *
- * * * * */
-
-#ifndef libTM_Criterion_h
-#define libTM_Criterion_h
-#include "TypeDef.h"
-#include "assert.h"
-#define CRITERION_MSE   1
-#define CRITERION_ENTROPY 2
-#define CRITERION_GINI 3
-
-class Criterion {
-    //splitting criteria (regression and classification)
-public:
-//    Criterion(uint n_classes=1){
-//    }
-    virtual ~Criterion(){};
-public:
-    virtual void init(REAL *y,uint* sample_ind,uint s_ind_beg,uint s_ind_end,uint nSamples)=0;
-    virtual void estimate(REAL* pred)=0;
-    virtual void reset()=0;
-    virtual REAL eval()=0;
-    virtual int update (REAL *y,uint* loc,uint a,uint b)=0;
-    virtual int update_next(REAL *y,uint a)=0;
-public:
-    //interval [left:mid] is in the left side,[mid+1:end] in right side
-    uint nSamples;
-    uint nLeft;
-    uint n_classes;
-};
-
-
-class RegressionCriterion:public Criterion {
-    //Abstract criterion for regression
-    
-public:
-    virtual void init(REAL *y,uint* sample_ind,uint s_ind_beg,uint s_ind_end,uint nSamples);
-    virtual void estimate(REAL* pred);
-    virtual void reset();
-    virtual REAL eval()=0;
-    virtual int update (REAL *y,uint* loc,uint a,uint b);
-    virtual int update_next(REAL *y,uint a);
-public:
-    REAL mean_all;
-    REAL mean_left;
-    REAL mean_right;
-    REAL sq_sum_all;
-    REAL sq_sum_left;
-    REAL sq_sum_right;
-    REAL var_left;
-    REAL var_right;
-};
-void RegressionCriterion::init(REAL *y,uint* sample_ind,\
-                               uint s_ind_beg,uint s_ind_end,uint nSamples){
-    /* Initialise the criterion class; assume all samples
-     * are in the right branch.
-     * Put every sample y[idx] whose idx lies in sample_ind[s_ind_beg..s_ind_end)
-     * into the right branch.
-     * sample_ind stores the real indices into y.
-     */
-    this->n_classes=1;
-    this->nSamples=nSamples;
-    this->nLeft=0;
-    mean_all=0.0;
-    mean_left=0.0;
-    mean_right=0.0;
-    sq_sum_all=0.0;
-    sq_sum_left=0.0;
-    sq_sum_right=0.0;
-    var_left=0.0;
-    var_right=0.0;
-#ifdef DEBUG
-    uint count=0;
-#endif
-    for (uint i=s_ind_beg; i<s_ind_end; i++) {
-        uint idx=sample_ind[i];
-        mean_all+=y[idx];
-        sq_sum_all+=y[idx]*y[idx];
-#ifdef DEBUG
-        count++;
-#endif
-    }
-    
-#ifdef DEBUG
-    assert(count==nSamples);
-#endif
-    mean_all/=nSamples;
-    reset();
-}
-void RegressionCriterion::reset(){
-    nLeft=0;
-    mean_left=0.0;
-    mean_right=mean_all;
-    sq_sum_left=0.0;
-    sq_sum_right=sq_sum_all;
-    var_left=0.0;
-    var_right=sq_sum_all-nSamples*(mean_right*mean_right);
-}
-void RegressionCriterion::estimate(REAL* pred){
-    //regression only needs pred[0]
-    pred[0]=this->mean_all;
-}
-
-//=====================FIX ME==========================================
-int RegressionCriterion::update(REAL* y,uint* loc,uint a,uint b){
-    /*Update the criterion for each value in interval [a,b) and [b,end),
-     [a,b) in the left side, [b,end) in the right side (where a and b
-     are indices into loc).
-     */
-#ifdef DEBUG
-    assert(a>=0);
-    assert(b<=nSamples);
-#endif
-    for (uint i=a; i<b; i++) {
-        uint idx=loc[i];
-        sq_sum_left+=y[idx]*y[idx];
-        sq_sum_right-=y[idx]*y[idx];
-        mean_left=(nLeft*mean_left+y[idx])/(nLeft+1);
-        mean_right=((nSamples-nLeft)*mean_right-y[idx])/(nSamples-nLeft-1);
-        nLeft++;
-    }
-    return nLeft;
-}
-//======================================================================
-
-int RegressionCriterion::update_next(REAL* y,uint a){
-     /* put sample y[a] into the left side and update the criterion */
-#ifdef DEBUG
-    assert(a>=0);
-#endif
-    sq_sum_left+=y[a]*y[a];
-    sq_sum_right-=y[a]*y[a];
-    mean_left=(nLeft*mean_left+y[a])/(nLeft+1);
-    if (nLeft+1==nSamples) {
-        mean_right=0.0;
-    }else{
-        mean_right=(1.0*(nSamples-nLeft)*mean_right-y[a])/(nSamples-nLeft-1);
-    }
-    nLeft++;
-    var_left=sq_sum_left-nLeft*(mean_left*mean_left);
-    var_right=sq_sum_right-(nSamples-nLeft)*(mean_right*mean_right);
-    return nLeft;
-}
-
-class MSE:public RegressionCriterion {
-    /*Mean squared error impurity criterion.*/
-    
-public:
-    REAL eval();
-};
-REAL MSE::eval(){
-    return this->var_left+this->var_right;
-}
-#endif
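
RegressionCriterion above keeps var_left and var_right up to date from running sums alone, using the identity sum_i (y_i - mean)^2 = sum_i y_i^2 - n * mean^2, so evaluating a candidate split never needs a second pass over the samples. A standalone sketch of that identity (node_sse is a hypothetical helper name):

    // Sum of squared errors about the mean, computed from running sums only.
    double node_sse(double sq_sum, double sum, unsigned n) {
        if (n == 0) return 0.0;
        double mean = sum / n;
        return sq_sum - n * mean * mean;
    }
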
diff --git a/include/tensemble/Estimator.h b/include/tensemble/Estimator.h
deleted file mode 100644
index de329f6..0000000
--- a/include/tensemble/Estimator.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/* * * * *
- *  Estimator.h
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  Copyright (C) 2012, Rongkai Xia, shinekaixia at gmail.com
- *
- * * * * */
-
-#ifndef libTM_Estimator_h
-#define libTM_Estimator_h
-#include "TypeDef.h"
-#include "assert.h"
-#include <map>
-
-void MeanEstimator(REAL* y,REAL* pred,REAL* prior,uint nSamples){
-    REAL mean=0;
-    for (uint i=0; i<nSamples; i++) {
-        mean+=y[i];
-    }
-    mean/=nSamples;
-//    mean=0;
-    for (uint i=0; i<nSamples; i++) {
-        pred[i]=mean;
-    }
-    *prior=mean;
-}
-
-void LogOddsEstimator(REAL* y,REAL* pred,REAL* prior,uint nSamples){
-    /* for two class problem,y must be 0 or 1 */
-    
-    uint n_pos=0;
-    for (uint i=0; i<nSamples; i++) {
-#ifdef DEBUG
-        assert(y[i]==0 || y[i]==1);
-#endif
-        n_pos+=y[i];
-    }
-    REAL prob=log((REAL)n_pos/(nSamples-n_pos));
-    for (uint i=0; i<nSamples; i++) {
-        pred[i]=prob;
-    }
-    *prior=prob;
-}
-
-void MulticlassPriorEstimator(REAL* y,REAL* pred,REAL* prior,uint nSamples,uint n_classes){
-    /* for multi-class problems,y must be 0,1,...,n_classes-1 
-     * pred is a nSamples*n_classes vector
-     */
-    
-#ifdef DEBUG
-    assert(n_classes>2);
-#endif
-    map<uint,uint> count;
-    uint i,j,offset;
-    for (i=0; i<n_classes; i++) {
-        count[i]=0;
-    }
-    for (i=0; i<nSamples; i++) {
-#ifdef DEBUG
-        assert(y[i]>=0 && y[i]<n_classes);
-#endif
-        count[y[i]]++;
-    }
-    for (j=0; j<n_classes; j++) {
-        offset=j*nSamples;
-        for (i=0; i<nSamples; i++) {
-            pred[offset+i]=(REAL)count[j]/nSamples;//cast avoids integer division
-        }
-    }
-    //return prior
-    for (j=0; j<n_classes; j++) {
-        prior[j]=(REAL)count[j]/nSamples;//cast avoids integer division
-    }
-}
-#endif
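
LogOddsEstimator() above stores the log-odds of the positive class as the prior; the logistic function maps it back to the empirical positive rate. A standalone check (log_odds_prior is a hypothetical helper name):

    #include <cmath>

    double log_odds_prior(unsigned n_pos, unsigned n_total) {
        return std::log((double)n_pos / (double)(n_total - n_pos));
    }
    // Example: 30 positives out of 100 gives log(30/70) ~ -0.847,
    // and 1/(1+exp(0.847)) ~ 0.30, the empirical positive rate.
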
diff --git a/include/tensemble/EvaluateMetric.h b/include/tensemble/EvaluateMetric.h
deleted file mode 100644
index 1b428a8..0000000
--- a/include/tensemble/EvaluateMetric.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/* * * * *
- *  EvaluateMetric.h
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  Copyright (C) 2012, Rongkai Xia, shinekaixia at gmail.com
- *
- * * * * */
-
-#ifndef libTM_EvaluateMetric_h
-#define libTM_EvaluateMetric_h
-#include "TypeDef.h"
-
-REAL mse(const REAL* preds, const REAL* label,uint nSamples) {
-    REAL r = 0;
-    int i, N = nSamples;
-    for (i=0; i<N; i++)
-        r += (label[i] - preds[i])*(label[i] - preds[i]); 
-    return 1.0 / N * r;
-}
-
-REAL rmse(const REAL* preds, const REAL* label,uint nSamples) {
-    REAL r = 0;
-    int i, N = nSamples;
-    for (i=0; i<N; i++)
-        r += (label[i] - preds[i])*(label[i] - preds[i]); 
-    return sqrt(1.0 / N * r);
-}
-
-REAL R2(const REAL* preds, const REAL* label,uint nSamples) {
-    REAL r1 = 0,r2=0,r3=0;
-    REAL mean_data=0,mean_preds=0;
-    int i, N = nSamples;
-    for (i=0; i<N; i++){
-        mean_data+=label[i];
-        mean_preds+=preds[i];
-    }
-    mean_data/=N;
-    mean_preds/=N;
-    for( i=0 ; i<N ; i++ )
-    {
-        r1+=(label[i]-mean_data)*(preds[i]-mean_preds);
-        r2+=(label[i]-mean_data)*(label[i]-mean_data);
-        r3+=(preds[i]-mean_preds)*(preds[i]-mean_preds);
-    }
-    if (r2==0.0 || r3==0.0) {
-        if (r1==0.0) {
-            return 1.0;
-        }
-        return 0.0;
-    }
-    return r1*r1/(r2*r3);
-}
-
-REAL BinomialDevianceLoss(REAL *y, REAL *pred,uint nSamples){
-    /* binomial deviance loss
-     * here y must be 0 or 1
-     */    
-    REAL deviance=0;
-    uint count=0;
-    for (uint i=0; i<nSamples; i++) {
-#ifdef DEBUG
-        assert(y[i]==0 || y[i]==1);
-#endif
-        deviance+=y[i]*pred[i]-log(1.0+exp(pred[i]));
-        count++;
-    }
-    return deviance/count;
-}
-
-REAL Accuracy(REAL* y,REAL *pred,uint nSamples){
-    REAL acc=0;
-    for (uint i=0; i<nSamples; i++) {
-        if (y[i]==pred[i]) {
-            acc++;
-        }
-    }
-    return acc/nSamples;
-}
-#endif
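
A note on the removed R2() above: it returns the squared Pearson correlation, r^2 = cov(y, pred)^2 / (var(y) * var(pred)), rather than the coefficient-of-determination form 1 - SS_res/SS_tot; the two agree for predictions from an ordinary least-squares fit but can differ for arbitrary predictions.
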
diff --git a/include/tensemble/FeatureData.h b/include/tensemble/FeatureData.h
deleted file mode 100644
index c971d3e..0000000
--- a/include/tensemble/FeatureData.h
+++ /dev/null
@@ -1,227 +0,0 @@
-/* * * * *
- *  FeatureData.h 
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  Copyright (C) 2012, Rongkai Xia, shinekaixia at gmail.com
- *
- * * * * */
-
-#ifndef libTM_FeatureData_h
-#define libTM_FeatureData_h
-#include <iostream>
-#include <algorithm>
-#include "TypeDef.h"
-using namespace std;
-
-class FeatureData { // represents a training data set distributed among processors feature-wise
-public:
-    // constructor/destructor
-//    FeatureData();
-    FeatureData(uint nSamples, uint nFeaures);
-    ~FeatureData();
-    
-	// reading and initialization
-    void sort();
-	
-	// manage sample_in_node
-	void reset();
-	
-    // queries
-    uint getNumSamples();
-    uint getNumFeatures();
-    int getSampleInNode(uint i);//get node of sample[i]
-    uint_vec getNodeSamples(uint i);//get all sample from node i
-    void setNode(uint i_sample, uint i_node);//set i_sample to i_node
-
-    REAL getFeature(uint f, uint i);
-    REAL getSortedFeature(uint f, uint i);
-    uint getSortedIndex(uint f, uint i);
-    
-    uint whoHasFeature(uint f);
-    bool isLocalFeature(uint f);
-    uint localFeatureIndex(uint gf);
-    uint globalFeatureIndex(uint lf);
-    
-    // prediction
-    void updatePred(uint i, double p);
-    void updateResiduals();
-    
-    
-public:
-	// dataset descriptors
-	uint nSamples; // number of data instances
-    uint nFeatures; // number of features stored on this processor
-    
-	// static attributes
-    REAL** rawfeatures; // feature values ordered by instance
-    REAL** sortedfeatures; // feature values ordered by value
-    uint** sortedindices; // indices of original instances for each sorted feature
-    REAL* label; // target label value of each instance
-    
-	// level-specific attributes
-	int* sample_in_node; // indicated sample in which nodes.
-	
-};
-
-//FeatureData::FeatureData(){
-//    this->nSamples=this->nFeatures=-1;
-//    rawfeatures=NULL;
-//    sortedfeatures=NULL;
-//    sortedindices=NULL;
-//    label=NULL;
-//    sample_in_node=NULL;
-//}
-
-FeatureData::FeatureData(uint nSamples, uint nFeaures) {
-    this->nSamples=nSamples;
-    this->nFeatures = nFeaures;
-
-    // rawfeatures: initialized to minimum value (for missing values)
-    rawfeatures = new REAL*[nSamples];
-	for (int i=0; i<nSamples; i++) {
-        rawfeatures[i] = new REAL[nFeaures];
-		for (int j=0; j<nFeaures; j++)
-            rawfeatures[i][j] = 0.f; //-9999999.f; // TODO replace with better minimum value -- min float?
-	}
-	
-	// sortedfeatures, sortedindices: limited init, completed during sort()
-    sortedfeatures = new REAL*[nFeaures];
-    sortedindices = new uint*[nFeaures];
-    for (int i=0; i<nFeaures; i++) {
-        sortedfeatures[i] = new REAL[nSamples];
-        sortedindices[i] = new uint[nSamples];
-	}
-	
-	// label: limited init, read from file
-    label = new REAL[nSamples];
-	
-	// node: initialized to 0
-    sample_in_node = new int[nSamples];
-    for (int i=0; i<nSamples; i++)
-		sample_in_node[i] = -1;
-
-}
-
-FeatureData::~FeatureData() {
-    // delete all 1-d arrays: label, sample_in_node
-    if (label) {
-        delete [] label;
-    }
-    if (sample_in_node) {
-        delete [] sample_in_node;
-    }
-    label=NULL;
-    sample_in_node=NULL;
-	
-	// delete all 2-d arrays: rawfeatures, sortedfeatures, sortedindices
-    for (int i=0; i<nSamples; i++) {
-        if(rawfeatures[i])
-            delete [] rawfeatures[i];
-        rawfeatures[i] = NULL;
-    }
-	for (int i=0; i<nFeatures; i++) {
-        if(sortedfeatures[i])
-            delete [] sortedfeatures[i];
-        sortedfeatures[i] = NULL;
-        if(sortedindices[i])
-            delete [] sortedindices[i];
-        sortedindices[i] = NULL;
-    }
-    delete[] rawfeatures;
-    delete[] sortedfeatures;
-    delete[] sortedindices;
-}
-
-
-void FeatureData::reset() {
-    // clear nodes before next tree
-	for (int i=0; i<nSamples; i++) {
-        sample_in_node[i]=-1;
-	}
-}
-
-class FeatureValuePair {
-public:
-    uint index;
-	REAL value;
-};
-
-struct CompareFeatureValuePairs {
-    bool operator() (FeatureValuePair* fv1, FeatureValuePair* fv2) {
-        return (fv1->value < fv2->value);
-    }
-};
-
-void FeatureData::sort() {
-    // initialize FeatureValue array
-    CompareFeatureValuePairs cfvp;
-    FeatureValuePair** pairs = new FeatureValuePair*[nSamples];
-    for (int i=0; i<nSamples; i++)
-        pairs[i] = new FeatureValuePair();
-    
-    // sort each feature
-	for (int f=0; f<nFeatures; f++) {
-	    // load feature into pairs array
-        for (int i=0; i<nSamples; i++) {
-            pairs[i]->index = i;
-            pairs[i]->value = rawfeatures[f][i];
-        }
-	    
-	    // sort pairs array
-		std::sort(pairs, pairs + nSamples, cfvp);
-		
-		// write feature to sortedfeatures, sortedindices
-        for (int i=0; i<nSamples; i++) {
-            sortedfeatures[f][i] = pairs[i]->value;
-            sortedindices[f][i] = pairs[i]->index;
-        }
-	}
-	
-	// delete FeatureValue array
-    for (int i=0; i<nSamples; i++) {
-        delete pairs[i];
-        pairs[i] = NULL;
-    }
-    delete [] pairs;
-}
-
-uint FeatureData::getNumSamples() {
-    return nSamples;
-}
-
-int FeatureData::getSampleInNode(uint i) {
-    return sample_in_node[i];
-}
-
-uint_vec FeatureData::getNodeSamples(uint i){
-    uint_vec ret;
-    for (uint k=0; k<nSamples; k++) {
-        if (sample_in_node[k]==i) {
-            ret.push_back(k);
-        }
-    }
-    return ret;
-}
-
-void FeatureData::setNode(uint i_sample, uint i_node) {
-    sample_in_node[i_sample]=i_node;
-}
-
-
-REAL FeatureData::getFeature(uint f, uint i) {
-    return rawfeatures[f][i];
-}
-
-REAL FeatureData::getSortedFeature(uint f, uint i) {
-    return sortedfeatures[f][i];
-}
-
-uint FeatureData::getSortedIndex(uint f, uint i) {
-    return sortedindices[f][i];
-}
-
-#endif
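
FeatureData::sort() above pre-sorts every feature column once, so that split search can sweep values in increasing order while still recovering each value's original row through sortedindices. A standalone sketch of that argsort step (argsort is a hypothetical helper name):

    #include <algorithm>
    #include <numeric>
    #include <vector>

    // Return row indices ordered by ascending feature value
    // (the role played by sortedindices[f] in the removed class).
    std::vector<unsigned> argsort(const std::vector<double>& column) {
        std::vector<unsigned> order(column.size());
        std::iota(order.begin(), order.end(), 0u);
        std::sort(order.begin(), order.end(),
                  [&](unsigned a, unsigned b) { return column[a] < column[b]; });
        return order;
    }
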
diff --git a/include/tensemble/GBMClassifier.h b/include/tensemble/GBMClassifier.h
deleted file mode 100644
index a6bcf57..0000000
--- a/include/tensemble/GBMClassifier.h
+++ /dev/null
@@ -1,400 +0,0 @@
-/* * * * *
- *  GBMClassifier.h
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  Copyright (C) 2012, Rongkai Xia, shinekaixia at gmail.com
- *
- * * * * */
-
-#ifndef libTM_GBMClassifier_h
-#define libTM_GBMClassifier_h
-#include "BaseGBM.h"
-#include <map>
-
-class GBMClassifier:public BaseGBM {
-    //gradient boosting classifier
-    
-public:
-    GBMClassifier(){};
-    
-    GBMClassifier(int loss_function,uint n_trees,uint n_features,uint max_depth,uint min_sample_leaf, REAL max_features_ratio,REAL subsample,REAL learning_rate,bool oob,bool compute_importance,uint random_seed,uint n_jobs,int verbose);
-    ~GBMClassifier();
-    
-    int build(REAL **X, REAL *original_y, uint n_samples,\
-              REAL** val_X=NULL,REAL* val_y=NULL,uint n_val_samples=0);
-    
-    void predict(REAL** X,REAL* pred,uint nSamples,uint nFeatures,uint k_trees=0);
-    
-    void predict_prob(REAL** X,REAL* pred_prob,uint nSamples,uint nFeatures,uint k_trees=0);
-    
-    void predict_stage(REAL** X,REAL* pred,uint nSamples,uint nFeatures,uint k);
-    
-    void score2label(REAL* score,REAL* label,uint nSamples);
-    
-    void score2prob(REAL* score,REAL* prob,uint nSamples);
-    
-    int save_model(const char* filename);
-    
-    void predict_verbose(REAL** X,REAL* y,REAL* pred,uint nSamples,uint nFeatures,\
-                                 uint k_trees){};
-    
-private:
-    void convert_to_unique(REAL* original_y,REAL* y,uint nSamples);
-    
-};
-
-GBMClassifier::GBMClassifier(int loss_function,uint n_trees,uint n_features,uint max_depth,uint min_sample_leaf, REAL max_features_ratio,REAL subsample,REAL learning_rate,bool oob,bool compute_importance,uint random_seed,uint n_jobs,int verbose)\
-:BaseGBM(loss_function,n_trees,n_features,max_depth,min_sample_leaf,max_features_ratio,subsample,learning_rate,oob,compute_importance,random_seed,n_jobs,verbose){
-#ifdef DEBUG
-    assert(loss_function==BINOMIAL_DEVIANCE || loss_function==MULTINOMIAL_DEVIANCE);
-#endif
-}
-
-GBMClassifier::~GBMClassifier(){
-}
-
-void GBMClassifier::convert_to_unique(REAL *original_y, REAL *y,uint nSamples){
-    std::map<int,uint> key_values;
-    std::map<int,uint>::const_iterator iter;
-    vector<int> classes_map;
-    n_classes=0;
-    for (uint i=0; i<nSamples; i++) {
-        int origin_class=(int)original_y[i];
-        if ((iter=key_values.find(origin_class))==key_values.end()) {
-            key_values.insert(make_pair(origin_class, n_classes));
-            y[i]=n_classes;
-            classes_map.push_back(origin_class);
-            n_classes++;
-        }else {
-            y[i]=iter->second;
-        }
-    }
-    original_classes=new int[n_classes];
-    for (uint i=0; i<n_classes; i++) {
-        original_classes[i]=classes_map[i];
-    }
-}
-
-int GBMClassifier::build(REAL **X, REAL *original_y, uint n_samples,REAL** val_X,REAL* val_y,uint n_val_samples){
-    /* BUILD GRADIENT BOOSTING MODEL FOR CLASSIFICATION
-     * For classification, labels must correspond to classes 0, 1, ..., n_classes-1
-     * For regression and two-class classification, only one tree is fit per GBM round
-     * For multi-class classification, n_classes trees are fit per GBM round
-     */
-#ifdef DEBUG
-    assert(loss_function==BINOMIAL_DEVIANCE || loss_function==MULTINOMIAL_DEVIANCE);
-#endif
-    uint i,j,k,idx,n_subsamples,*sample_index,count,offset;
-    REAL** sub_X,*sub_y,*y,*zero_one_y;
-    REAL* y_pred,*y_pred_label,*val_pred,*val_pred_stage,*val_pred_label,*residual,train_score,oob_score;
-    bool *mask;
-    //convert y to unique classes
-    y=new REAL[n_samples];
-    y_pred_label=new REAL[n_samples];
-    convert_to_unique(original_y, y, n_samples);
-    
-    if (n_classes==1) {
-        fprintf(stderr, "Error: The training file only have one label,at least two-labels are required for classification.Please check your file.\n");
-        delete []y;
-        return ENSEMBLE_FAIL;
-    }
-    
-    //initialize loss function
-    if (n_classes==2) {
-        loss_function=BINOMIAL_DEVIANCE;
-        loss=new BinomialDeviance;
-    }else {
-        loss_function=MULTINOMIAL_DEVIANCE;
-        loss=new MultinomialDeviance;
-    }
-    //set n_classes=1 for two-class problems (only one tree is built each round)
-    if (loss_function==BINOMIAL_DEVIANCE) {
-        n_classes=1;
-    }
-    //allocate memory for prior and trees
-    prior_pred=new REAL[n_classes];
-    this->tree=new Tree*[n_trees*n_classes];
-    
-    n_subsamples=n_samples;
-    mask=new bool[n_samples];
-    y_pred=new REAL[n_samples*n_classes];
-    residual=new REAL[n_samples*n_classes];
-    if (subsample<1.0) {
-        sample_index=new uint[n_samples];
-        n_subsamples=(uint)floor((REAL)n_samples*subsample);
-        sub_X=new REAL*[n_subsamples];
-        sub_y=new REAL[n_subsamples];
-        for (i=0; i<n_samples; i++) {
-            sample_index[i]=i;
-        }
-#ifdef DEBUG
-        assert(n_subsamples<n_samples);
-#endif
-    }
-
-    //initialize prediction and save prior
-    loss->get_init_estimate(y, y_pred, prior_pred, n_samples, this->n_classes);
-    
-    //if have validation set,initialize prediction for validation set
-    if (val_X && val_y) {
-        val_pred=new REAL[n_classes*n_val_samples];
-        val_pred_stage=new REAL[n_classes*n_val_samples];
-        val_pred_label=new REAL[n_val_samples];
-        for (j=0; j<n_classes; j++) {
-            offset=j*n_val_samples;
-            for (i=0; i<n_val_samples; i++) {
-                val_pred[offset+i]=prior_pred[j];
-            }
-        }
-    }
-    if (loss_function==MULTINOMIAL_DEVIANCE) {
-        zero_one_y=new REAL[n_samples];
-    }
-    time_t beg,end;
-    beg=time(NULL);
-    //main iteration
-    for (i=0; i<n_trees; i++) {
-        //sub-sampling
-        memset(mask, false, n_samples*sizeof(bool));
-        if (subsample<1.0) {
-            count=n_samples;
-            for (j=0; j<n_subsamples; j++) {
-//                k=rand()%count;
-                k=randomMT()%count;
-                idx=sample_index[k];
-                sub_X[j]=X[idx];
-                mask[idx]=true;
-                swap(sample_index[k], sample_index[--count]);
-            }
-        }else {
-            sub_X=X;
-            for (j=0; j<n_samples; j++) {
-                mask[j]=true;
-            }
-        }
-        //build k trees each round
-        for(k=0;k<n_classes;k++){
-            REAL* kth_residual=residual+k*n_samples;
-            //convert to zero-one value for Multinomial Deviance
-            if (loss_function==MULTINOMIAL_DEVIANCE) {
-                for (j=0; j<n_samples; j++) {
-                    if (y[j]==k) {
-                        zero_one_y[j]=1;
-                    }else {
-                        zero_one_y[j]=0;
-                    }
-                }
-            }else {
-                zero_one_y=y;
-            }
-            //compute working residual for k-th tree
-            loss->compute_residual(kth_residual, zero_one_y, y_pred, n_samples, n_classes, k);
-            if (subsample<1.0) {
-                count=0;
-                for (j=n_samples-1; j>=n_samples-n_subsamples; j--) {
-                    idx=sample_index[j];
-                    sub_y[count++]=kth_residual[idx];
-                }
-#ifdef DEBUG
-                assert(count==n_subsamples);
-#endif
-            }else {
-                sub_y=kth_residual;
-            }
-            uint tree_random_seed=randomMT();
-            TreeRegressor *t=new TreeRegressor(n_features,\
-                                               max_features,\
-                                               min_sample_leaf,\
-                                               max_depth,\
-                                               FIND_BEST,\
-                                               tree_random_seed,\
-                                               n_jobs);
-            //build tree
-            t->build(sub_X, sub_y, n_subsamples);
-            //update
-            loss->update(t, X, mask, zero_one_y, y_pred+k*n_samples, kth_residual, n_samples, n_features, learning_rate, n_classes,k);
-            //add tree to ensemble
-            tree[i*n_classes+k]=t;
-        }         
-        if (val_X && val_y) {
-            predict_stage(val_X, val_pred_stage, n_val_samples, n_features, i);
-            for (k=0; k<n_classes; k++) {
-                offset=k*n_val_samples;
-                for (j=0; j<n_val_samples; j++) {
-                    val_pred[offset+j]+=this->learning_rate*val_pred_stage[offset+j];
-                }
-            }
-        }
-        if ((verbose && (i+1)%verbose==0) || i+1==n_trees) {
-            //calculate train score and oob score;
-//            train_score=loss->loss(y, y_pred, mask, true, n_samples, n_classes);
-            score2label(y_pred, y_pred_label, n_samples);
-            if (oob && false) {
-                oob_score=loss->loss(y, y_pred, mask, false, n_samples, n_classes);
-                oob_score=sqrt(oob_score);
-            }
-            if (verbose<=0) {
-                fprintf(stderr, "GBM train done.");
-            }else {
-                fprintf(stderr, "build tree %u of %u,",i+1,n_trees);
-            }
-            fprintf(stderr, "train Acc=%lf",Accuracy(original_y, y_pred_label, n_samples));
-            if (oob && false) {
-                fprintf(stderr, ",oob score(deviance)=%lf",oob_score);
-            }
-            if (val_y) {
-                score2label(val_pred, val_pred_label, n_val_samples);
-                fprintf(stderr, ",validation Acc=%f",Accuracy(val_y, val_pred_label, n_val_samples));
-            }
-            fprintf(stderr, ".\n");
-        }
-    }
-    delete []mask;
-    delete []y;
-    delete []y_pred;
-    delete []y_pred_label;
-    delete []residual;
-    if (subsample<1.0) {
-        delete []sample_index;
-        delete []sub_X;
-        delete []sub_y;
-    }
-    if (val_y) {
-        delete []val_pred_stage;
-        delete []val_pred;
-        delete []val_pred_label;
-    }
-    if (loss_function==MULTINOMIAL_DEVIANCE) {
-        delete []zero_one_y;
-    }
-    end=time(NULL);
-    fprintf(stderr, "|Gradient Boosting Classifier training done. | Using time: %.0lf secs|\n",difftime(end, beg));
-    return ENSEMBLE_SUCCESS;
-}
-
-void GBMClassifier::predict_stage(REAL **X, REAL *pred, uint nSamples, uint nFeatures, uint k){
-    /* predict the contribution of the trees built in boosting round k */
-    for (uint i=0; i<n_classes; i++) {
-        tree[k*n_classes+i]->predict(X, pred+i*nSamples, nSamples, nFeatures);
-    }
-}
-
-void GBMClassifier::score2label(REAL *score, REAL *label,uint nSamples){
-    uint label_idx;
-    REAL max_score;
-    if (loss_function==BINOMIAL_DEVIANCE) {
-        for (uint i=0; i<nSamples; i++) {
-            if (1.0/(1.0+exp(-score[i]))>=0.5) {
-                label[i]=original_classes[1];
-            }else {
-                label[i]=original_classes[0];
-            }
-        }
-    }else{
-        for (uint i=0; i<nSamples; i++) {
-            max_score=-HUGE_VAL;
-            for (uint j=0; j<n_classes; j++) {
-                if (score[j*nSamples+i]>max_score) {
-                    max_score=score[j*nSamples+i];
-                    label_idx=j;
-                }
-            }
-            label[i]=original_classes[label_idx];
-        }
-    }
-}
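-
-/* Worked example (illustrative only, not from the original header): with two classes
- * (BINOMIAL_DEVIANCE) a raw score of 0.4 gives 1/(1+exp(-0.4)) ~ 0.60 >= 0.5, so the sample
- * is assigned original_classes[1]; a score of -0.2 gives ~0.45 < 0.5 and maps to
- * original_classes[0]. In the multi-class branch the label is simply the argmax over the
- * n_classes score columns for that sample. */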
-
-void GBMClassifier::predict(REAL **X, REAL *pred, uint nSamples, uint nFeatures,uint k_trees){
-    if (k_trees==0 || k_trees>this->n_trees) {
-        k_trees=n_trees;
-    }
-    uint i,j,k,offset;
-    REAL* score=new REAL[nSamples*n_classes];
-    REAL* score_stage=new REAL[nSamples];
-    for (i=0; i<n_classes; i++) {
-        offset=i*nSamples;
-        for (j=0; j<nSamples; j++) {
-            score[offset+j]=prior_pred[i];
-        }
-    }
-    for (i=0; i<k_trees; i++) {
-        for (k=0; k<n_classes; k++) {
-            offset=k*nSamples;
-            tree[i*n_classes+k]->predict(X, score_stage, nSamples, nFeatures);
-            for (j=0; j<nSamples; j++) {
-                score[offset+j]+=learning_rate*score_stage[j];
-            }
-        }
-    }
-    score2label(score, pred, nSamples);
-    delete []score;
-    delete []score_stage;
-}
-void GBMClassifier::score2prob(REAL *score, REAL *prob, uint nSamples){
-    uint i,j;
-    REAL sum_exp;
-    if (loss_function==BINOMIAL_DEVIANCE) {
-        for (i=0; i<nSamples; i++) {
-            prob[i]=1.0/(1.0+exp(-score[i]));
-        }
-    }else{
-        for (i=0; i<nSamples; i++) {
-            sum_exp=0.0;
-            for (j=0; j<n_classes; j++) {
-                sum_exp+=exp(score[j*nSamples+i]);
-            }
-            for (j=0; j<n_classes; j++) {
-                if (sum_exp<=EPS) {
-                    prob[j*nSamples+i]=0.0;
-                }else{
-                    prob[j*nSamples+i]=exp(score[j*nSamples+i])/sum_exp;
-                }
-            }
-        }
-    }
-}
-void GBMClassifier::predict_prob(REAL **X, REAL *pred_prob, uint nSamples, uint nFeatures, uint k_trees){
-    if (k_trees==0 || k_trees>this->n_trees) {
-        k_trees=n_trees;
-    }
-    uint i,j,k,offset;
-    REAL* score=new REAL[nSamples*n_classes];
-    REAL* score_stage=new REAL[nSamples];
-    for (i=0; i<n_classes; i++) {
-        offset=i*nSamples;
-        for (j=0; j<nSamples; j++) {
-            score[offset+j]=prior_pred[i];
-        }
-    }
-    for (i=0; i<k_trees; i++) {
-        for (k=0; k<n_classes; k++) {
-            offset=k*nSamples;
-            tree[i*n_classes+k]->predict(X, score_stage, nSamples, nFeatures);
-            for (j=0; j<nSamples; j++) {
-                score[offset+j]+=learning_rate*score_stage[j];
-            }
-        }
-    }
-    score2prob(score, pred_prob, nSamples);
-    delete []score;
-    delete []score_stage;
-
-}
-
-int GBMClassifier::save_model(const char *filename){
-    FILE* fp=fopen(filename,"w");
-    if (!fp) {
-        fprintf(stderr, "Cannot open file %s for save GBMClassifier model.Please check your file path is correct.\n",filename);
-        return false;
-    }
-    fprintf(fp, "GBMClassifier\n");
-    BaseGBM::save_model(fp);
-    fclose(fp);
-    return true;
-
-}
-#endif
diff --git a/include/tensemble/GBMRegressor.h b/include/tensemble/GBMRegressor.h
deleted file mode 100644
index f64b821..0000000
--- a/include/tensemble/GBMRegressor.h
+++ /dev/null
@@ -1,242 +0,0 @@
-/* * * * *
- *  GBMRegressor.h
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  Copyright (C) 2012, Rongkai Xia, shinekaixia at gmail.com
- *
- * * * * */
-
-#ifndef libTM_GBMRegressor_h
-#define libTM_GBMRegressor_h
-#include "BaseGBM.h"
-
-
-class GBMRegressor:public BaseGBM {
-    //gradient boosting regressor
-    
-public:
-    GBMRegressor(){};
-    
-    GBMRegressor(int loss_function,uint n_trees,uint n_features,uint max_depth,uint min_sample_leaf, REAL max_features_ratio,REAL subsample,REAL learning_rate,bool oob,bool compute_importance,uint random_seed,uint n_jobs,int verbose);
-    
-    ~GBMRegressor();
-    
-    int build(REAL **X, REAL *y, uint n_samples,\
-              REAL** val_X=NULL,REAL* val_y=NULL,uint n_val_samples=0);
-    
-    void predict(REAL** X,REAL* pred,uint nSamples,uint nFeatures,uint k_trees=0);
-    
-    //may useful for validation
-    void predict_verbose(REAL** X,REAL* y,REAL* pred,uint nSamples,uint nFeatures,\
-                         uint k);
-    int save_model(const char* filename);
-};
-
-GBMRegressor::GBMRegressor(int loss_function,uint n_trees,uint n_features,uint max_depth,uint min_sample_leaf, REAL max_features_ratio,REAL subsample,REAL learning_rate,bool oob,bool compute_importance,uint random_seed,uint n_jobs,int verbose)\
-:BaseGBM(loss_function,n_trees,n_features,max_depth,min_sample_leaf,max_features_ratio,subsample,learning_rate,oob,compute_importance,random_seed,n_jobs,verbose){
-}
-
-GBMRegressor::~GBMRegressor(){
-    
-}
-
-int GBMRegressor::build(REAL **X, REAL *y, uint n_samples,REAL** val_X,REAL* val_y,uint n_val_samples){
-    /* BUILD GRADIENT BOOSTING MODEL FOR REGRESSION
-     * For classification, labels must correspond to classes 0, 1, ..., n_classes-1.
-     * For regression and two-class classification, only one tree is fit in each GBM round.
-     * For multi-class classification, n_classes trees are fit in each GBM round.
-     */
-    uint i,j,k,idx,n_subsamples,*sample_index,count;
-    REAL** sub_X,*sub_y;
-    REAL* y_pred,*val_pred,*val_pred_stage,*residual,train_score,oob_score,valid_score;
-    bool *mask;
-    val_pred=NULL;
-#ifdef DEBUG
-    assert(n_trees>=1);
-#endif
-    //initialize loss function and set n_classes=1.
-    n_classes=1;
-    if (loss_function==SQUARE_LOSS) {
-        loss=new SquareLoss();
-    }else {
-        fprintf(stderr, "Error: Unknow loss function\n");
-        return ENSEMBLE_FAIL;
-    }
-    //allocate memory for prior and trees
-    prior_pred=new REAL[n_classes];
-    this->tree=new Tree*[n_trees];
-    
-    n_subsamples=n_samples;
-    mask=new bool[n_samples];
-    y_pred=new REAL[n_samples*n_classes];
-    if (subsample<1.0) {
-        sample_index=new uint[n_samples];
-        n_subsamples=(uint)floor((REAL)n_samples*subsample);
-        sub_X=new REAL*[n_subsamples];
-        sub_y=new REAL[n_subsamples];
-        for (i=0; i<n_samples; i++) {
-            sample_index[i]=i;
-        }
-#ifdef DEBUG
-        assert(n_subsamples<n_samples);
-#endif
-    }
-    residual=new REAL[n_samples];
-    //initialize prediction and save prior
-    loss->get_init_estimate(y, y_pred, prior_pred, n_samples, this->n_classes);
-    if (val_X && val_y) {
-        val_pred=new REAL[n_val_samples];
-        val_pred_stage=new REAL[n_val_samples];
-        for (i=0; i<n_val_samples; i++) {
-            val_pred[i]=*prior_pred;
-        }
-    }
-    time_t beg,end;
-    beg=time(NULL);
-    //main iteration
-    for (i=0; i<n_trees; i++) {
-        //compute working residual
-        loss->compute_residual(residual, y, y_pred, n_samples, n_classes, 0);
-        //sub-sampling
-        memset(mask, false, n_samples*sizeof(bool));
-        if (subsample<1.0) {
-            count=n_samples;
-            for (j=0; j<n_subsamples; j++) {
-//                k=rand()%count;
-                k=randomMT()%count;
-                idx=sample_index[k];
-                sub_X[j]=X[idx];
-                sub_y[j]=residual[idx];
-                mask[idx]=true;
-                swap(sample_index[k], sample_index[--count]);
-            }
-        }else {
-            sub_X=X;
-            sub_y=residual;
-            for (j=0; j<n_samples; j++) {
-                mask[j]=true;
-            }
-        }
-        uint tree_random_seed=randomMT();
-        TreeRegressor *t=new TreeRegressor(n_features,\
-                                           max_features,\
-                                           min_sample_leaf,\
-                                           max_depth,\
-                                           FIND_BEST,\
-                                           tree_random_seed,\
-                                           n_jobs);
-        //build tree
-        t->build(sub_X, sub_y, n_subsamples);
-        //update
-        loss->update(t, X, mask, y, y_pred, residual, n_samples, n_features, learning_rate, n_classes,0);
-        //add tree to ensemble
-        tree[i]=t;
-        if (val_X && val_y) {
-            t->predict(val_X, val_pred_stage, n_val_samples, n_features);
-            for (j=0; j<n_val_samples; j++) {
-                val_pred[j]+=learning_rate*val_pred_stage[j];
-            }
-        }
-        if ( (verbose && (i+1)%verbose==0) || i+1==n_trees) {      
-            //calculate train score and oob score;
-            train_score=loss->loss(y, y_pred, mask, true, n_samples, n_classes);
-            train_score=sqrt(train_score);
-            if (oob && false) {
-                oob_score=loss->loss(y, y_pred, mask, false, n_samples, n_classes);
-                oob_score=sqrt(oob_score);
-            }
-            if (verbose<=0) {
-                fprintf(stderr, "GBM train done.");
-            }else {
-                fprintf(stderr, "build tree %u of %u,",i+1,n_trees);
-            }
-            fprintf(stderr, "train RMSE=%f",train_score);
-            if (oob && false) {
-                fprintf(stderr, ",oob score(deviance)=%f",oob_score);
-            }
-            if (val_y) {
-                valid_score=rmse(val_pred, val_y, n_val_samples);
-                fprintf(stderr, ",validation RMSE=%f",valid_score);
-            }
-            fprintf(stderr, ".\n");
-
-        }
-    }
-    delete []mask;
-    delete []y_pred;
-    delete []residual;
-    if (subsample<1.0) {
-        delete []sample_index;
-        delete []sub_X;
-        delete []sub_y;
-    }
-    if (val_y) {
-        delete []val_pred_stage;
-        delete []val_pred;
-    }
-    end=time(NULL);
-    fprintf(stderr, "|Gradient Boosting Regressor training done. | Using time: %.0lf secs|\n",difftime(end, beg));
-    return ENSEMBLE_SUCCESS;
-}
-
-void GBMRegressor::predict(REAL **X, REAL *pred, uint nSamples, uint nFeatures,uint k_trees){
-#ifdef DEBUG
-    assert(k_trees<=n_trees);
-#endif
-    if (k_trees==0) {
-        k_trees=n_trees;
-    }
-    uint i,j;
-    REAL* pred_stage=new REAL[nSamples];
-    for (i=0; i<nSamples; i++) {
-        pred[i]=*prior_pred;
-    }
-    for (i=0; i<k_trees; i++) {
-        tree[i]->predict(X, pred_stage, nSamples, nFeatures);
-        for (j=0; j<nSamples; j++) {
-            pred[j]+=learning_rate*pred_stage[j];
-        }
-    }
-    delete []pred_stage;
-}
-
-void GBMRegressor::predict_verbose(REAL **X, REAL *y, REAL *pred, uint nSamples, uint nFeatures,uint k_trees){
-#ifdef DEBUG
-    assert(k_trees<=n_trees);
-#endif
-    if (k_trees==0) {
-        k_trees=n_trees;
-    }
-    uint i,j;
-    REAL* pred_stage=new REAL[nSamples];
-    for (i=0; i<nSamples; i++) {
-        pred[i]=*prior_pred;
-    }
-    for (i=0; i<k_trees; i++) {
-        tree[i]->predict(X, pred_stage, nSamples, nFeatures);
-        for (j=0; j<nSamples; j++) {
-            pred[j]+=learning_rate*pred_stage[j];
-        }
-        REAL MSE=mse(pred, y, nSamples);
-        REAL Rscore=R2(pred,y,nSamples);
-        fprintf(stderr,"GBMRegressor prediction: tree %u,MSE=%f,R^2=%f\n",i+1,MSE,Rscore);
-    }
-    delete []pred_stage;
-}
-
-int GBMRegressor::save_model(const char *filename){
-    FILE* fp=fopen(filename,"w");
-    if (!fp) {
-        fprintf(stderr, "Error: Cannot open file %s for save GBMRegressor model.Please check your file path is correct.\n",filename);
-        return false;
-    }
-    fprintf(fp, "GBMRegressor\n");
-    BaseGBM::save_model(fp);
-    fclose(fp);
-    return true;
-}
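-
-/* Minimal usage sketch (illustrative only; it assumes REAL, uint, SQUARE_LOSS and
- * ENSEMBLE_SUCCESS come from the headers included via BaseGBM.h, and that X and y are
- * already loaded, e.g. with readData from ReadData.h):
- *
- *   #include "GBMRegressor.h"
- *
- *   void train_gbm_toy(REAL** X, REAL* y, uint n_samples, uint n_features) {
- *       // 100 trees, depth 4, >=5 samples per leaf, all features per split, 80% subsampling,
- *       // learning rate 0.05, no OOB/importances, seed 0, 1 thread, report every 10 trees.
- *       GBMRegressor gbm(SQUARE_LOSS, 100, n_features, 4, 5, 1.0, 0.8, 0.05,
- *                        false, false, 0, 1, 10);
- *       if (gbm.build(X, y, n_samples) == ENSEMBLE_SUCCESS) {
- *           REAL* pred = new REAL[n_samples];
- *           gbm.predict(X, pred, n_samples, n_features);   // uses all trees (k_trees=0)
- *           gbm.save_model("gbm_regressor.model");
- *           delete [] pred;
- *       }
- *   }
- */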
-#endif
diff --git a/include/tensemble/LossFunction.h b/include/tensemble/LossFunction.h
deleted file mode 100644
index 9f3a2cb..0000000
--- a/include/tensemble/LossFunction.h
+++ /dev/null
@@ -1,350 +0,0 @@
-/* * * * *
- *  LossFunction.h
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  Copyright (C) 2012, Rongkai Xia, shinekaixia at gmail.com
- *
- * * * * */
-
-#ifndef libTM_LossFunction_h
-#define libTM_LossFunction_h
-#include "Estimator.h"
-#include "Tree.h"
-
-class LossFunction {
-    /* ABSTRACT LOSS FUNCTION
-     * The parameters n_classes and k in the functions below are used for classification.
-     * get_init_estimate: return the prior estimate (initializes the prediction)
-     * loss: calculate the loss
-     * compute_residual: return the residual (or pseudo-response); for multi-class problems,
-     * return the k-th residual (or pseudo-response)
-     */
-    
-public:
-    REAL alpha;//just for QuantileLossFunction and HuberLossFunction
-public:
-    virtual ~LossFunction(){};
-    
-    virtual void get_init_estimate(REAL* y,REAL* pred,REAL* prior,uint nSamples,uint n_classes=1)=0;
-    
-    virtual REAL loss(REAL* y,REAL* pred,bool* mask,bool invert_mask,uint nSamples,uint n_classes=1)=0;
-    
-    virtual void compute_residual(REAL* residual, REAL* y,REAL* pred,uint nSamples,uint n_classes=1,uint k=0)=0;
-    
-    virtual void update(Tree* tree, REAL** X,bool* mask,REAL* y,REAL* pred,REAL* residual,\
-                        uint nSamples,uint nFeatures,REAL learning_rate,uint n_classes=1,uint k=0);
-    
-    virtual void update_terminal_region(Tree* tree, uint* region, bool* mask,REAL* y, \
-                                        REAL* residual,uint nSamples,uint n_classes)=0;
-};
-
-void LossFunction::update(Tree* tree, REAL** X,bool* mask,REAL* y,REAL* pred,REAL* residual,\
-                          uint nSamples,uint nFeature,REAL learning_rate,uint n_classes,uint k){
-    /* UPDATE FUNCTION FOR GRADIENT BOOSTING
-     * update k-th tree for gradient boosting.
-     * k=0, n_classes=1 for regression and two-class classification
-     */
-    uint *region=new uint[nSamples];
-    tree->predict_terminal_region(X, region, nSamples, nFeature);
-    update_terminal_region(tree, region, mask, y, residual, nSamples,n_classes);
-    for (uint i=0; i<nSamples; i++) {
-#ifdef DEBUG
-        assert(y[i]==0 || y[i]==1);
-#endif
-        uint node_id=region[i];
-        pred[i]+=learning_rate*tree->nodes[node_id]->pred[0];
-//#ifdef DEBUG
-//        assert(pred[i]>=0);
-//#endif
-    }
-    delete []region;
-}
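-
-/* Illustrative note (not part of the original code): this is the standard gradient boosting
- * update. With terminal regions R_1..R_J and the per-leaf Newton estimates gamma_j stored in
- * the leaf's pred[0] by update_terminal_region, the model after round m is
- *     F_m(x) = F_{m-1}(x) + learning_rate * gamma_{j(x)}
- * where j(x) is the leaf that x falls into, so pred[i] accumulates one shrunken correction
- * per boosting round. */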
-
-class SquareLoss:public LossFunction {
-    //Square loss
-    
-public:
-    void get_init_estimate(REAL* y,REAL* pred,REAL* prior,uint nSamples,uint n_classes);
-    
-    REAL loss(REAL* y, REAL* pred,bool* mask,bool invert_mask,uint nSamples,uint n_classes);
-    
-    void compute_residual(REAL* residual, REAL* y,REAL* pred,uint nSamples,uint n_classes,uint k);
-    
-    void update(Tree* tree, REAL** X,bool* mask,REAL* y,REAL* pred,REAL* residual,\
-                        uint nSamples,uint nFeatures,REAL learning_rate,uint n_classes,uint k);
-    
-    void update_terminal_region(Tree* tree, uint* region, bool* mask,REAL* y, \
-                                        REAL* residual,uint nSamples,uint n_classes);
-};
-
-void SquareLoss::get_init_estimate(REAL *y, REAL *pred, REAL* prior, uint nSamples,uint n_classes){
-    return MeanEstimator(y, pred, prior,nSamples);
-}
-
-REAL SquareLoss::loss(REAL *y, REAL *pred,bool* mask,bool invert_mask,uint nSamples,uint n_classes){
-    /* compute the loss
-     * if invert_mask=true, return the training score; otherwise return the OOB score.
-     */
-    REAL ret=0.0;
-    uint count=0;
-    for (uint i=0; i<nSamples; i++) {
-        if ( (mask[i] && !invert_mask) || (!mask[i] && invert_mask) ) {
-            continue;
-        }
-        ret+=(y[i]-pred[i])*(y[i]-pred[i]);
-        count++;
-    }
-    return ret/count;
-}
-
-void SquareLoss::compute_residual(REAL *residual, REAL *y, REAL *pred, uint nSamples,uint n_classes,uint k){
-    /* compute the working residual, which equals the negative gradient.
-     */
-//    uint count=0;
-    for (uint i=0; i<nSamples; i++) {
-//        if (!mask[i]) {
-//            continue;
-//        }
-        residual[i]=y[i]-pred[i];
-    }
-}
-
-void SquareLoss::update(Tree *tree, REAL **X, bool *mask, REAL *y, REAL *pred, REAL *residual, uint nSamples, uint nFeatures, REAL learning_rate,uint n_classes, uint k){
-    /* Least squares does not need to update terminal regions.*/
-#ifdef DEBUG
-    assert(n_classes==1);
-    assert(k==0);
-#endif
-    REAL* r_pred=new REAL[nSamples];
-    tree->predict(X, r_pred, nSamples, nFeatures);
-    for (uint i=0; i<nSamples; i++) {
-        pred[i]+=learning_rate*r_pred[i];
-    }
-    delete []r_pred;
-}
-
-void SquareLoss::update_terminal_region(Tree *tree, uint *region, bool *mask, REAL *y, REAL *residual, uint nSamples,uint n_classes){
-    /* Least squares does not need to update terminal regions.*/
-    
-}
-
-class BinomialDeviance:public LossFunction {
-    //negative binomial log-likelihood loss
-    
-public:
-    void get_init_estimate(REAL* y,REAL* pred,REAL* prior,uint nSamples,uint n_classes);
-    
-    REAL loss(REAL* y, REAL* pred,bool* mask,bool invert_mask,uint nSamples,uint n_classes);
-    
-    void compute_residual(REAL* residual, REAL* y,REAL* pred,uint nSamples,uint n_classes,uint k);
-    
-    void update_terminal_region(Tree* tree, uint* region, bool* mask,REAL* y, \
-                                REAL* residual,uint nSamples,uint n_classes);
-};
-
-void BinomialDeviance::get_init_estimate(REAL *y, REAL *pred, REAL* prior, uint nSamples,uint n_classes){
-    return LogOddsEstimator(y, pred, prior, nSamples);
-}
-
-REAL BinomialDeviance::loss(REAL *y, REAL *pred,bool* mask,bool invert_mask,uint nSamples,uint n_classes){
-    /* compute the loss
-     * if invert_mask=true, return the training score; otherwise return the OOB score.
-     */
-    REAL deviance=0;
-    uint count=0;
-    for (uint i=0; i<nSamples; i++) {
-        if ( (mask[i] && !invert_mask) || (!mask[i] && invert_mask) ) {
-            continue;
-        }
-#ifdef DEBUG
-        assert(y[i]==0 || y[i]==1);
-#endif
-        deviance-=y[i]*pred[i]-log(1.0+exp(pred[i]));
-        count++;
-    }
-    return deviance/count;
-}
-
-void BinomialDeviance::compute_residual(REAL *residual, REAL *y, REAL *pred, uint nSamples,uint n_classes,uint k){
-    /* compute the working residual, which equals the negative gradient.
-     * y has zero-one values, e.g. y[i]=1 if sample i belongs to the positive class, otherwise y[i]=0.
-     */
-
-//    uint count=0;
-    for (uint i=0; i<nSamples; i++) {
-//        if (!mask[i]) {
-//            continue;
-//        }
-#ifdef DEBUG
-        assert(y[i]==0 || y[i]==1);
-#endif
-        residual[i]=y[i]-1.0/(1.0+exp(-pred[i]));
-    }
-}
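-
-/* Worked example (illustrative only): with the current score pred[i]=0 the model probability
- * is p = 1/(1+exp(0)) = 0.5, so a positive sample (y[i]=1) gets residual +0.5 and a negative
- * sample (y[i]=0) gets residual -0.5; as the score moves toward the correct class the
- * residual shrinks toward 0. */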
-
-void BinomialDeviance::update_terminal_region(Tree* tree, uint* region, bool* mask, REAL* y,\
-                            REAL* residual,uint nSamples,uint n_classes){
-    /* UPDATE TERMINAL REGION FOR BinomialDeviance
-     * Make a single Newton-Raphson step
-     * sample i is an out-of-bag sample if mask[i]=false
-     */
-    uint i,k,numNodes=tree->numNodes;
-    REAL *numerator,*denominator;
-    numerator=new REAL[numNodes+1];
-    denominator=new REAL[numNodes+1];
-    for (i=ROOT; i<=numNodes; i++) {
-        numerator[i]=denominator[i]=0;
-    }
-//    uint count=0;
-    for (i=0; i<nSamples; i++) {
-        if (!mask[i]) {
-            continue;
-        }
-#ifdef DEBUG
-        assert(y[i]==0 || y[i]==1);
-#endif
-        k=region[i];
-        numerator[k]+=residual[i];
-        denominator[k]+=(y[i]-residual[i])*(1.0-y[i]+residual[i]);
-//        count++;
-    }
-    for (i=ROOT; i<=numNodes; i++) {
-        //update terminal nodes
-        if (tree->nodes[i]->leaf) {
-            //using pred[0] to store the GBM prediction
-            if (denominator[i]==0) {
-                tree->nodes[i]->pred[0]=0;
-            }else{
-                tree->nodes[i]->pred[0]=numerator[i]/denominator[i];
-            }
-        }
-    }
-    delete []numerator;
-    delete []denominator;
-}
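-
-/* Note (illustrative only): since residual[i] = y[i] - p_i, the accumulated denominator term
- * (y[i]-residual[i])*(1.0-y[i]+residual[i]) equals p_i*(1-p_i), so each leaf value is the
- * single Newton-Raphson step sum(residual) / sum(p*(1-p)) over its in-bag samples. */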
-
-class MultinomialDeviance:public LossFunction {
-    //multi-class logistic loss
-    
-public:
-    void get_init_estimate(REAL* y,REAL* pred,REAL* prior,uint nSamples,uint n_classes);
-    
-    REAL loss(REAL* y, REAL* pred,bool* mask,bool invert_mask,uint nSamples,uint n_classes);
-    
-    void compute_residual(REAL* residual, REAL* y,REAL* pred,uint nSamples,uint n_classes,uint k);
-    
-    void update_terminal_region(Tree* tree, uint* region, bool* mask,REAL* y, \
-                                REAL* residual,uint nSamples,uint n_classes);
-};
-
-void MultinomialDeviance::get_init_estimate(REAL *y, REAL *pred,REAL* prior, uint nSamples, uint n_classes){
-    return MulticlassPriorEstimator(y, pred, prior, nSamples, n_classes);
-}
-
-REAL MultinomialDeviance::loss(REAL *y, REAL *pred,bool* mask,bool invert_mask, uint nSamples,uint n_classes){
-    /* multinomial deviance loss
-     * here y takes values in {0,1,...,n_classes-1}, not zero-one values
-     * if invert_mask=true, return the training score; otherwise return the OOB score.
-     */
-    
-    uint *zero_one_y=new uint[nSamples*n_classes];
-    uint i,j,k;
-    REAL deviance=0.0,sum_exp;
-    uint count=0;
-    memset(zero_one_y, 0, nSamples*n_classes*sizeof(uint));
-    for (i=0; i<nSamples; i++) {
-#ifdef DEBUG
-        assert(y[i]>=0 && y[i]<n_classes);
-#endif
-        k=y[i];
-        zero_one_y[k*nSamples+i]=1;
-    }
-    
-    for (i=0; i<nSamples; i++) {
-        if ( (mask[i] && !invert_mask) || (!mask[i] && invert_mask) ) {
-            continue;
-        }
-        sum_exp=0.0;
-        for (j=0; j<n_classes; j++) {
-            sum_exp+=exp(pred[j*nSamples+i]);
-        }
-#ifdef DEBUG
-        assert(sum_exp!=0);
-#endif
-        for (j=0; j<n_classes; j++) {
-            deviance-=((REAL)zero_one_y[j*nSamples+i]*pred[j*nSamples+i]-log(sum_exp));
-        }
-        count++;
-    }
-    delete []zero_one_y;
-    return deviance/count;
-}
-
-void MultinomialDeviance::compute_residual(REAL *residual, REAL *y, REAL *pred, uint nSamples,uint n_classes, uint k){
-    /* Compute the residual (or pseudo-response, the negative gradient) for the k-th class.
-     * y has zero-one values, e.g. y[i]=1 if sample i belongs to the k-th class, otherwise y[i]=0.
-     * pred has size nSamples*n_classes, i.e. pred=[pred_1,pred_2,...,pred_n_classes] with
-     * pred_k=[f_k(X_1),f_k(X_2),...,f_k(X_nSamples)]
-     */
-    uint i,j;
-    REAL sum_exp,prob_k;
-    for (i=0; i<nSamples; i++) {
-#ifdef DEBUG
-        assert(y[i]==0 || y[i]==1);
-#endif
-        sum_exp=0.0;
-        for (j=0; j<n_classes; j++) {
-            sum_exp+=exp(pred[j*nSamples+i]);
-        }
-        if (sum_exp==0.0) {
-            prob_k=0.0;
-        }else{
-            prob_k=exp(pred[k*nSamples+i])/sum_exp;
-        }
-        residual[i]=y[i]-prob_k;
-    }
-}
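-
-/* Worked example (illustrative only): for three classes with raw scores (0,0,0) for sample i,
- * the softmax gives prob_k = 1/3 for every class, so the residual is 1 - 1/3 = 2/3 for the
- * true class (where y[i]=1) and -1/3 for the other two classes. */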
-
-void MultinomialDeviance::update_terminal_region(Tree *tree, uint *region, bool *mask, REAL *y, REAL *residual, uint nSamples,uint n_classes){
-    /* UPDATE TERMINAL REGION FOR MultinomialDeviance
-     * Make a single Newton-Raphson step
-     * sample i is an out-of-bag sample if mask[i]=false
-     */
-    uint i,k,numNodes=tree->numNodes;
-    REAL *numerator,*denominator;
-    numerator=new REAL[numNodes+1];
-    denominator=new REAL[numNodes+1];
-    for (i=ROOT; i<=numNodes; i++) {
-        numerator[i]=denominator[i]=0.0;
-    }
-//    uint count=0;
-    for (i=0; i<nSamples; i++) {
-        if (!mask[i]) {
-            continue;
-        }
-#ifdef DEBUG
-        assert(y[i]==0 || y[i]==1);
-#endif
-        k=region[i];
-        numerator[k]+=residual[i];
-        denominator[k]+=(y[i]-residual[i])*(1-y[i]+residual[i]);
-    }
-    for (i=ROOT; i<=numNodes; i++) {
-        //update terminal nodes
-        if (tree->nodes[i]->leaf) {
-            //using pred[0] to store the GBM prediction
-            if (denominator[i]==0) {
-                tree->nodes[i]->pred[0]=0.0;
-            }else{
-                tree->nodes[i]->pred[0]=((REAL)(n_classes-1.0)/n_classes)*numerator[i]/denominator[i];
-            }
-        }
-    }
-    delete []numerator;
-    delete []denominator;
-}
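-
-/* Note (illustrative only): this mirrors the binomial update above, except that each leaf
- * estimate is scaled by (n_classes-1)/n_classes, the usual correction for the symmetric
- * K-class parameterization in gradient tree boosting. */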
-#endif
diff --git a/include/tensemble/RandomForestClassifier.h b/include/tensemble/RandomForestClassifier.h
deleted file mode 100644
index a5c34a9..0000000
--- a/include/tensemble/RandomForestClassifier.h
+++ /dev/null
@@ -1,269 +0,0 @@
-/* * * * *
- *  RandomForestClassifier.h
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  Copyright (C) 2012, Rongkai Xia, shinekaixia at gmail.com
- *
- * * * * */
-
-#ifndef libTM_RandomForestClassifier_h
-#define libTM_RandomForestClassifier_h
-#include "BaseForest.h"
-
-//global function for building the random forest classifier trees in parallel
-void RFC_build_trees_range(REAL** X,REAL* y,uint n_samples,BaseForest* forest,std::pair<uint, uint> n_trees_range,uint* oob_count,REAL* oob_prediction,REAL* importances);
-
-class RandomForestClassifier:public BaseForest {
-    //A random forest classifier
-    
-public:
-    RandomForestClassifier():BaseForest(){};
-    
-    RandomForestClassifier(int split_criterion,uint n_trees,uint n_features,uint max_depth,uint min_sample_leaf, REAL max_features_ratio,bool bootstrap,bool oob,bool compute_importance,uint random_seed,uint n_jobs,bool verbose);
-    
-    ~RandomForestClassifier();
-    
-    int build(REAL** X,REAL* original_y,uint n_samples);
-    
-    void predict(REAL** X,REAL* pred,uint nSamples,uint nFeatures);
-    
-    void predict_prob(REAL** X,REAL* pred_prob,uint nSamples,uint nFeatures);
-    
-    void score2label(REAL* score,REAL* pred_label,uint nSamples);
-    
-    int save_model(const char* filename);
-    
-private:
-    void convert_to_unique(REAL* original_y,REAL* y,uint nSamples);
-};
-RandomForestClassifier::RandomForestClassifier(int split_criterion,\
-                                             uint n_trees,\
-                                             uint n_features,\
-                                             uint max_depth,\
-                                             uint min_sample_leaf,\
-                                             REAL max_features_ratio,\
-                                             bool bootstrap,\
-                                             bool oob,\
-                                             bool compute_importance,\
-                                             uint random_seed,\
-                                             uint n_jobs,\
-                                             bool verbose)\
-:BaseForest(split_criterion, n_trees,n_features,max_depth,min_sample_leaf, max_features_ratio,FIND_BEST, bootstrap,oob,compute_importance,random_seed,n_jobs,verbose){    
-    
-}
-RandomForestClassifier::~RandomForestClassifier(){
-    
-}
-
-
-int RandomForestClassifier::build(REAL** X,REAL* original_y,uint n_samples){
-    time_t beg,end;
-    beg=time(NULL);
-    uint i,j,k,**oob_count,n_trees_beg,n_trees_end;
-    REAL** oob_prediction,**importance_jobs,*oob_label_prediction;
-    REAL* y;
-    
-    //convert y to unique classes
-    y=new REAL[n_samples];
-    convert_to_unique(original_y, y, n_samples);
-    if (n_classes==1) {
-        fprintf(stderr, "The training file only have one label,at least two-labels are required for classification.Please check your file.\n");
-        delete []y;
-        return ENSEMBLE_FAIL;
-    }
-    
-    std::pair<uint, uint> n_trees_range;
-    oob_count=new uint* [n_jobs];
-    oob_prediction=new REAL* [n_jobs];
-    importance_jobs=new REAL*[n_jobs];
-    if (oob) {
-        oob_label_prediction=new REAL[n_samples];
-        for (j=0; j<n_jobs; j++) {
-            oob_count[j]=new uint[n_samples];
-            oob_prediction[j]=new REAL[n_samples*n_classes];
-        }
-    }
-    if (compute_importance) {
-        this->importances=new REAL[n_features];
-        for (j=0; j<n_jobs; j++) {
-            importance_jobs[j]=new REAL[n_features];
-        }
-    }
-    boost::thread** thread=new boost::thread*[n_jobs];
-    for (j=0; j<n_jobs; j++) {
-        n_trees_beg=(n_trees/n_jobs)*j;
-        n_trees_end=(n_trees/n_jobs)*(j+1);
-        if (j==n_jobs-1) {
-            //assign the remaining trees to the last job.
-            n_trees_end=n_trees;
-        }
-        n_trees_range.first=n_trees_beg;
-        n_trees_range.second=n_trees_end;
-        thread[j]=new boost::thread(bind(RF_build_trees_range, X, y, n_samples, this, n_trees_range, oob_count[j], oob_prediction[j], importance_jobs[j]));
-    }
-    for (j=0; j<n_jobs; j++) {
-        thread[j]->join();
-        delete thread[j];
-    }
-    if (verbose) {
-        fprintf(stderr, "\n");
-    }
-    if (oob) {
-        bool warn_flag=false;
-        for (i=0; i<n_samples; i++) {
-            for (j=1; j<n_jobs; j++) {
-                oob_count[0][i]+=oob_count[j][i];
-                for (k=0; k<n_classes; k++) {
-                    oob_prediction[0][i*n_classes+k]+=oob_prediction[j][i*n_classes+k];
-                }
-            }
-            if (oob_count[0][i]==0) {
-                if (!warn_flag) {
-                    fprintf(stderr, "WARN: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates.\n");
-                    warn_flag=true;
-                }
-                oob_count[0][i]=1;
-            }
-            for (k=0; k<n_classes; k++) {
-                oob_prediction[0][i*n_classes+k]/=oob_count[0][i];
-            }
-        }
-        score2label(oob_prediction[0], oob_label_prediction, n_samples);
-        oob_scores=Accuracy(original_y, oob_label_prediction, n_samples);
-        if (1) {
-            fprintf(stderr, "Out-of-bag score(Accuracy)=%lf\n",oob_scores);
-        }
-        for (i=0; i<n_jobs; i++) {
-            delete []oob_count[i];
-            delete []oob_prediction[i];
-        }
-        delete []oob_count;
-        delete []oob_prediction;
-        delete []oob_label_prediction;
-    }
-    if (compute_importance) {
-        for (i=0; i<n_features; i++) {
-            importances[i]=0;
-            for (j=0; j<n_jobs; j++) {
-                importances[i]+=importance_jobs[j][i];
-            }
-            importances[i]/=n_trees;
-        }
-        for (i=0; i<n_jobs; i++) {
-            delete []importance_jobs[i];
-        }
-        delete []importance_jobs;
-    }
-    delete []y;
-    end=time(NULL);
-    fprintf(stderr, "|Random Forest training done. | Using time: %.0lf secs|\n",difftime(end, beg));
-    return ENSEMBLE_SUCCESS;
-}
-
-void RandomForestClassifier::convert_to_unique(REAL *original_y, REAL *y,uint nSamples){
-    std::map<int,uint> key_values;
-    std::map<int,uint>::const_iterator iter;
-    vector<int> classes_map;
-    n_classes=0;
-    for (uint i=0; i<nSamples; i++) {
-        int origin_class=(int)original_y[i];
-        if ((iter=key_values.find(origin_class))==key_values.end()) {
-            key_values.insert(make_pair(origin_class, n_classes));
-            y[i]=n_classes;
-            classes_map.push_back(origin_class);
-            n_classes++;
-        }else {
-            y[i]=iter->second;
-        }
-    }
-    original_classes=new int[n_classes];
-    for (uint i=0; i<n_classes; i++) {
-        original_classes[i]=classes_map[i];
-    }
-}
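-
-/* Worked example (illustrative only): original labels {7, 3, 7, 5} are remapped, in order of
- * first appearance, to y = {0, 1, 0, 2}; n_classes becomes 3 and original_classes = [7, 3, 5].
- * score2label later maps predictions back through original_classes[idx]. */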
-
-void RandomForestClassifier::score2label(REAL *score, REAL *pred_label, uint nSamples){
-    uint idx,i,j;
-    for (i=0; i<nSamples; i++) {
-        REAL prob=-1;
-        for (j=0; j<n_classes; j++) {
-            if (score[i*n_classes+j]>prob) {
-                prob=score[i*n_classes+j];
-                idx=j;
-            }
-        }
-        pred_label[i]=original_classes[idx];
-    }
-}
-
-void RandomForestClassifier::predict(REAL** X,REAL* pred,uint nSamples,uint nFeatures){
-    uint i,j;
-    REAL* single_tree_pred=new REAL[nSamples*n_classes];
-    REAL* score=new REAL[nSamples*n_classes];
-#ifdef DEBUG
-    assert(nFeatures==this->n_features);
-#endif
-    for (i=0; i<nSamples*n_classes; i++) {
-        score[i]=0;
-    }
-    for (i=0; i<n_trees; i++) {
-#ifdef DEBUG
-        assert(tree[i]!=NULL);
-#endif
-        tree[i]->predict(X, single_tree_pred, nSamples, nFeatures);
-        for (j=0; j<nSamples*n_classes; j++) {
-            score[j]+=single_tree_pred[j];
-        }
-    }
-    
-    //average prediction
-    for (i=0; i<nSamples*n_classes; i++) {
-        score[i]/=n_trees;
-    }
-//    predict_prob(X, score, nSamples, nFeatures);
-    score2label(score, pred, nSamples);
-    delete [] score;
-    delete [] single_tree_pred;
-}
-
-void RandomForestClassifier::predict_prob(REAL **X, REAL *pred_prob, uint nSamples, uint nFeatures){
-    uint i,j;
-    REAL* single_tree_pred=new REAL[nSamples*n_classes];
-#ifdef DEBUG
-    assert(nFeatures==this->n_features);
-#endif
-    for (i=0; i<nSamples*n_classes; i++) {
-        pred_prob[i]=0;
-    }
-    for (i=0; i<n_trees; i++) {
-#ifdef DEBUG
-        assert(tree[i]!=NULL);
-#endif
-        tree[i]->predict(X, single_tree_pred, nSamples, nFeatures);
-        for (j=0; j<nSamples*n_classes; j++) {
-            pred_prob[j]+=single_tree_pred[j];
-        }
-    }
-    //average prediction
-    for (i=0; i<nSamples*n_classes; i++) {
-        pred_prob[i]/=n_trees;
-    }
-    delete []single_tree_pred;
-}
-
-
-int RandomForestClassifier::save_model(const char *filename){
-    FILE* fp=fopen(filename,"w");
-    if (!fp) {
-        fprintf(stderr, "Error: Cannot open file %s for save RandomForestClassifier model.Please check your file path is correct.\n",filename);
-        return false;
-    }
-    fprintf(fp, "RandomForestClassifier\n");
-    BaseForest::save_model(fp);
-    fclose(fp);
-    return true;
-}
-
-#endif
diff --git a/include/tensemble/RandomForestRegressor.h b/include/tensemble/RandomForestRegressor.h
deleted file mode 100644
index 3e837df..0000000
--- a/include/tensemble/RandomForestRegressor.h
+++ /dev/null
@@ -1,279 +0,0 @@
-/* * * * *
- *  RandomForestRegressor.h
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  Copyright (C) 2012, Rongkai Xia, shinekaixia at gmail.com
- *
- * * * * */
-
-#ifndef libTM_RandomForestRegressor_h
-#define libTM_RandomForestRegressor_h
-#include "BaseForest.h"
-
-extern boost::mutex RF_mutex;
-//global function for building the random forest regressor trees in parallel
-void RFG_build_trees_range(REAL** X,REAL* y,uint n_samples,BaseForest* forest,uint n_trees_beg,uint n_trees_end,uint* oob_count,REAL* oob_prediction,REAL* importances);
-
-class RandomForestRegressor:public BaseForest {
-    //A random forest regressor
-    
-public:
-    RandomForestRegressor():BaseForest(){};
-    
-    RandomForestRegressor(uint n_trees,uint n_features,uint max_depth,uint min_sample_leaf, REAL max_features_ratio,bool bootstrap,bool oob,bool compute_importance,uint random_seed,uint n_jobs,bool verbose);
-    
-    ~RandomForestRegressor();
-    
-    int build(REAL** X,REAL* y,uint n_samples);
-    
-    void predict(REAL** X,REAL* pred,uint nSamples,uint nFeatures);
-    
-    int save_model(const char* filename);
-    
-};
-RandomForestRegressor::RandomForestRegressor(uint n_trees,\
-                                             uint n_features,\
-                                             uint max_depth,\
-                                             uint min_sample_leaf,\
-                                             REAL max_features_ratio,\
-                                             bool bootstrap,\
-                                             bool oob,\
-                                             bool compute_importance,\
-                                             uint random_seed,\
-                                             uint n_jobs,\
-                                             bool verbose)\
-:BaseForest(CRITERION_MSE,n_trees,n_features,max_depth,min_sample_leaf, max_features_ratio,FIND_BEST, bootstrap,oob,compute_importance,random_seed,n_jobs,verbose){    
-}
-RandomForestRegressor::~RandomForestRegressor(){
-    
-}
-
-int RandomForestRegressor::build(REAL** X,REAL* y,uint n_samples){
-    
-    //set n_classes=1 for regression
-    this->n_classes=1;
-    
-    time_t beg,end;
-    beg=time(NULL);
-    uint i,j,**oob_count,n_trees_beg,n_trees_end;
-    REAL** oob_prediction,**importance_tmp;
-    oob_count=new uint* [n_jobs];
-    oob_prediction=new REAL* [n_jobs];
-    importance_tmp=new REAL*[n_jobs];
-    std::pair<uint,uint> n_trees_range;
-    if (oob) {
-        for (j=0; j<n_jobs; j++) {
-            oob_count[j]=new uint[n_samples];
-            oob_prediction[j]=new REAL[n_samples];
-        }
-    }
-    if (compute_importance) {
-        this->importances=new REAL[n_features];
-        for (j=0; j<n_jobs; j++) {
-            importance_tmp[j]=new REAL[n_features];
-        }
-    }
-    boost::thread** thread=new boost::thread*[n_jobs];
-    for (j=0; j<n_jobs; j++) {
-        n_trees_beg=(n_trees/n_jobs)*j;
-        n_trees_end=(n_trees/n_jobs)*(j+1);
-        if (j==n_jobs-1) {
-            //assign the remaining trees to the last job.
-            n_trees_end=n_trees;
-        }
-        n_trees_range.first=n_trees_beg;
-        n_trees_range.second=n_trees_end;
-        thread[j]=new boost::thread(bind(RF_build_trees_range, X, y, n_samples, this, n_trees_range, oob_count[j], oob_prediction[j], importance_tmp[j]));
-    }
-    for (j=0; j<n_jobs; j++) {
-        thread[j]->join();
-        delete thread[j];
-    }
-    if (verbose) {
-        fprintf(stderr, "\n");
-    }
-    if (oob) {
-        bool warn_flag=false;
-        for (i=0; i<n_samples; i++) {
-            for (j=1; j<n_jobs; j++) {
-                oob_count[0][i]+=oob_count[j][i];
-                oob_prediction[0][i]+=oob_prediction[j][i];
-            }
-            if (oob_count[0][i]==0) {
-                if (!warn_flag) {
-                    fprintf(stderr, "WARN: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates.\n");
-                    warn_flag=true;
-                }
-                oob_count[0][i]=1;
-            }
-            oob_prediction[0][i]/=oob_count[0][i];
-        }
-        REAL oob_R2=R2(oob_prediction[0], y, n_samples);
-        oob_scores=rmse(oob_prediction[0], y, n_samples);
-        if (1) {
-            fprintf(stderr, "Out-of-bag score(RMSE,Correlation Coefficient)=(%lf,%lf)\n",oob_scores,oob_R2);
-        }
-        for (i=0; i<n_jobs; i++) {
-            delete []oob_count[i];
-            delete []oob_prediction[i];
-        }
-        delete []oob_count;
-        delete []oob_prediction;
-    }
-    if (compute_importance) {
-        for (i=0; i<n_features; i++) {
-            importances[i]=0;
-            for (j=0; j<n_jobs; j++) {
-                importances[i]+=importance_tmp[j][i];
-            }
-            importances[i]/=n_trees;
-        }
-        for (i=0; i<n_jobs; i++) {
-            delete []importance_tmp[i];
-        }
-        delete []importance_tmp;
-    }
-    end=time(NULL);
-    fprintf(stderr, "|Random Forest training done. | Using time: %.0lf secs|\n",difftime(end, beg));
-    return ENSEMBLE_SUCCESS;
-}
-void RandomForestRegressor::predict(REAL** X,REAL* pred,uint nSamples,uint nFeatures){
-    uint i,j;
-    REAL* single_tree_pred=new REAL[nSamples];
-#ifdef DEBUG
-    assert(nFeatures==this->n_features);
-#endif
-    for (i=0; i<nSamples; i++) {
-        pred[i]=0;
-    }
-    for (i=0; i<n_trees; i++) {
-#ifdef DEBUG
-        assert(tree[i]!=NULL);
-#endif
-        tree[i]->predict(X, single_tree_pred, nSamples, nFeatures);
-        for (j=0; j<nSamples; j++) {
-            pred[j]+=single_tree_pred[j];
-        }
-    }
-    
-    //average prediction
-    for (i=0; i<nSamples; i++) {
-        pred[i]/=n_trees;
-    }
-    delete [] single_tree_pred;
-}
-
-
-void RFG_build_trees_range(REAL** X,REAL* y,uint n_samples,BaseForest* forest,uint n_trees_beg,uint n_trees_end,uint* oob_count,REAL* oob_prediction,REAL* importances){
-    Tree** tree=forest->tree;
-    uint n_features=forest->n_features;
-    uint max_features=forest->max_features;
-    uint max_depth=forest->max_depth;
-    uint min_sample_leaf=forest->min_sample_leaf;
-    uint find_split_algorithm=forest->find_split_algorithm;
-    uint random_seed=forest->random_seed;
-    bool oob=forest->oob;
-    bool compute_importance=forest->compute_importance;
-    bool bootstrap=forest->bootstrap;
-    
-    uint i,j;
-    REAL *oob_prediction_tmp,*importance_tmp;
-    REAL** sub_X;
-    REAL* sub_y;
-    bool* mask;//for oob prediction
-    if (oob) {
-        oob_prediction_tmp=new REAL[n_samples];
-        for (i=0; i<n_samples; i++) {
-            oob_prediction[i]=0;
-            oob_count[i]=0;
-        }
-        mask=new bool[n_samples];
-    }
-    if (compute_importance) {
-        importance_tmp=new REAL[n_features];
-        for (j=0; j<n_features; j++) {
-            importances[j]=0;
-        }
-    }
-    if (bootstrap) {
-        sub_X=new REAL* [n_samples];
-        sub_y=new REAL[n_samples];
-    }
-    for (i=n_trees_beg; i<n_trees_end; i++) {
-        if (oob) {
-            for (j=0; j<n_samples; j++) {
-                mask[j]=false;
-            }
-        }
-        if (bootstrap) {
-            for (j=0; j<n_samples; j++) {
-                //                uint idx=rand()%n_samples;
-                uint idx=randomMT()%n_samples;
-                sub_X[j]=X[idx];
-                sub_y[j]=y[idx];
-                if (oob) {
-                    mask[idx]=true;
-                }
-            }
-        }else {
-            sub_X=X;
-            sub_y=y;
-        }
-        if (1) {
-            fprintf(stderr, "Random forest: building tree %d\n",i);
-        }
-        //build tree
-        tree[i]=new TreeRegressor(n_features,\
-                                  max_features,\
-                                  min_sample_leaf,\
-                                  max_depth,\
-                                  find_split_algorithm,\
-                                  random_seed,\
-                                  1);
-        tree[i]->build(sub_X, sub_y, n_samples);
-//        tree[i]->resize();
-        if (oob) {
-            tree[i]->predict(X, oob_prediction_tmp, mask, n_samples, n_features);
-            for (j=0; j<n_samples; j++) {
-                if (mask[j]) {
-                    continue;
-                }
-                oob_count[j]++;
-                oob_prediction[j]+=oob_prediction_tmp[j];
-            }
-        }
-        if (compute_importance) {
-            tree[i]->compute_importance(importance_tmp);
-            for (j=0; j<n_features; j++) {
-                importances[j]+=importance_tmp[j];
-            }
-            
-        }
-    }
-    if (oob) {
-        delete []mask;
-        delete []oob_prediction_tmp;
-    }
-    if (compute_importance) {
-        delete []importance_tmp;
-    }
-    if (bootstrap) {
-        delete []sub_X;
-        delete []sub_y;
-    }
-}
-
-int RandomForestRegressor::save_model(const char *filename){
-    FILE* fp=fopen(filename,"w");
-    if (!fp) {
-        fprintf(stderr, "Error: Cannot open file %s for save RandomForestRegressor model.Please check your file path is correct.\n",filename);
-        return false;
-    }
-    fprintf(fp, "RandomForestRegressor\n");
-    BaseForest::save_model(fp);
-    fclose(fp);
-    return true;
-}
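-
-/* Minimal usage sketch (illustrative only; it assumes REAL, uint and ENSEMBLE_SUCCESS come
- * from the headers included via BaseForest.h, and that X and y are already loaded):
- *
- *   #include "RandomForestRegressor.h"
- *
- *   void train_rf_toy(REAL** X, REAL* y, uint n_samples, uint n_features) {
- *       // 200 trees, depth 20, >=1 sample per leaf, 1/3 of the features per split,
- *       // bootstrap with OOB estimate, no importances, seed 0, 4 threads, verbose output.
- *       RandomForestRegressor rf(200, n_features, 20, 1, 0.33, true, true,
- *                                false, 0, 4, true);
- *       if (rf.build(X, y, n_samples) == ENSEMBLE_SUCCESS) {
- *           REAL* pred = new REAL[n_samples];
- *           rf.predict(X, pred, n_samples, n_features);
- *           rf.save_model("rf_regressor.model");
- *           delete [] pred;
- *       }
- *   }
- */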
-#endif
diff --git a/include/tensemble/RandomGenerator.h b/include/tensemble/RandomGenerator.h
deleted file mode 100644
index 824cc20..0000000
--- a/include/tensemble/RandomGenerator.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* * * * *
- *  RandomGenerator.h
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  Copyright (C) 2012, Rongkai Xia, shinekaixia at gmail.com
- *
- * * * * */
-
-#ifndef libTM_RandomGenerator_h
-#define libTM_RandomGenerator_h
-//#include    "cokus.h"
-#define MAX_UINT_COKUS 4294967295
-#define uint32 unsigned long
-#define SMALL_INT char
-
-#ifdef MATLAB
-#define SMALL_INT_CLASS mxCHAR_CLASS //will be used to allocate memory t
-#endif
-
-
-//these two functions are implemented in cokus.cpp
-void seedMT(uint32 seed);
-uint32 randomMT(void);
-
-REAL unif_rand(){
-    return (((REAL)randomMT())/((REAL)MAX_UINT_COKUS));
-}
-
-#endif
diff --git a/include/tensemble/ReadData.h b/include/tensemble/ReadData.h
deleted file mode 100644
index 328054f..0000000
--- a/include/tensemble/ReadData.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/* * * * *
- *  ReadData.h 
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  Copyright (C) 2012, Rongkai Xia, shinekaixia at gmail.com
- *
- * * * * */
-
-#ifndef libTM_ReadData_h
-#define libTM_ReadData_h
-#include "FeatureData.h"
-#include "fstream"
-#include <cstdlib>
-using namespace std;
-#define PRINT_DEBUG
-
-struct Data {
-public:
-    Data(){
-        X=NULL;
-        y=NULL;
-        n_samples=n_features=-1;
-    }
-    ~Data(){
-        if (X) {
-            for (int i=0; i<n_samples; i++) {
-                delete []X[i];
-            }
-            delete []X;
-            X=NULL;
-        }
-        if (y) {
-            delete []y;
-            y=NULL;
-        }
-    }
-    void set_size(int n_samples,int n_features){
-        this->n_samples=n_samples;
-        this->n_features=n_features;
-        X=new REAL*[n_samples];
-        y=new REAL[n_samples];
-        for (int i=0; i<n_samples; i++) {
-            X[i]=new REAL[n_features];
-        }
-    }
-public:
-    REAL** X;
-    REAL* y;
-    int n_samples;
-    int n_features;
-};
-
-bool readData(Data& data, const char* x_file,const char* y_file=NULL){
-    char* endptr,*idx,*val;
-    ifstream x_input(x_file);
-    ifstream y_input;
-    if (y_file) {
-        y_input.open(y_file);
-    }
-    int x_linenum,y_linenum;
-    x_linenum=y_linenum=0;
-    int n_samples=0;
-    string strline;
-	if (x_input.fail()) {
-	    fprintf(stderr, "Error: unable to open data point file \"%s\".\n", x_file);
-	    return false;
-    }
-    if (y_file && y_input.fail()) {
-        fprintf(stderr, "Error: unable to open target file \"%s\".\n",y_file);
-        return false;
-    }
-    //check file correctness
-    getline(x_input,strline);
-    while (!x_input.eof()) {
-        x_linenum++;
-        getline(x_input,strline);
-    }
-    if (y_file) {
-        getline(y_input,strline);
-        while (!y_input.eof()) {
-            y_linenum++;
-            getline(y_input,strline);
-        }
-        if (x_linenum!=y_linenum+1) {
-            fprintf(stderr, "Error: number of samples in data point file(%d samples) and target file(%d samples) are not consistent.Please check your input file.\n",x_linenum-1,y_linenum );
-            return false;
-        }
-        y_input.clear();
-        y_input.seekg(0,ios_base::beg);
-    }
-    //reset
-    x_input.clear();
-    x_input.seekg(0,ios_base::beg);
-    
-    n_samples=x_linenum-1;
-    getline(x_input, strline);
-    int n_features=(int)strtol(strline.c_str(), &endptr, 10);
-    if (endptr == strline.c_str() || (*endptr != '\0' && !isspace(*endptr))){
-        fprintf(stderr, "Error: wrong input format at line %d in file %s\n",1,x_file);
-        return false;
-    }
-    if (n_features<=0) {
-        fprintf(stderr, "Error: number of features is 0 in training file.Please check your input file.\n");
-        return false;
-    }
-    //read data
-    data.set_size(n_samples, n_features);
-    for (uint i=0; i<n_samples; i++) {
-        if (y_file) {
-            getline(y_input,strline);
-            data.y[i]=strtod(strline.c_str(), &endptr);        
-            if (endptr == strline.c_str() || (*endptr != '\0' && !isspace(*endptr))){
-                fprintf(stderr, "Error: wrong input format at line %d in file %s\n",i+1,y_file);
-                return false;
-            }
-        }
-        
-        getline(x_input, strline);
-        char* line = strdup(strline.c_str()); 
-//        cout<<line<<endl;
-        idx = NULL;
-        idx=strtok(line, ":");
-        if (idx==NULL) {
-            fprintf(stderr, "Error: Empty line at line %d in file %s\n",i+2,x_file);
-            free(line);
-            return false;
-        }
-        while (idx!=NULL) { // tok is feature:value
-            val=strtok(NULL, " \t");
-            if (val==NULL) {
-                break;
-            }
-            int index=(int)strtol(idx, &endptr, 10);
-            // check for errors
-            if (endptr == idx || (*endptr != '\0' && !isspace(*endptr))){
-                fprintf(stderr, "Error: wrong input format at line %d in file %s\n",i+2,x_file);
-                free(line);
-                return false;
-            }
-            if (index==0 || index>n_features) {
-                fprintf(stderr, "Error: wrong input format at line %d in file %s.index must from 1 to n_features(number of features).Please check your input file.\n",i+2,x_file);
-                free(line);
-                return false;
-            }
-            REAL value=strtod(val, &endptr);
-            if (endptr == val || (*endptr != '\0' && !isspace(*endptr))){
-                fprintf(stderr, "Error: wrong input format at line %d in file %s\n",i+2,x_file);
-                free(line);
-                return false;
-            }
-            data.X[i][index-1]=value;
-            idx=strtok(NULL, ":");
-        }
-        free(line);
-    }
-    return true;
-}
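-
-/* Example of the expected input format (illustrative only): the data point file starts with
- * the number of features, followed by one sample per line in sparse "index:value" form with
- * 1-based indices; the optional target file holds one target per line, e.g.
- *
- *   x_file:
- *     4
- *     1:0.5 3:2.0
- *     2:1.0 4:-0.7
- *
- *   y_file:
- *     1.5
- *     0.2
- */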
-
-#endif
diff --git a/include/tensemble/Tree.h b/include/tensemble/Tree.h
deleted file mode 100644
index 20e1911..0000000
--- a/include/tensemble/Tree.h
+++ /dev/null
@@ -1,731 +0,0 @@
-/* * * * *
- *  Tree.h
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  Copyright (C) 2012, Rongkai Xia, shinekaixia at gmail.com
- *
- * * * * */
-
-#ifndef libTM_MultiThread_Tree_h
-#define libTM_MultiThread_Tree_h
-
-#include <iostream>
-#include <string>
-#include "TreeNode.h"
-#include "FeatureData.h"
-#include "Criterion.h"
-#include "ClassificationCriterion.h"
-#include <cmath>
-#include "RandomGenerator.h"
-#include "assert.h"
-
-#include <boost/bind.hpp>
-#include <boost/thread/thread.hpp>
-#define TREE_MAX_DEPTH  30
-//using namespace boost;
-
-enum SplitAlgorithm {
-    FIND_BEST = 0,
-    FIND_RANDOM = 1
-};
-
-struct MyPair {
-    uint index;
-    REAL value;
-    bool friend operator < (const MyPair& a,const MyPair& b){
-        return a.value<b.value;
-    }
-};
-
-//split_args for the parallel best-split search, because boost::bind can handle at most 8 parameters
-struct split_args {
-    uint* sample_ind;
-    uint s_ind_beg;
-    uint s_ind_end;
-    uint f_beg;
-    uint f_end;
-    uint nSamples;
-    uint min_samples_leaf;
-};
-//for parallel find_best_split
-void find_best_split_range(bool *skip,Criterion *criterion,REAL** X,REAL* y,split_args args,uint& feature_split,REAL& value_split,REAL& min_error);
-
-class Tree {
-    /*base decision tree*/
-
-public:
-    Criterion **p_criterion;
-    int criterion_name;
-    uint min_sample_leaf;
-    uint n_features;
-    uint n_classes;//n_classes=1 for regression
-    uint max_features;
-    uint max_depth;
-    uint find_split_algorithm;
-    uint random_seed;
-
-    TreeNode **nodes;
-    sint *nodes_deep;
-    uint numNodes;
-    uint max_nNodes;
-
-    uint n_threads;
-
-public:
-    Tree(int criterion,uint n_classes,uint n_feature,uint max_features,uint min_sample_leaf,uint max_depth,uint find_split_algorithm=FIND_BEST,\
-         uint random_seed=0,uint n_thread=1);
-
-    virtual ~Tree();
-
-    int build(REAL** X,REAL* y,uint nSamples);
-
-    bool find_split(uint node_id,REAL** X,REAL* y,uint* sample_ind,uint s_ind_beg,uint s_ind_end,uint nSamples,uint& feature_split,REAL& value_split,REAL& min_error);
-
-    bool find_best_split(uint node_id,REAL** X,REAL* y,uint* sample_ind,uint s_ind_beg,uint s_ind_end,uint nSamples,uint& feature_split,REAL& value_split,REAL& min_error);
-
-    bool find_random_split(uint node_id,uint& feature_split,REAL& value_split,REAL& min_error);
-
-    virtual void predict(REAL** X,REAL* pred,uint nSamples,uint nFeatures);
-
-    //predict with mask
-    virtual void predict(REAL** X,REAL* pred,bool* mask,uint nSamples,uint nFeatures);
-
-    void predict_terminal_region(REAL** X,uint* region,uint nSamples,uint nFeatures);
-
-    virtual void compute_importance(REAL* importance){};
-
-    //for parallel find best split
-    bool find_best_split_parallel(uint node_id,REAL** X,REAL* y,uint* sample_ind,uint s_ind_beg,uint s_ind_end,uint nSamples,uint& feature_split,REAL& value_split,REAL& min_error);
-
-    void resize();
-
-protected:
-    void init_nodes(uint nSamples);
-
-    void init_criterion();
-
-    uint updateNodeSampleMap(uint node_id,uint l_node_id,uint r_node_id,\
-                             uint* sample_ind,uint* sample_ind_swap,REAL** X,\
-                             uint s_ind_beg,uint s_ind_end,uint feature_split,\
-                             REAL value_split);
-};
-
-Tree::Tree(int criterion,uint n_classes,uint n_features,uint max_features,uint min_sample_leaf,uint max_depth,uint find_split_algorithm,uint random_seed,uint n_threads){
-#ifdef DEBUG
-    assert(criterion==CRITERION_MSE || criterion==CRITERION_GINI || criterion==CRITERION_ENTROPY);
-    assert(find_split_algorithm==FIND_RANDOM || find_split_algorithm==FIND_BEST);
-    assert(max_features<=n_features);
-    assert(n_threads>=1);
-    if (criterion==CRITERION_MSE) {
-        assert(n_classes==1);
-    }
-#endif
-    if (max_features>n_features || max_features<=0) {
-        max_features=n_features;
-    }
-    int max_thread=boost::thread::hardware_concurrency();
-    if (n_threads>max_thread) {
-        n_threads=max_thread;
-    }
-    max_features=MAX(1,max_features);
-
-    //reset number of threads if max_features is too small
-    if (n_threads>1 && max_features/n_threads<THREAD_MIN_FEATURES) {
-//        fprintf(stderr, "WARNNING:Number of thread is %d,but max_feature=%d.Each thread at least need %d features to find best split for internal nodes in decision tree.Now set number of thread = %d\n",n_threads,max_features,THREAD_MIN_FEATURES,MAX(1,(uint)max_features/THREAD_MIN_FEATURES));
-        n_threads=MAX(1,(uint)max_features/THREAD_MIN_FEATURES);
-    }
-    this->n_threads=n_threads;
-    this->criterion_name=criterion;
-    switch (find_split_algorithm) {
-        case FIND_BEST:
-            this->find_split_algorithm=FIND_BEST;
-            break;
-        case FIND_RANDOM:
-            this->find_split_algorithm=FIND_RANDOM;
-            break;
-        default:
-            break;
-    }
-    this->n_features=n_features;
-    this->n_classes=n_classes;
-    this->max_depth=MIN(TREE_MAX_DEPTH, max_depth);
-    this->max_features=max_features;
-    this->min_sample_leaf=min_sample_leaf;
-    this->random_seed=random_seed;
-    this->numNodes=0;
-    this->nodes=NULL;
-    this->p_criterion=NULL;
-    this->nodes_deep=NULL;
-}
-Tree::~Tree(){
-    if (nodes) {
-        for (uint i=0; i<max_nNodes; i++) {
-            delete nodes[i];
-            nodes[i]=NULL;
-        }
-        delete []nodes;
-        nodes=NULL;
-    }
-    if (p_criterion){
-        for (uint i=0; i<n_threads; i++) {
-            delete p_criterion[i];
-            p_criterion[i]=NULL;
-        }
-        delete []p_criterion;
-        p_criterion=NULL;
-    }
-    if (nodes_deep) {
-        delete []nodes_deep;
-        nodes_deep=NULL;
-    }
-}
-void Tree::resize(){
-    if ((REAL)max_nNodes/numNodes<1.3) {
-//        cout<<max_nNodes<<endl;
-//        cout<<(REAL)max_nNodes/numNodes<<endl;
-//        cout<<numNodes<<endl;
-        return;
-    }
-    if (nodes) {
-        for (uint i=numNodes+1; i<max_nNodes; i++) {
-            delete nodes[i];
-            nodes[i]=NULL;
-        }
-        max_nNodes=numNodes+1;
-    }
-}
-void Tree::init_criterion(){
-    this->p_criterion=new Criterion*[n_threads];
-    switch (criterion_name) {
-        case CRITERION_MSE:
-            for (uint i=0; i<n_threads; i++) {
-                p_criterion[i]=new MSE;
-            }
-            break;
-        case CRITERION_GINI:
-            for (uint i=0; i<n_threads; i++) {
-                p_criterion[i]=new Gini(n_classes);
-            }
-            break;
-        case CRITERION_ENTROPY:
-            for (uint i=0; i<n_threads; i++) {
-                p_criterion[i]=new Entropy(n_classes);
-            }
-            break;
-        default:
-            break;
-    }
-}
-void Tree::init_nodes(uint nSamples){
-    numNodes=1;
-    /*===============max_nNodes must be chosen carefully==========================
-     * too small gives poor results; too big wastes memory and may slow things down
-     */
-    max_nNodes=2*(uint)((REAL)floor((REAL)(nSamples/(1>((int)min_sample_leaf-4)?1:(min_sample_leaf-4))))+1);
-    if (max_nNodes>pow((REAL)2.0,(REAL)max_depth)) {
-        max_nNodes=pow((REAL)2.0, (REAL)max_depth)+1;
-    }
-    nodes=new TreeNode*[max_nNodes];
-    nodes_deep=new sint[max_nNodes];
-    for (uint i=0; i<max_nNodes; i++) {
-        nodes[i]=new TreeNode;
-    }
-    nodes_deep[ROOT]=1;
-}
-
-uint Tree::updateNodeSampleMap(uint node_id,uint l_node_id,uint r_node_id,\
-                               uint* sample_ind,uint* sample_ind_swap,REAL** X,\
-                               uint s_ind_beg,uint s_ind_end,uint feature_split,\
-                               REAL value_split){
-    /* Map sample indices in the parent node to the left and right children.
-     * Returns the number of samples in the left child.
-     */
-
-#ifdef DEBUG
-    assert(s_ind_beg>=0 && s_ind_beg<s_ind_end);
-    assert(s_ind_end>0);
-#endif
-    uint i,count=s_ind_beg;
-    if (l_node_id>=max_nNodes || r_node_id>=max_nNodes) {
-        return -1;
-    }
-    nodes[l_node_id]->nSamples=0;
-    nodes[r_node_id]->nSamples=0;
-    for (i=s_ind_beg; i<s_ind_end; i++) {
-        uint idx=sample_ind[i];
-        if (X[idx][feature_split]<=value_split) {
-            nodes[l_node_id]->nSamples++;
-            sample_ind_swap[count++]=idx;
-        }
-    }
-    for (i=s_ind_beg; i<s_ind_end; i++) {
-        uint idx=sample_ind[i];
-        if (X[idx][feature_split]>value_split) {
-            nodes[r_node_id]->nSamples++;
-            sample_ind_swap[count++]=idx;
-        }
-    }
-#ifdef DEBUG
-    assert(count==s_ind_end);
-#endif
-
-    for (i=s_ind_beg; i<s_ind_end; i++) {
-        sample_ind[i]=sample_ind_swap[i];
-    }
-    return nodes[l_node_id]->nSamples;
-}
-
-int Tree::build(REAL** X,REAL* y,uint nSamples){
-    /* Build the decision tree.
-     * X,y: the original data
-     * nSamples: number of samples in X
-     */
-#ifdef DEBUG
-    if (criterion_name==CRITERION_MSE) {
-        assert(n_classes==1);
-    }else{
-        assert(n_classes>=2);
-    }
-#endif
-    uint i,l_node_id,r_node_id,work_node_id,feature_split,sample_ind_beg,sample_ind_end;
-    REAL value_split,ini_error,min_error;
-    uint *node_beg,*node_end,*sample_ind,*sample_ind_swap;
-
-    //initialize criterion and nodes
-    init_criterion();
-    init_nodes(nSamples);
-    nodes[ROOT]->nSamples=nSamples;
-    node_beg=new uint[max_nNodes];
-    node_end=new uint[max_nNodes];
-    sample_ind=new uint[nSamples];
-    sample_ind_swap=new uint[nSamples];
-    node_beg[ROOT]=0;
-    node_end[ROOT]=nSamples;
-    for (i=0; i<nSamples; i++) {
-        sample_ind[i]=i;
-    }
-    //main
-    for (work_node_id=ROOT; work_node_id<max_nNodes; work_node_id++) {
-        if (work_node_id>numNodes) {
-            break;
-        }
-        sample_ind_beg=node_beg[work_node_id];
-        sample_ind_end=node_end[work_node_id];
-        //initialize per-thread criteria: give every thread its own criterion copy
-        for (i=0; i<n_threads; i++) {
-            p_criterion[i]->init(y, sample_ind,sample_ind_beg,sample_ind_end, nodes[work_node_id]->nSamples);
-        }
-        ini_error=p_criterion[0]->eval();
-        nodes[work_node_id]->ini_error=ini_error;
-        //terminal node if the number of samples in the node is less than 2*min_sample_leaf (or depth/node limits are hit)
-        if (nodes[work_node_id]->nSamples<2*min_sample_leaf || nodes_deep[work_node_id]==max_depth || numNodes>=max_nNodes-2) {
-//        if (nodes[work_node_id]->nSamples<=min_sample_leaf || nodes_deep[work_node_id]==max_depth || numNodes>=max_nNodes-2) {
-            nodes[work_node_id]->leaf=true;
-            //allocate memory for prediction
-            nodes[work_node_id]->pred=new REAL[n_classes];
-            p_criterion[0]->estimate(nodes[work_node_id]->pred);
-            continue;
-        }
-        //find split
-        min_error=ini_error;
-        bool flag=find_split(work_node_id, X, y,sample_ind, sample_ind_beg,sample_ind_end, nodes[work_node_id]->nSamples, feature_split, value_split,min_error);
-        //terminal node
-        if (!flag) {
-            nodes[work_node_id]->leaf=true;
-            p_criterion[0]->reset();
-            //allocate memory for prediction
-            nodes[work_node_id]->pred=new REAL[n_classes];
-            p_criterion[0]->estimate(nodes[work_node_id]->pred);
-            continue;
-        }
-        //found split
-        nodes[work_node_id]->best_error=min_error;
-        nodes[work_node_id]->feature_split=feature_split;
-        nodes[work_node_id]->value_split=value_split;
-
-        //set left,right child
-        l_node_id=++numNodes;
-        r_node_id=++numNodes;
-        nodes[work_node_id]->left_child=l_node_id;
-        nodes[work_node_id]->right_child=r_node_id;
-        nodes_deep[l_node_id]=nodes_deep[r_node_id]=nodes_deep[work_node_id]+1;
-
-        //map samples in parent node to left,right child
-        uint mid=updateNodeSampleMap(work_node_id, l_node_id,r_node_id, sample_ind,sample_ind_swap, X, sample_ind_beg, sample_ind_end, feature_split, value_split);
-        node_beg[l_node_id]=sample_ind_beg;
-        node_end[l_node_id]=sample_ind_beg + mid;
-        node_beg[r_node_id]=sample_ind_beg + mid;
-        node_end[r_node_id]=sample_ind_end;
-    }
-    delete []sample_ind_swap;
-    delete []sample_ind;
-    delete []node_beg;
-    delete []node_end;
-    delete []nodes_deep;//no more need nodes_deep
-    nodes_deep=NULL;
-    //no more need criterion.
-    for (uint i=0; i<n_threads; i++) {
-        delete p_criterion[i];
-    }
-    delete []p_criterion;
-    p_criterion=NULL;
-    resize();
-    return ENSEMBLE_SUCCESS;
-}
-
-bool Tree::find_split(uint node_id,REAL** X,REAL* y,uint* sample_ind,uint s_ind_beg,uint s_ind_end,uint nSamples,uint& feature_split,REAL& value_split,REAL& min_error){
-    if (find_split_algorithm==FIND_BEST) {
-        if (n_threads>1) {
-            return find_best_split_parallel(node_id, X,y, sample_ind, s_ind_beg,s_ind_end,nSamples, feature_split, value_split,min_error);
-        }
-        return find_best_split(node_id, X,y, sample_ind, s_ind_beg,s_ind_end,nSamples, feature_split, value_split,min_error);
-    }
-    return -1;
-}
-
-bool Tree::find_best_split(uint node_id,REAL** X,REAL* y,uint* sample_ind,uint s_ind_beg,uint s_ind_end,uint nSamples,uint& feature_split,REAL& value_split,REAL& min_error){
-    /* SINGLE-THREAD FIND SPLIT
-     * Find the best split for internal node node_id.
-     * X,y are the original data.
-     * Samples in internal node node_id are specified by sample_ind[s_ind_beg:s_ind_end].
-     */
-#ifdef DEBUG
-    assert(s_ind_beg>=0 && s_ind_beg<s_ind_end);
-    assert((s_ind_end-s_ind_beg) ==nSamples);
-#endif
-    REAL ini_error=min_error;
-    uint i,j,node_nSamples,f,count;
-    bool same_data=true;
-    //do not consider split if all data are identical.
-    REAL pre_y=y[sample_ind[s_ind_beg]];
-    for (i=s_ind_beg+1; i<s_ind_end; i++) {
-        uint idx=sample_ind[i];
-        if (y[idx]!=pre_y) {
-            same_data=false;
-            break;
-        }
-    }
-    if (same_data) {
-        return false;
-    }
-
-    Criterion *criterion=p_criterion[0];
-    bool* skip=new bool[n_features];
-    uint* skip_idx=new uint[n_features];
-    node_nSamples=nodes[node_id]->nSamples;
-    MyPair* x_f=new MyPair[nSamples];
-
-    //generate random split feature
-    for (i=0; i<n_features; i++) {
-        skip[i]=true;
-        skip_idx[i]=i;
-    }
-    count=n_features;
-    for (i=0; i<max_features; i++) {
-        j=rand_r(&this->random_seed)%count;
-//        j=randomMT()%count;
-        uint idx=skip_idx[j];
-        skip[idx]=false;
-        std::swap(skip_idx[j], skip_idx[--count]);
-    }
-    //find best split main loop
-    for (f=0; f<n_features; f++) {
-        if (skip[f]) {
-            continue;
-        }
-        criterion->reset();
-        count=0;
-        for (i=s_ind_beg; i<s_ind_end; i++) {
-            uint idx=sample_ind[i];
-            x_f[count].index=idx;
-            x_f[count].value=X[idx][f];
-            count++;
-        }
-#ifdef DEBUG
-        assert(count==nSamples);
-#endif
-        //sort samples by feature f
-        sort(x_f, x_f+nSamples);
-        if (x_f[0].value>=x_f[nSamples-1].value) {
-            continue;
-        }
-        count=0;
-        for (i=s_ind_beg; i<s_ind_end-1; i++) {
-            uint loc=x_f[count].index;
-            uint nLeft=criterion->update_next(y, loc);
-            //do not consider a split if the feature values are identical.
-            if (x_f[count].value==x_f[count+1].value || nLeft<min_sample_leaf || nSamples-nLeft<min_sample_leaf) {
-//            if (x_f[count].value==x_f[count+1].value){
-                count++;
-                continue;
-            }
-            REAL error=criterion->eval();
-            if (error<min_error) {
-                min_error=error;
-                feature_split=f;
-                value_split=0.5*(x_f[count].value+x_f[count+1].value);
-            }
-            count++;
-        }
-    }
-//    fprintf(stderr, "\t%d,%lf,%lf\n",min_error==ini_error, ini_error, min_error);
-    delete []skip;
-    delete []skip_idx;
-    delete []x_f;
-    return min_error!=ini_error;
-}
-
-bool Tree::find_best_split_parallel(uint node_id, REAL **X, REAL *y, uint *sample_ind, uint s_ind_beg, uint s_ind_end, uint nSamples, uint &feature_split, REAL &value_split, REAL &min_error){
-    /* MULTI-THREAD FIND SPLIT
-     * Find the best split for internal node node_id in parallel.
-     * X,y are the original data.
-     * Samples in internal node node_id are specified by sample_ind[s_ind_beg:s_ind_end].
-     */
-    uint *f_split,i,j,count;
-    REAL *v_split,*p_min_error;
-    struct split_args args;
-    f_split=new uint[n_threads];
-    v_split=new REAL[n_threads];
-    p_min_error=new REAL[n_threads];
-    bool* skip=new bool[n_features];
-    uint* skip_idx=new uint[n_features];
-    args.s_ind_beg=s_ind_beg;
-    args.s_ind_end=s_ind_end;
-    args.sample_ind=sample_ind;
-    args.nSamples=nSamples;
-    args.min_samples_leaf=min_sample_leaf;
-    REAL ini_error=min_error;
-
-    //generate random split feature
-    for (i=0; i<n_features; i++) {
-        skip[i]=true;
-        skip_idx[i]=i;
-    }
-    count=n_features;
-    for (i=0; i<max_features; i++) {
-        j=rand_r(&this->random_seed)%count;
-//        j=randomMT()%count;
-        uint idx=skip_idx[j];
-        skip[idx]=false;
-        std::swap(skip_idx[j], skip_idx[--count]);
-    }
-    boost::thread **thread=new boost::thread*[n_threads];
-    for (i=0; i<n_threads; i++) {
-        p_min_error[i]=ini_error;
-        args.f_beg=(n_features/n_threads*i);
-        args.f_end=(n_features/n_threads*(i+1));
-        if (i==n_threads-1) {
-            args.f_end=n_features;
-        }
-        thread[i]=new boost::thread(boost::bind(find_best_split_range,skip, p_criterion[i], X, y, args, boost::ref(f_split[i]), boost::ref(v_split[i]), boost::ref(p_min_error[i])));
-    }
-    for (i=0; i<n_threads; i++) {
-        thread[i]->join();
-        delete thread[i];
-        if (p_min_error[i]<min_error) {
-            min_error=p_min_error[i];
-            feature_split=f_split[i];
-            value_split=v_split[i];
-        }
-    }
-    delete []thread; //thread was allocated with new[], so use delete[]
-    delete []skip;
-    delete []skip_idx;
-    delete []f_split;
-    delete []v_split;
-    delete []p_min_error;
-    return min_error!=ini_error;
-}
-void Tree::predict(REAL **X,REAL* pred,uint nSamples,uint nFeatures){
-    /* Make predictions.
-     * For classification, pred holds the probability of each class.
-     */
-#ifdef DEBUG
-    assert(nFeatures==this->n_features);
-    if (criterion_name==CRITERION_MSE) {
-        assert(n_classes==1);
-    }else {
-        assert(n_classes>=2);
-    }
-#endif
-    uint node_id,f_s;
-    REAL v_s,normalizer=0.0;
-    for (uint i=0; i<nSamples; i++) {
-        node_id=ROOT;
-        while (!nodes[node_id]->leaf) {
-            f_s=nodes[node_id]->feature_split;
-            v_s=nodes[node_id]->value_split;
-            if (X[i][f_s]<=v_s) {
-                node_id=nodes[node_id]->left_child;
-            }else {
-                node_id=nodes[node_id]->right_child;
-            }
-        }
-        normalizer=0.0;
-        for (uint j=0; j<n_classes; j++) {
-            pred[n_classes*i+j]=nodes[node_id]->pred[j];
-            normalizer+=nodes[node_id]->pred[j];
-        }
-        if (criterion_name!=CRITERION_MSE) {
-            if (normalizer!=0) {
-                for (uint j=0; j<n_classes; j++) {
-                    pred[n_classes*i+j]/=normalizer;
-                }
-            }
-        }
-    }
-}
-void Tree::predict(REAL **X,REAL* pred,bool* mask,uint nSamples,uint nFeatures){
-    /* Make predictions with a mask.
-     * For classification, pred holds the probability of each class.
-     */
-#ifdef DEBUG
-    assert(nFeatures==this->n_features);
-    if (criterion_name==CRITERION_MSE) {
-        assert(n_classes==1);
-    }
-#endif
-    uint node_id,f_s;
-    REAL v_s,normalizer;
-    for (uint i=0; i<nSamples; i++) {
-        if (mask[i]) {
-            continue;
-        }
-        node_id=ROOT;
-        while (!nodes[node_id]->leaf) {
-            f_s=nodes[node_id]->feature_split;
-            v_s=nodes[node_id]->value_split;
-            if (X[i][f_s]<=v_s) {
-                node_id=nodes[node_id]->left_child;
-            }else {
-                node_id=nodes[node_id]->right_child;
-            }
-        }
-        normalizer=0.0;
-        for (uint j=0; j<n_classes; j++) {
-            pred[n_classes*i+j]=nodes[node_id]->pred[j];
-            normalizer+=nodes[node_id]->pred[j];
-        }
-        if (criterion_name!=CRITERION_MSE) {
-            if (normalizer!=0) {
-                for (uint j=0; j<n_classes; j++) {
-                    pred[n_classes*i+j]/=normalizer;
-                }
-            }
-        }
-
-    }
-}
-
-void Tree::predict_terminal_region(REAL **X, uint* region,uint nSamples, uint nFeatures){
-    /* GET TERMINAL REGION FOR EACH SAMPLE */
-#ifdef DEBUG
-    assert(nFeatures==this->n_features);
-#endif
-    uint node_id,f_s;
-    REAL v_s;
-    for (uint i=0; i<nSamples; i++) {
-        node_id=ROOT;
-        while (!nodes[node_id]->leaf) {
-            f_s=nodes[node_id]->feature_split;
-            v_s=nodes[node_id]->value_split;
-            if (X[i][f_s]<=v_s) {
-                node_id=nodes[node_id]->left_child;
-            }else {
-                node_id=nodes[node_id]->right_child;
-            }
-        }
-        region[i]=node_id;
-    }
-}
-
-void find_best_split_range(bool *skip,Criterion *criterion,REAL** X,REAL* y,split_args args,uint& feature_split,REAL& value_split,REAL& min_error){
-    /* FOR MULTI-THREAD FIND SPLIT
-     * Find the best split for an internal node over the feature range [f_beg,f_end).
-     * X,y are the original data.
-     * Samples in the node are specified by sample_ind[s_ind_beg:s_ind_end].
-     * [f_beg,f_end) is the subrange of [0,n_features) assigned to this thread.
-     *
-     * Note:
-     * criterion must be initialized before calling find_best_split_range;
-     * each thread uses its own criterion.
-     */
-    uint s_ind_beg,s_ind_end,nSamples,f_beg,f_end,*sample_ind,min_samples_leaf;
-    min_samples_leaf=args.min_samples_leaf;
-    s_ind_beg=args.s_ind_beg;
-    s_ind_end=args.s_ind_end;
-    nSamples=args.nSamples;
-    f_beg=args.f_beg;
-    f_end=args.f_end;
-    sample_ind=args.sample_ind;
-    REAL ini_error=min_error;
-#ifdef DEBUG
-    assert(s_ind_beg>=0 && s_ind_beg<s_ind_end);
-    assert((s_ind_end-s_ind_beg) ==nSamples);
-#endif
-    uint i,f,count;
-    bool same_data=true;
-
-    //do not consider split if all data are identical.
-    REAL pre_y=y[sample_ind[s_ind_beg]];
-    for (i=s_ind_beg+1; i<s_ind_end; i++) {
-        uint idx=sample_ind[i];
-        if (y[idx]!=pre_y) {
-            same_data=false;
-            break;
-        }
-    }
-    if (same_data) {
-        return;
-    }
-
-    MyPair* x_f=new MyPair[nSamples];
-    criterion->init(y, sample_ind,s_ind_beg,s_ind_end, nSamples);
-    //find best split main loop
-    for (f=f_beg; f<f_end; f++) {
-        if (skip[f]) {
-            continue;
-        }
-        criterion->reset();
-        count=0;
-        for (i=s_ind_beg; i<s_ind_end; i++) {
-            uint idx=sample_ind[i];
-            x_f[count].index=idx;
-            x_f[count].value=X[idx][f];
-            count++;
-        }
-#ifdef DEBUG
-        assert(count==nSamples);
-#endif
-        //sort samples by feature f
-        sort(x_f, x_f+nSamples);
-        if (x_f[0].value>=x_f[nSamples-1].value) {
-            continue;
-        }
-        count=0;
-        for (i=s_ind_beg; i<s_ind_end-1; i++) {
-            uint loc=x_f[count].index;
-            uint nLeft=criterion->update_next(y, loc);
-            //do not consider a split if the feature values are identical.
-            if (x_f[count].value==x_f[count+1].value || nLeft<min_samples_leaf || nSamples-nLeft<min_samples_leaf) {
-//            if (x_f[count].value==x_f[count+1].value){
-                count++;
-                continue;
-            }
-            REAL error=criterion->eval();
-            if (error<min_error) {
-                min_error=error;
-                feature_split=f;
-                value_split=0.5*(x_f[count].value+x_f[count+1].value);
-            }
-            count++;
-        }
-    }
-    delete []x_f;
-
-}
-#endif
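
At its core, Tree::find_best_split above is a sorted threshold scan: for each candidate
feature it sorts the node's samples by that feature, sweeps every boundary between distinct
values while honoring min_sample_leaf, and keeps the midpoint threshold that minimizes the
criterion error. A minimal single-feature sketch of the same idea (illustration only, not
part of the commit; it substitutes a plain sum-of-squared-errors criterion for the tensemble
Criterion class):

    #include <algorithm>
    #include <limits>
    #include <utility>
    #include <vector>

    // Sum of squared errors of y[beg, end) around its mean.
    static double sse(const std::vector<double>& y, std::size_t beg, std::size_t end) {
        double mean = 0.0;
        for (std::size_t i = beg; i < end; ++i) mean += y[i];
        mean /= double(end - beg);
        double s = 0.0;
        for (std::size_t i = beg; i < end; ++i) s += (y[i] - mean) * (y[i] - mean);
        return s;
    }

    // Returns {threshold, error}; error stays +inf when no valid split exists.
    std::pair<double, double> bestSplitOneFeature(std::vector<std::pair<double, double> > xy, // (feature, target)
                                                  std::size_t minLeaf) {
        std::sort(xy.begin(), xy.end());               // sort samples by the feature value
        std::vector<double> y(xy.size());
        for (std::size_t i = 0; i < xy.size(); ++i) y[i] = xy[i].second;
        double bestErr = std::numeric_limits<double>::infinity();
        double bestThr = 0.0;
        std::size_t leaf = minLeaf > 0 ? minLeaf : 1;  // keep both children non-empty
        for (std::size_t i = leaf; i + leaf <= xy.size(); ++i) {
            if (xy[i - 1].first == xy[i].first) continue;        // identical values: no threshold here
            double err = sse(y, 0, i) + sse(y, i, y.size());
            if (err < bestErr) {
                bestErr = err;
                bestThr = 0.5 * (xy[i - 1].first + xy[i].first); // midpoint, as in the removed code
            }
        }
        return std::make_pair(bestThr, bestErr);
    }
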
diff --git a/include/tensemble/Tree.h.backup.h b/include/tensemble/Tree.h.backup.h
deleted file mode 100644
index bcfa0e6..0000000
--- a/include/tensemble/Tree.h.backup.h
+++ /dev/null
@@ -1,391 +0,0 @@
-//
-//  Tree.h
-//  libTM
-//
-//  Created by apple on 12-10-29.
-//  Copyright (c) 2012 Rongkai Xia. All rights reserved.
-//
-
-#ifndef libTM_Tree_h
-#define libTM_Tree_h
-#include <iostream>
-#include <string>
-#include "TreeNode.h"
-#include "FeatureData.h"
-#include "Criterion.h"
-#include <math.h>
-//#include "cokus.cpp"
-#include "RandomGenerator.h"
-#include "assert.h"
-#define LEFT(A) ((A)<<1)
-#define RIGHT(A) (((A)<<1)+1)
-#define MAX_DEPTH   100
-#define ROOT    1
-
-enum SplitAlgorithm {
-    FIND_BEST = 0,
-    FIND_RANDOM = 1
-    };
-
-struct MyPair {
-    uint index;
-    REAL value;
-    bool friend operator < (const MyPair& a,const MyPair& b){
-        return a.value<b.value;
-    }
-};
-
-class Tree {
-    /*decision tree*/
-    
-public:
-    Criterion *criterion;
-    uint min_sample_leaf;
-    uint n_features;
-    uint n_classes;//n_classes=1 for regression
-    uint max_features;
-    uint max_depth;
-    uint find_split_algorithm;
-    uint random_seed;
-    
-    TreeNode **nodes;
-    sint *nodes_deep;
-    uint numNodes;
-    uint max_nNodes;
-
-public:
-    Tree(int criterion,uint n_classes,uint n_feature,uint max_features,uint min_sample_leaf,\
-         uint max_depth,uint find_split_algorithm,\
-         uint random_seed=0);
-    
-    virtual ~Tree();
-    
-    int build(REAL** X,REAL* y,uint nSamples);
-    
-    bool find_split(uint node_id,REAL** X,REAL* y,uint* sample_ind,uint s_ind_beg,uint s_ind_end,uint nSamples,uint& feature_split,REAL& value_split,REAL& min_error);
-    
-    bool find_best_split(uint node_id,REAL** X,REAL* y,uint* sample_ind,uint s_ind_beg,uint s_ind_end,uint nSamples,uint& feature_split,REAL& value_split,REAL& min_error);
-    
-    bool find_random_split(uint node_id,uint& feature_split,REAL& value_split,REAL& min_error);
-    
-    void predict(REAL** X,REAL* pred,uint nSamples,uint nFeatures);
-    
-    virtual void compute_importance(REAL* importance)=0;
-protected:
-    void init_nodes(uint nSamples);
-    
-    uint updateNodeSampleMap(uint node_id,uint l_node_id,uint r_node_id,\
-                             uint* sample_ind,uint* sample_ind_swap,REAL** X,\
-                             uint s_ind_beg,uint s_ind_end,uint feature_split,\
-                             REAL value_split);
-};
-Tree::Tree(int criterion,uint n_classes,uint n_features,uint max_features,uint min_sample_leaf,\
-               uint max_depth,uint find_split_algorithm,\
-           uint random_seed){
-#ifdef DEBUG
-    assert(max_features<=n_features);
-#endif
-    max_features=MAX(1,max_features);
-    if (criterion==CRITERION_MSE) {
-        this->criterion=new MSE();
-    }else{
-        fprintf(stderr, "invalid criterion: %d\n",criterion);
-    }
-    switch (find_split_algorithm) {
-        case FIND_BEST:
-            this->find_split_algorithm=FIND_BEST;
-            break;
-        case FIND_RANDOM:
-            this->find_split_algorithm=FIND_RANDOM;
-            break;
-        default:
-            break;
-    }
-    this->n_features=n_features;
-    this->n_classes=n_classes;
-    this->max_depth=max_depth;
-    this->max_features=max_features;
-    this->min_sample_leaf=min_sample_leaf;
-    this->random_seed=random_seed;
-    this->numNodes=0;
-}
-Tree::~Tree(){
-    for (uint i=0; i<max_nNodes; i++) {
-        delete nodes[i];
-    }
-    delete []nodes;
-}
-
-void Tree::init_nodes(uint nSamples){
-    numNodes=1;//ROOT node
-    max_nNodes=2*(uint)((REAL)floor((REAL)(nSamples/(1>((int)min_sample_leaf-5)?1:(min_sample_leaf-5))))+1);
-    nodes=new TreeNode*[max_nNodes];
-    nodes_deep=new sint[max_nNodes];
-    for (uint i=0; i<max_nNodes; i++) {
-        nodes[i]=new TreeNode;
-    }
-    nodes_deep[ROOT]=1;
-}
-
-uint Tree::updateNodeSampleMap(uint node_id,uint l_node_id,uint r_node_id,\
-                               uint* sample_ind,uint* sample_ind_swap,REAL** X,\
-                               uint s_ind_beg,uint s_ind_end,uint feature_split,\
-                               REAL value_split){
-    /* Map sample indices in the parent node to the left and right children.
-     * Returns the number of samples in the left child.
-     */
-    
-#ifdef DEBUG
-    assert(s_ind_beg>=0 && s_ind_beg<s_ind_end);
-    assert(s_ind_end>0);
-#endif
-    uint i,count=s_ind_beg;
-    if (l_node_id>=max_nNodes || r_node_id>=max_nNodes) {
-        return -1;
-    }
-    nodes[l_node_id]->nSamples=0;
-    nodes[r_node_id]->nSamples=0;
-    for (i=s_ind_beg; i<s_ind_end; i++) {
-        uint idx=sample_ind[i];
-        if (X[idx][feature_split]<=value_split) {
-            nodes[l_node_id]->nSamples++;
-            sample_ind_swap[count++]=idx;
-        }
-    }
-    for (i=s_ind_beg; i<s_ind_end; i++) {
-        uint idx=sample_ind[i];
-        if (X[idx][feature_split]>value_split) {
-            nodes[r_node_id]->nSamples++;
-            sample_ind_swap[count++]=idx;
-        }
-    }
-#ifdef DEBUG
-    assert(count==s_ind_end);
-#endif
-    
-    for (i=s_ind_beg; i<s_ind_end; i++) {
-        sample_ind[i]=sample_ind_swap[i];
-    }
-    return nodes[l_node_id]->nSamples;
-}
-
-int Tree::build(REAL** X,REAL* y,uint nSamples){
-    //build decision tree
-    
-    uint i,l_node_id,r_node_id,work_node_id,feature_split,sample_ind_beg,sample_ind_end;
-    REAL value_split,ini_error,min_error;
-    uint *node_beg,*node_end,*sample_ind,*sample_ind_swap;
-    
-    //initialize nodes
-    init_nodes(nSamples);
-    nodes[ROOT]->nSamples=nSamples;
-    node_beg=new uint[max_nNodes];
-    node_end=new uint[max_nNodes];
-    sample_ind=new uint[nSamples];
-    sample_ind_swap=new uint[nSamples];
-    node_beg[ROOT]=0;
-    node_end[ROOT]=nSamples;
-    for (i=0; i<nSamples; i++) {
-        sample_ind[i]=i;
-    }
-    
-//    cout<<"build tree: max_nNodes="<<max_nNodes<<endl;
-    for (work_node_id=ROOT; work_node_id<max_nNodes; work_node_id++) {
-        if (work_node_id>numNodes) {
-            break;
-        }
-        sample_ind_beg=node_beg[work_node_id];
-        sample_ind_end=node_end[work_node_id];
-//        cout<<"work node="<<work_node_id<<endl;
-        //initialize criterion
-        criterion->init(y, sample_ind,sample_ind_beg,sample_ind_end, nodes[work_node_id]->nSamples);
-        ini_error=criterion->eval();
-        nodes[work_node_id]->ini_error=ini_error;
-        //terminal node if the number of samples in the node is at most min_sample_leaf
-        if (nodes[work_node_id]->nSamples<=min_sample_leaf || nodes_deep[work_node_id]==max_depth \
-            || numNodes>=max_nNodes-2) {
-            nodes[work_node_id]->leaf=true;
-            //allocate memory for prediction
-            nodes[work_node_id]->pred=new REAL[n_classes];
-            criterion->estimate(nodes[work_node_id]->pred);
-            cout<<"leaf node:"<<work_node_id<<" pred="<<nodes[work_node_id]->pred[0]<< endl;
-            continue;
-        }
-        //find split
-        bool flag=find_split(work_node_id, X, y,sample_ind, sample_ind_beg,sample_ind_end, nodes[work_node_id]->nSamples, feature_split, value_split,min_error);
-//            cout<<"split flag min_error "<<min_error<<endl;
-        //terminal node
-        if (!flag) {
-            nodes[work_node_id]->leaf=true;
-//                cout<<"leaf node:"<<work_node_id<<endl;
-//                criterion->init(y, mask, nodes[work_node_id]->nSamples, total_nSamples);
-            criterion->reset();
-            //allocate memory for prediction
-            nodes[work_node_id]->pred=new REAL[n_classes];
-            criterion->estimate(nodes[work_node_id]->pred);
-            cout<<"leaf node:"<<work_node_id<<" pred="<<nodes[work_node_id]->pred[0]<< endl;
-            continue;
-        }
-        //found split
-        nodes[work_node_id]->best_error=min_error;
-        nodes[work_node_id]->feature_split=feature_split;
-        nodes[work_node_id]->value_split=value_split;
-        
-        //set left,right child
-        l_node_id=++numNodes;
-        r_node_id=++numNodes;
-        nodes[work_node_id]->left_child=l_node_id;
-        nodes[work_node_id]->right_child=r_node_id;
-        nodes_deep[l_node_id]=nodes_deep[r_node_id]=nodes_deep[work_node_id]+1;
-        
-        fprintf(stderr, "deep=%d,(%d,%f,%f,pred=%f)\n",nodes_deep[work_node_id],feature_split+1,value_split,min_error,0.0);
-        
-        //map samples in parent node to left,right child
-        uint mid=updateNodeSampleMap(work_node_id, l_node_id,r_node_id, sample_ind,sample_ind_swap, X, sample_ind_beg, sample_ind_end, feature_split, value_split);
-        node_beg[l_node_id]=sample_ind_beg;
-        node_end[l_node_id]=sample_ind_beg + mid;
-        node_beg[r_node_id]=sample_ind_beg + mid;
-        node_end[r_node_id]=sample_ind_end;
-        //        cout<<"left_nSamples["<<l_node_id<<"]="<<nodes[l_node_id]->nSamples<<" right_nSamples["<<r_node_id<<"]="<<nodes[r_node_id]->nSamples<<endl;
-//        cout<<"left_beg="<<node_beg[l_node_id]<<"left_end="<<node_end[l_node_id]<<endl;
-//        cout<<"right_beg="<<node_beg[r_node_id]<<"right_end="<<node_end[r_node_id]<<endl;
-#ifdef PRINT_DEBUG
-        fprintf(stderr, "deep=%d,node_id=%d,feature_split=%d,value_split=%f,min error=%f\n",\
-                deep,work_node_id,feature_split,value_split,min_error);
-#endif
-    }
-//    delete []mask;
-    //release NodeSampleMap;
-    delete []sample_ind_swap;
-    delete []sample_ind;
-    delete []node_beg;
-    delete []node_end;
-    delete []nodes_deep;//no more need nodes_deep
-    delete criterion; //no more need Criterion
-    return SUCCESS;
-}
-
-bool Tree::find_split(uint node_id,REAL** X,REAL* y,uint* sample_ind,uint s_ind_beg,uint s_ind_end,uint nSamples,uint& feature_split,REAL& value_split,REAL& min_error){
-    if (find_split_algorithm==FIND_BEST) {
-        return find_best_split(node_id, X,y, sample_ind, s_ind_beg,s_ind_end,nSamples, feature_split, value_split,min_error);
-    }
-    return -1;
-}
-
-bool Tree::find_best_split(uint node_id,REAL** X,REAL* y,uint* sample_ind,uint s_ind_beg,uint s_ind_end,uint nSamples,uint& feature_split,REAL& value_split,REAL& min_error){
-    
-#ifdef DEBUG
-    assert(s_ind_beg>=0 && s_ind_beg<s_ind_end);
-    assert((s_ind_end-s_ind_beg) <=nSamples);
-#endif
-    
-    uint i,j,node_nSamples,f,count;
-    bool* skip=new bool[n_features];
-    uint* skip_idx=new uint[n_features];
-    min_error=HUGE_VAL;
-    node_nSamples=nodes[node_id]->nSamples;
-    MyPair* x_f=new MyPair[nSamples];
-    
-    //generate random split feature
-    for (i=0; i<n_features; i++) {
-        skip[i]=true;
-        skip_idx[i]=i;
-    }
-    count=n_features;
-//    cout<<"======================"<<endl;
-    for (i=0; i<max_features; i++) {
-//        j=rand()%count;
-        j=randomMT()%count;
-        uint idx=skip_idx[j];
-        skip[idx]=false;
-        std::swap(skip_idx[j], skip_idx[--count]);
-//        cout<<"skip="<<idx<<endl;
-    }
-//    criterion->init(y, mask, node_nSamples, total_nSamples);
-    //main loop
-//    cout<<"find_best_split"<<endl;
-    for (f=0; f<n_features; f++) {
-//        cout<<"feature="<<f<<endl;
-        if (skip[f]) {
-            continue;
-        }
-        criterion->reset();
-//        criterion->init(y, mask, node_nSamples, total_nSamples);
-        count=0;
-        for (i=s_ind_beg; i<s_ind_end; i++) {
-            uint idx=sample_ind[i];
-            x_f[count].index=idx;
-            x_f[count].value=X[idx][f];
-            count++;
-        }
-#ifdef DEBUG
-        assert(count==nSamples);
-#endif
-        //sort sample with feature f
-        sort(x_f, x_f+nSamples);
-        if (x_f[0].value>=x_f[nSamples-1].value) {
-            continue;
-        }
-        count=0;
-        for (i=s_ind_beg; i<s_ind_end-1; i++) {
-            uint loc=x_f[count].index;
-            uint nLeft=criterion->update_next(y, loc);
-//            cout<<"nLeft="<<nLeft<<endl;
-            //do not consider split if feature value are identical.
-            if (x_f[count].value==x_f[count+1].value) {
-                count++;
-                continue;
-            }
-            REAL error=criterion->eval();
-            cout<<"f="<<f<<' '<<error<<endl;
-            if (error<min_error) {
-                fprintf(stderr, "f=%d,pre_error=%.8f,now_error=%.8f\n",f,min_error,error);
-                min_error=error;
-                feature_split=f;
-                value_split=0.5*(x_f[count].value+x_f[count+1].value);
-            }
-//            cout<<"error="<<error<<endl;
-            count++;
-        }
-//        fprintf(stderr, "node_id=%d,nSamples=%d,f=%d,min_error=%.8lf\n",node_id,node_nSamples,f,min_error);
-    }
-    delete []skip;
-    delete []skip_idx;
-    delete []x_f;
-    return min_error!=HUGE_VAL;
-}
-void Tree::predict(REAL **X,REAL* pred,uint nSamples,uint nFeatures){
-#ifdef DEBUG
-    assert(nFeatures==this->n_features);
-#endif
-    uint node_id,f_s;
-    REAL v_s;
-    for (uint i=0; i<nSamples; i++) {
-        node_id=ROOT;
-//        fprintf(stderr, "sample %d:",i);
-//        for (uint j=0; j<n_features; j++) {
-//            cout<<"X["<<j<<"]="<<X[j][i]<<endl;
-//        }
-        while (!nodes[node_id]->leaf) {
-//            cout<<node_id<<' '<<nodes[node_id]->leaf<<endl;
-//            fprintf(stderr, "(%d,%d,%f)\n",node_id,nodes[node_id]->feature_split,\
-                    nodes[node_id]->value_split);
-            f_s=nodes[node_id]->feature_split;
-            v_s=nodes[node_id]->value_split;
-            if (X[i][f_s]<=v_s) {
-//                cout<<X[f_s][i]<<":"<<v_s<<" left"<<endl;
-//                node_id=LEFT(node_id);
-                node_id=nodes[node_id]->left_child;
-            }else {
-//                cout<<X[f_s][i]<<":"<<v_s<<" right"<<endl;
-//                node_id=RIGHT(node_id);
-                node_id=nodes[node_id]->right_child;
-            }
-        }
-//        cout<<node_id<<" pred="<<nodes[node_id]->pred[0]<<endl;
-        for (uint j=0; j<n_classes; j++) {
-            pred[i]=nodes[node_id]->pred[j];
-        }
-    }
-}
-#endif
diff --git a/include/tensemble/TreeClassifier.h b/include/tensemble/TreeClassifier.h
deleted file mode 100644
index e0f4c3f..0000000
--- a/include/tensemble/TreeClassifier.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/* * * * *
- *  TreeClassifier.h
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  Copyright (C) 2012, Rongkai Xia, shinekaixia at gmail.com
- *
- * * * * */
-
-
-#ifndef libTM_TreeClassifier_h
-#define libTM_TreeClassifier_h
-
-#include "Tree.h"
-
-class TreeClassifier:public Tree {
-    /* DecisionTreeClassifier
-     * label must be [0,1,...,n_classes-1] before calling TreeClassifier
-     */
-    
-public:
-    TreeClassifier(int split_criterion,\
-                   uint n_classes,\
-                   uint n_features,\
-                   uint max_features,\
-                   uint min_sample_leaf,\
-                   uint max_depth,\
-                   uint find_split_algorithm=FIND_BEST,\
-                   uint random_seed=0,\
-                   uint n_threads=1);
-    
-    void compute_importance(REAL* importance);
-};
-
-TreeClassifier::TreeClassifier(int split_criterion,\
-                               uint n_classes,\
-                               uint n_features,\
-                               uint max_features,\
-                               uint min_sample_leaf,\
-                               uint max_depth,\
-                               uint find_split_algorithm,\
-                               uint random_seed,\
-                               uint n_threads)\
-:Tree(split_criterion,n_classes,n_features,max_features,min_sample_leaf,max_depth,find_split_algorithm,random_seed,n_threads){
-}
-void TreeClassifier::compute_importance(REAL* importance){
-    //node from 1....numNodes
-    uint f;
-    REAL normalize=0;
-    for (uint i=0; i<n_features; i++) {
-        importance[i]=0.0;
-    }
-    for (uint i=ROOT; i<=numNodes; i++) {
-        // skip redundant nodes and leaf nodes
-        if (nodes[i]->leaf || nodes[i]->nSamples<=0) {
-            continue;
-        }
-        f=nodes[i]->feature_split;
-        importance[f]+=\
-        nodes[i]->nSamples*(nodes[i]->ini_error-nodes[i]->best_error);
-    }
-    for (uint i=0; i<n_features; i++) {
-        normalize+=importance[i];
-    }
-    if (normalize!=0) {
-        for (uint i=0; i<n_features; i++) {
-            importance[i]/=normalize;
-        }
-    }
-}
-#endif
diff --git a/include/tensemble/TreeNode.h b/include/tensemble/TreeNode.h
deleted file mode 100644
index a6b5599..0000000
--- a/include/tensemble/TreeNode.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/* * * * *
- *  TreeNode.h
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  Copyright (C) 2012, Rongkai Xia, shinekaixia at gmail.com
- *
- * * * * */
-
-
-#ifndef libTM_TreeNode_h
-#define libTM_TreeNode_h
-#include    "TypeDef.h"
-
-class TreeNode {
-public:
-    uint feature_split;
-    REAL value_split;
-    bool leaf;
-    uint nSamples;
-    uint left_child;
-    uint right_child;
-    REAL* pred;//only leaf nodes need to allocate memory
-    
-    //for compute feature importance
-    REAL ini_error;
-    REAL best_error;
-    TreeNode():feature_split(0),value_split(0.0),leaf(false),nSamples(0),left_child(0),right_child(0),pred(NULL),ini_error(0.0),best_error(0.0){
-    }
-    ~TreeNode(){
-        if (pred) {
-            delete []pred; //pred was allocated with new[], so use delete[]
-            pred=NULL;
-        }
-    }
-};
-
-#endif
diff --git a/include/tensemble/TreeRegressor.h b/include/tensemble/TreeRegressor.h
deleted file mode 100644
index 3ff811a..0000000
--- a/include/tensemble/TreeRegressor.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/* * * * *
- *  TreeRegressor.h 
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  Copyright (C) 2012, Rongkai Xia, shinekaixia at gmail.com
- *
- * * * * */
-
-#ifndef libTM_TreeRegressor_h
-#define libTM_TreeRegressor_h
-#include "Tree.h"
-
-class TreeRegressor:public Tree {
-    //DecisionTreeRegressor
-    
-public:
-    //constructor; for regression, n_classes=1
-    TreeRegressor(uint n_features,\
-                  uint max_features,\
-                  uint min_sample_leaf,\
-                  uint max_depth,\
-                  uint find_split_algorithm=FIND_BEST,\
-                  uint random_seed=0,\
-                  uint n_threads=1);
-    void compute_importance(REAL* importance);
-};
-
-TreeRegressor::TreeRegressor(uint n_features,\
-                             uint max_features,\
-                             uint min_sample_leaf,\
-                             uint max_depth,\
-                             uint find_split_algorithm,\
-                             uint random_seed,\
-                             uint n_threads)\
-:Tree(CRITERION_MSE,1,n_features,max_features,min_sample_leaf,max_depth,find_split_algorithm,random_seed,n_threads){
-}
-void TreeRegressor::compute_importance(REAL* importance){
-    //node from 1....numNodes
-    uint f;
-    REAL normalize=0;
-    for (uint i=0; i<n_features; i++) {
-        importance[i]=0.0;
-    }
-    for (uint i=ROOT; i<=numNodes; i++) {
-        // skip redundant nodes and leaf nodes
-        if (nodes[i]->leaf || nodes[i]->nSamples<=0) {
-            continue;
-        }
-        f=nodes[i]->feature_split;
-        importance[f]+=\
-        (nodes[i]->ini_error-nodes[i]->best_error)*(nodes[i]->ini_error-nodes[i]->best_error);
-    }
-    for (uint i=0; i<n_features; i++) {
-        normalize+=importance[i];
-    }
-    if (normalize!=0) {
-        for (uint i=0; i<n_features; i++) {
-            importance[i]/=normalize;
-        }
-    }
-}
-#endif
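
Both compute_importance implementations above share one pattern: accumulate a per-feature
score from the error reduction at every internal node, then normalize the scores to sum to
one (TreeClassifier weights the reduction by nSamples, TreeRegressor squares it). A
standalone sketch of the regressor variant (illustration only, not part of the commit;
NodeInfo is a made-up stand-in for TreeNode):

    #include <cstddef>
    #include <vector>

    struct NodeInfo {
        bool     leaf;
        unsigned feature_split;
        double   ini_error;   // node error before the split
        double   best_error;  // node error after the best split
    };

    std::vector<double> importanceFromNodes(const std::vector<NodeInfo>& nodes,
                                            std::size_t nFeatures) {
        std::vector<double> imp(nFeatures, 0.0);
        for (std::size_t i = 0; i < nodes.size(); ++i) {
            if (nodes[i].leaf) continue;                    // leaves carry no split
            double gain = nodes[i].ini_error - nodes[i].best_error;
            imp[nodes[i].feature_split] += gain * gain;     // squared reduction, as in TreeRegressor
        }
        double total = 0.0;
        for (std::size_t f = 0; f < nFeatures; ++f) total += imp[f];
        if (total != 0.0)
            for (std::size_t f = 0; f < nFeatures; ++f) imp[f] /= total;
        return imp;
    }
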
diff --git a/include/tensemble/TypeDef.h b/include/tensemble/TypeDef.h
deleted file mode 100644
index 70dca8d..0000000
--- a/include/tensemble/TypeDef.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* * * * *
- *  TypeDef.h 
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  Copyright (C) 2012, Rongkai Xia, shinekaixia at gmail.com
- *
- * * * * */
-
-#ifndef libTM_TypeDef_h
-#define libTM_TypeDef_h
-#include <vector>
-#include "cstdio"
-#define ENSEMBLE_SUCCESS 0
-#define ENSEMBLE_FAIL    -1
-
-#define EPS 1e-18
-#define LEFT(A) ((A)<<1)
-#define RIGHT(A) (((A)<<1)+1)
-
-#define MAX_DEPTH   100
-#define ROOT    1
-#define THREAD_MIN_FEATURES 5
-#define MTRY_DEFAULT    0
-
-typedef double REAL;
-typedef unsigned int uint;
-typedef short int sint;
-typedef std::vector<uint> uint_vec;
-
-
-#define MAX(A,B)    (((A)>(B))?(A):(B))
-#define MIN(A,B)    (((A)>(B))?(B):(A))
-
-
-#endif
diff --git a/include/tensemble/cmdline.h b/include/tensemble/cmdline.h
deleted file mode 100644
index 125fddb..0000000
--- a/include/tensemble/cmdline.h
+++ /dev/null
@@ -1,160 +0,0 @@
-/* * * * *
- *  cmdline.h
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  Copyright (C) 2012, Rongkai Xia, shinekaixia at gmail.com
- *
- * * * * */
-
-#ifndef libTM_cmdline_h
-#define libTM_cmdline_h
-#include <string>
-#include <vector>
-#include <map>
-
-static int param_offset=25;
-static int line_max_char=80;
-class cmdline {
-    
-public:
-    
-    bool parse_cmdline(int argc,const char * argv[]);
-    
-    void print_help();
-    
-    std::string get_value(const std::string& param);
-    
-    bool check_param(const std::string *param,int len);
-    
-    bool has_param(const std::string& param);
-    
-    void register_help(const std::string& param,const std::string& help);
-    
-    void register_help(const std::string *param,const std::string *help,int len);
-    
-    void clear_help();
-protected:
-    bool parse_name(std::string& name);
-    std::map<std::string,std::string> value;
-    
-    std::vector<std::pair<std::string, std::string> > help;
-    
-};
-
-bool cmdline::has_param(const std::string &param){
-    if (value.find(param)!=value.end()) {
-        return true;
-    }
-    return false;
-}
-std::string cmdline::get_value(const std::string &param){
-    if (value.find(param)==value.end()) {
-        return "";
-    }
-    return value[param];
-}
-
-bool cmdline::parse_name(std::string& name){
-    if (name.length()>=1 && name[0]!='-') {
-        return false;
-    }
-    if (name.length()>=2 && name[1]=='-') {
-        name=name.substr(2);
-    }else {
-        name=name.substr(1);
-    }
-    return true;
-}
-
-bool cmdline::parse_cmdline(int argc,const char * argv[]){
-    int i = 1;
-    while (i < argc) {
-        std::string s(argv[i]);
-        if (parse_name(s)) {
-            if (value.find(s) != value.end()) {
-                fprintf(stderr, "Error: The parameter -%s is already specified.\n",s.c_str());
-                return false;
-            }
-            if ((i+1) < argc) {
-                std::string s_next(argv[i+1]);
-                if (! parse_name(s_next)) {
-                    value[s] = s_next;
-                    i++;
-                } else {
-                    value[s] = "";
-                }
-            } else {
-                value[s] = "";
-            }
-        } else {
-            fprintf(stderr, "Error: Unknow parameter %s.\n",s.c_str());
-            print_help();
-            return false;
-        }
-        i++;
-    }
-    return true;
-}
-
-void cmdline::print_help() {
-    std::vector< std::pair<std::string, std::string> >::const_iterator pv;
-    
-    for ( pv = help.begin(); pv != help.end(); ++pv) {
-        std::cout << "-" << pv->first;
-        for (int i=pv->first.size()+1; i < param_offset; i++) { std::cout << " "; } 
-        std::string s_out = pv->second;
-        while (s_out.size() > 0) {
-            if (s_out.size() > (line_max_char-param_offset)) {
-                size_t p = s_out.substr(0, line_max_char-param_offset).find_last_of(" \t");
-                if (p == 0) {
-                    p = line_max_char-param_offset;
-                }
-                std::cout << s_out.substr(0, p) << std::endl;
-                s_out = s_out.substr(p+1, s_out.length()-p);            
-            } else {
-                std::cout << s_out << std::endl;
-                s_out = "";  
-            }
-            if (s_out.size() > 0) {
-                for (int i=0; i < param_offset; i++) { std::cout << " "; }
-            }
-        }
-    }
-}
-
-bool cmdline::check_param(const std::string *param, int len){
-    int i;
-    for (std::map<std::string,std::string>::const_iterator iter=value.begin(); iter!=value.end(); iter++) {
-        for (i=0; i<len; i++) {
-            if (iter->first==param[i]) {
-                break;
-            }
-        }
-        if (i==len) {
-            fprintf(stderr, "Error: Unknow parameter %s.\n",iter->first.c_str());
-//            print_help();
-            return false;
-        }
-    }
-    return true;
-}
-
-void cmdline::register_help(const std::string &param, const std::string &help){
-    this->help.push_back(make_pair(param, help));
-}
-
-void cmdline::register_help(const std::string *param,const std::string *help,int len){
-    clear_help();
-    for (int i=0; i<len; i++) {
-        register_help(param[i], help[i]);
-    }
-}
-
-void cmdline::clear_help(){
-    this->help.clear();
-}
-#endif
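
For context, the removed cmdline class is driven in four steps: register the help strings,
parse argv, validate the parameter names, then query individual values. A hypothetical usage
sketch (illustration only, not part of the commit; the parameter names "train", "n_trees"
and "help" are invented for this note):

    #include <cstdio>
    #include <cstdlib>
    #include <string>
    // assumes the cmdline class above is in scope, e.g. via #include "tensemble/cmdline.h"

    int main(int argc, const char* argv[]) {
        cmdline cmd;
        const std::string params[] = {"train", "n_trees", "help"};
        const std::string helps[]  = {"path to the training file",
                                      "number of trees (default 100)",
                                      "print this message"};
        cmd.register_help(params, helps, 3);

        if (!cmd.parse_cmdline(argc, argv) || !cmd.check_param(params, 3) || cmd.has_param("help")) {
            cmd.print_help();
            return 1;
        }
        int nTrees = cmd.has_param("n_trees") ? std::atoi(cmd.get_value("n_trees").c_str()) : 100;
        std::printf("training on %s with %d trees\n", cmd.get_value("train").c_str(), nTrees);
        return 0;
    }
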
diff --git a/include/xxhash.h b/include/xxhash.h
new file mode 100644
index 0000000..3c95781
--- /dev/null
+++ b/include/xxhash.h
@@ -0,0 +1,196 @@
+/*
+   xxHash - Extremely Fast Hash algorithm
+   Header File
+   Copyright (C) 2012-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+/* Notice extracted from xxHash homepage :
+
+xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
+It also successfully passes all tests from the SMHasher suite.
+
+Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
+
+Name            Speed       Q.Score   Author
+xxHash          5.4 GB/s     10
+CrapWow         3.2 GB/s      2       Andrew
+MurmurHash 3a   2.7 GB/s     10       Austin Appleby
+SpookyHash      2.0 GB/s     10       Bob Jenkins
+SBox            1.4 GB/s      9       Bret Mulvey
+Lookup3         1.2 GB/s      9       Bob Jenkins
+SuperFastHash   1.2 GB/s      1       Paul Hsieh
+CityHash64      1.05 GB/s    10       Pike & Alakuijala
+FNV             0.55 GB/s     5       Fowler, Noll, Vo
+CRC32           0.43 GB/s     9
+MD5-32          0.33 GB/s    10       Ronald L. Rivest
+SHA1-32         0.28 GB/s    10
+
+Q.Score is a measure of quality of the hash function.
+It depends on successfully passing SMHasher test set.
+10 is a perfect score.
+
+A 64-bits version, named XXH64, is available since r35.
+It offers much better speed, but for 64-bits applications only.
+Name     Speed on 64 bits    Speed on 32 bits
+XXH64       13.8 GB/s            1.9 GB/s
+XXH32        6.8 GB/s            6.0 GB/s
+*/
+
+#pragma once
+#ifndef XXHASH_H
+#define XXHASH_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*****************************
+*  Definitions
+*****************************/
+#include <stddef.h>   /* size_t */
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+
+
+/*****************************
+*  Namespace Emulation
+*****************************/
+/* Motivations :
+
+If you need to include xxHash in your library,
+but wish to avoid xxHash symbols being present on your library interface
+(in an effort to avoid potential name collisions if another library also includes xxHash),
+
+you can use XXH_NAMESPACE, which will automatically prefix any symbol from xxHash
+with the value of XXH_NAMESPACE (so avoid leaving it empty, and avoid numeric values).
+
+Note that no change is required within the calling program :
+it can still call xxHash functions using their regular name.
+They will be automatically translated by this header.
+*/
+#ifdef XXH_NAMESPACE
+#  define XXH_CAT(A,B) A##B
+#  define XXH_NAME2(A,B) XXH_CAT(A,B)
+#  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
+#  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+#  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
+#  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
+#  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
+#  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
+#  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
+#  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
+#  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
+#  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
+#  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
+#  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
+#endif
+
+
+/*****************************
+*  Simple Hash Functions
+*****************************/
+
+unsigned int       XXH32 (const void* input, size_t length, unsigned seed);
+unsigned long long XXH64 (const void* input, size_t length, unsigned long long seed);
+
+/*
+XXH32() :
+    Calculate the 32-bit hash of the sequence of "length" bytes stored at memory address "input".
+    The memory between input & input+length must be valid (allocated and read-accessible).
+    "seed" can be used to alter the result predictably.
+    This function successfully passes all SMHasher tests.
+    Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
+XXH64() :
+    Calculate the 64-bit hash of the sequence of "length" bytes stored at memory address "input".
+    Faster on 64-bit systems. Slower on 32-bit systems.
+*/
+
+
+
+/*****************************
+*  Advanced Hash Functions
+*****************************/
+typedef struct { long long ll[ 6]; } XXH32_state_t;
+typedef struct { long long ll[11]; } XXH64_state_t;
+
+/*
+These structures allow static allocation of XXH states.
+States must then be initialized using XXHnn_reset() before first use.
+
+If you prefer dynamic allocation, please refer to functions below.
+*/
+
+XXH32_state_t* XXH32_createState(void);
+XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
+
+XXH64_state_t* XXH64_createState(void);
+XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
+
+/*
+These functions create and release memory for XXH state.
+States must then be initialized using XXHnn_reset() before first use.
+*/
+
+
+XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, unsigned seed);
+XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+unsigned int  XXH32_digest (const XXH32_state_t* statePtr);
+
+XXH_errorcode      XXH64_reset  (XXH64_state_t* statePtr, unsigned long long seed);
+XXH_errorcode      XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
+unsigned long long XXH64_digest (const XXH64_state_t* statePtr);
+
+/*
+These functions calculate the xxHash of an input provided in multiple smaller packets,
+as opposed to an input provided as a single block.
+
+XXH state space must first be allocated, using either the static or the dynamic method provided above.
+
+Start a new hash by initializing the state with a seed, using XXHnn_reset().
+
+Then, feed the hash state by calling XXHnn_update() as many times as necessary.
+Obviously, the input must be valid, meaning allocated and read-accessible.
+Each function returns an error code, with 0 meaning OK and any other value meaning there is an error.
+
+Finally, you can produce a hash at any time, by using XXHnn_digest().
+This function returns the final nn-bit hash.
+You can nonetheless continue feeding the hash state with more input,
+and therefore get new hashes, by calling XXHnn_digest() again.
+
+When you are done, don't forget to free the XXH state space, typically using XXHnn_freeState().
+*/
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif // XXHASH_H
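
The one-shot and streaming entry points declared above are interchangeable: hashing a buffer
with a single XXH32() call must give the same digest as create/reset, any number of update
calls over the same bytes, then digest. A small usage sketch (illustration only, not part of
the commit; the sample string is arbitrary):

    #include <cassert>
    #include <cstdio>
    #include <cstring>
    #include "xxhash.h"

    int main() {
        const char msg[] = "salmon 0.6.0";
        const std::size_t len = std::strlen(msg);
        const unsigned seed = 0;

        unsigned oneShot = XXH32(msg, len, seed);           // single-block hash

        XXH32_state_t* state = XXH32_createState();         // streaming: same bytes, two chunks
        XXH32_reset(state, seed);
        XXH32_update(state, msg, len / 2);
        XXH32_update(state, msg + len / 2, len - len / 2);
        unsigned streamed = XXH32_digest(state);
        XXH32_freeState(state);

        assert(oneShot == streamed);                        // both paths must agree
        std::printf("XXH32 = %08x\n", oneShot);
        return 0;
    }
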
diff --git a/scripts/cpld.bash b/scripts/cpld.bash
deleted file mode 100755
index 2b890a0..0000000
--- a/scripts/cpld.bash
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/bin/bash 
-# Author : Hemanth.HM
-# Email : hemanth[dot]hm[at]gmail[dot]com
-# License : GNU GPLv3
-#
-
-function useage()
-{
-	cat << EOU
-Useage: bash $0 <path to the binary> <path to copy the dependencies>
-EOU
-exit 1
-}
-
-#Validate the inputs
-[[ $# < 2 ]] && useage
-
-#Check if the paths are vaild
-[[ ! -e $1 ]] && echo "Not a vaild input $1" && exit 1 
-[[ -d $2 ]] || echo "No such directory $2 creating..."&& mkdir -p "$2"
-
-#Get the library dependencies
-echo "Collecting the shared library dependencies for $1..."
-deps=$(ldd $1 | awk 'BEGIN{ORS=" "}$1~/^\//{print $1}$3~/^\//{print $3}' | sed 's/,$/\n/')
-echo "Copying the dependencies to $2"
-
-#Copy the deps
-for dep in $deps
-do
-	echo "Copying $dep to $2"
-	cp "$dep" "$2"
-done
-
-echo "Done!"
-
diff --git a/scripts/fetchRapMap.sh b/scripts/fetchRapMap.sh
new file mode 100755
index 0000000..68f49f2
--- /dev/null
+++ b/scripts/fetchRapMap.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+CURR_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
+EXTERNAL_DIR=${CURR_DIR}/../external
+INSTALL_DIR=${CURR_DIR}/../external/install
+
+if [ -d ${EXTERNAL_DIR}/RapMap ] ; then
+    rm -fr ${EXTERNAL_DIR}/RapMap
+fi
+
+if [ -d ${INSTALL_DIR}/include/rapmap ] ; then
+    rm -fr ${INSTALL_DIR}/include/rapmap
+fi
+
+if [ -d ${INSTALL_DIR}/src/rapmap ] ; then
+    rm -fr ${INSTALL_DIR}/src/rapmap
+fi
+
+mkdir -p ${EXTERNAL_DIR}
+curl -k -L https://github.com/COMBINE-lab/RapMap/archive/large-index.zip -o ${EXTERNAL_DIR}/rapmap.zip
+rm -fr ${EXTERNAL_DIR}/RapMap
+unzip ${EXTERNAL_DIR}/rapmap.zip -d ${EXTERNAL_DIR}
+mv ${EXTERNAL_DIR}/RapMap-large-index ${EXTERNAL_DIR}/RapMap
+
+mkdir -p ${INSTALL_DIR}/include/rapmap
+mkdir -p ${INSTALL_DIR}/src/rapmap
+
+rm ${EXTERNAL_DIR}/RapMap/src/xxhash.c
+rm ${EXTERNAL_DIR}/RapMap/include/xxhash.h
+
+cp -r ${EXTERNAL_DIR}/RapMap/external/libdivsufsort.zip ${EXTERNAL_DIR}
+cp -r ${EXTERNAL_DIR}/RapMap/src/*.c ${INSTALL_DIR}/src/rapmap
+cp -r ${EXTERNAL_DIR}/RapMap/src/*.cpp ${INSTALL_DIR}/src/rapmap
+cp -r ${EXTERNAL_DIR}/RapMap/include/tclap ${INSTALL_DIR}/include/rapmap
+cp -r ${EXTERNAL_DIR}/RapMap/include/*.h ${INSTALL_DIR}/include/rapmap
+cp -r ${EXTERNAL_DIR}/RapMap/include/*.hpp ${INSTALL_DIR}/include/rapmap
diff --git a/src/BWAUtils.cpp b/src/BWAUtils.cpp
new file mode 100644
index 0000000..0309eba
--- /dev/null
+++ b/src/BWAUtils.cpp
@@ -0,0 +1,146 @@
+#include "BWAUtils.hpp"
+
+namespace bwautils {
+static void bwt_reverse_intvs(bwtintv_v *p)
+{
+    if (p->n > 1) {
+        int j;
+        for (j = 0; j < p->n>>1; ++j) {
+            bwtintv_t tmp = p->a[p->n - 1 - j];
+            p->a[p->n - 1 - j] = p->a[j];
+            p->a[j] = tmp;
+        }
+    }
+}
+
+    // Function modified from bwt_smem1a:
+    // https://github.com/lh3/bwa/blob/eb428d7d31ced059ad39af2701a22ebe6d175657/bwt.c#L289
+    /**
+     * Search for the k-mer of length @len starting at @q. 
+     * Return true if an interval is found for the k-mer and false 
+     * otherwise. The appropriate bwt interval will be placed 
+     * in @resInterval upon success. 
+     *
+     */
+    bool getIntervalForKmer(const bwt_t* bwt, // the bwt index
+                            int len, // k-mer length
+                            const uint8_t *q, // query
+                            bwtintv_t& resInterval
+            ) {
+            int i, j, c, ret;
+            int x = 0;
+            bwtintv_t ik, ok[4];
+            bwtintv_v a[2], *prev, *curr, *swap;
+
+            if (q[x] > 3) return false;
+            //if (min_intv < 1) min_intv = 1; // the interval size should be at least 1
+            kv_init(a[0]); kv_init(a[1]);
+            prev = &a[0]; // use the temporary vector if provided
+            curr = &a[1];
+            bwt_set_intv(bwt, q[x], ik); // the initial interval of a single base
+            ik.info = x + 1;
+
+            for (i = x + 1, curr->n = 0; i < len; ++i) { // forward search
+                if (q[i] < 4) { // an A/C/G/T base
+                    c = 3 - q[i]; // complement of q[i]
+                    bwt_extend(bwt, &ik, ok, 0);
+                    ik = ok[c]; ik.info = i + 1;
+                } else { // an ambiguous base
+                    break; // always terminate extension at an ambiguous base; in this case, i<len always stands
+                }
+            }
+            if (i == len) { //kv_push(bwtintv_t, *curr, ik); // push the last interval if we reach the end
+                /// Copy over the final interval to our output
+                resInterval.x[0] = ik.x[0];
+                resInterval.x[1] = ik.x[1];
+                resInterval.x[2] = ik.x[2];
+                resInterval.info = ik.info;
+                return true;
+            } else { // we didn't find a k-mer of the requested length
+                return false;
+            }
+    }
+
+
+    // NOTE: $max_intv is not currently used in BWA-MEM
+    // NOTE: Modified from the original functions to take an initial interval for the search query
+    int bwt_smem1a_with_kmer(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, uint64_t max_intv, bwtintv_t initial_interval, bwtintv_v *mem, bwtintv_v *tmpvec[2])
+    {
+        int i, j, c, ret;
+        bwtintv_t ik, ok[4];
+        bwtintv_v a[2], *prev, *curr, *swap;
+
+        mem->n = 0;
+        if (q[x] > 3) return x + 1;
+        if (min_intv < 1) min_intv = 1; // the interval size should be at least 1
+        kv_init(a[0]); kv_init(a[1]);
+        prev = tmpvec && tmpvec[0]? tmpvec[0] : &a[0]; // use the temporary vector if provided
+        curr = tmpvec && tmpvec[1]? tmpvec[1] : &a[1];
+        //bwt_set_intv(bwt, q[x], ik); // the initial interval of a single base
+        //ik.info = x + 1;
+        
+        // ROB: set ik to our initial interval
+        ik.x[0] = initial_interval.x[0]; 
+        ik.x[1] = initial_interval.x[1]; 
+        ik.x[2] = initial_interval.x[2]; 
+        // Is this last one right?
+        int k = initial_interval.info;
+        ik.info = x + k; 
+
+        for (i = x + k, curr->n = 0; i < len; ++i) { // forward search
+            if (ik.x[2] < max_intv) { // an interval small enough
+                kv_push(bwtintv_t, *curr, ik);
+                break;
+            } else if (q[i] < 4) { // an A/C/G/T base
+                c = 3 - q[i]; // complement of q[i]
+                bwt_extend(bwt, &ik, ok, 0);
+                if (ok[c].x[2] != ik.x[2]) { // change of the interval size
+                    kv_push(bwtintv_t, *curr, ik);
+                    if (ok[c].x[2] < min_intv) break; // the interval size is too small to be extended further
+                }
+                ik = ok[c]; ik.info = i + 1;
+            } else { // an ambiguous base
+                kv_push(bwtintv_t, *curr, ik);
+                break; // always terminate extension at an ambiguous base; in this case, i<len always stands
+            }
+        }
+        if (i == len) kv_push(bwtintv_t, *curr, ik); // push the last interval if we reach the end
+        bwt_reverse_intvs(curr); // s.t. smaller intervals (i.e. longer matches) visited first
+        ret = curr->a[0].info; // this will be the returned value
+        swap = curr; curr = prev; prev = swap;
+
+        for (i = x - 1; i >= -1; --i) { // backward search for MEMs
+            c = i < 0? -1 : q[i] < 4? q[i] : -1; // c==-1 if i<0 or q[i] is an ambiguous base
+            for (j = 0, curr->n = 0; j < prev->n; ++j) {
+                bwtintv_t *p = &prev->a[j];
+                if (c >= 0 && ik.x[2] >= max_intv) bwt_extend(bwt, p, ok, 1);
+                if (c < 0 || ik.x[2] < max_intv || ok[c].x[2] < min_intv) { // keep the hit if reaching the beginning or an ambiguous base or the intv is small enough
+                    if (curr->n == 0) { // test curr->n>0 to make sure there are no longer matches
+                        if (mem->n == 0 || i + 1 < mem->a[mem->n-1].info>>32) { // skip contained matches
+                            ik = *p; ik.info |= (uint64_t)(i + 1)<<32;
+                            kv_push(bwtintv_t, *mem, ik);
+                        }
+                    } // otherwise the match is contained in another longer match
+                } else if (curr->n == 0 || ok[c].x[2] != curr->a[curr->n-1].x[2]) {
+                    ok[c].info = p->info;
+                    kv_push(bwtintv_t, *curr, ok[c]);
+                }
+            }
+            if (curr->n == 0) break;
+            swap = curr; curr = prev; prev = swap;
+        }
+        bwt_reverse_intvs(mem); // s.t. sorted by the start coordinate
+
+        if (tmpvec == 0 || tmpvec[0] == 0) free(a[0].a);
+        if (tmpvec == 0 || tmpvec[1] == 0) free(a[1].a);
+        return ret;
+    }
+
+    int bwt_smem1_with_kmer(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, bwtintv_t initial_interval, bwtintv_v *mem, bwtintv_v *tmpvec[2])
+    {
+        return bwt_smem1a_with_kmer(bwt, len, q, x, min_intv, 0, initial_interval, mem, tmpvec);
+    }
+}
+
+
+
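
[Editorial note, not part of the imported sources.] For orientation, here is a sketch of how the two entry points above fit together: encode a k-mer into the 2-bit alphabet used by the BWT routines (A=0, C=1, G=2, T=3, anything else ambiguous), look up its interval with getIntervalForKmer(), and, on success, that interval can seed bwt_smem1_with_kmer(). The k-mer, the encoding lambda, and the demoKmerLookup wrapper are hypothetical, and a loaded bwt_t* (from a previously built FMD index) is assumed to be available.

#include "BWAUtils.hpp"
#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical helper: look up a single k-mer in an already-loaded BWT index.
int demoKmerLookup(const bwt_t* bwt) {
    const char* kmer = "ACGTACGTACGTACG";  // made-up 15-mer
    const int k = 15;

    // Encode to the 2-bit alphabet expected by the BWT routines.
    auto encode = [](char c) -> uint8_t {
        switch (c) {
            case 'A': case 'a': return 0;
            case 'C': case 'c': return 1;
            case 'G': case 'g': return 2;
            case 'T': case 't': return 3;
            default: return 4;  // ambiguous base; the search will reject it
        }
    };
    std::vector<uint8_t> q(k);
    for (int i = 0; i < k; ++i) { q[i] = encode(kmer[i]); }

    bwtintv_t interval;
    if (bwautils::getIntervalForKmer(bwt, k, q.data(), interval)) {
        // x[2] holds the size of the suffix-array interval, i.e. the number of
        // occurrences; the interval can later seed bwt_smem1_with_kmer().
        std::printf("k-mer found; interval size = %llu\n",
                    static_cast<unsigned long long>(interval.x[2]));
        return 0;
    }
    std::printf("no interval (absent k-mer or ambiguous base)\n");
    return 1;
}
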
diff --git a/src/BiasCorrectionDriver.cpp b/src/BiasCorrectionDriver.cpp
deleted file mode 100644
index 86c7973..0000000
--- a/src/BiasCorrectionDriver.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-/**
->HEADER
-    Copyright (c) 2013 Rob Patro robp at cs.cmu.edu
-
-    This file is part of Salmon.
-
-    Salmon is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    Salmon is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with Salmon.  If not, see <http://www.gnu.org/licenses/>.
-<HEADER
-**/
-
-
-#include <iostream>
-#include <cstdio>
-#include <cstdlib>
-#include <boost/filesystem.hpp>
-
-namespace bfs = boost::filesystem;
-
-int performBiasCorrection(
-        bfs::path featureFile,
-        bfs::path expressionFile,
-        double estimatedReadLength,
-        double kmersPerRead,
-        uint64_t mappedKmers,
-        uint32_t merLen,
-        bfs::path outputFile,
-        size_t numThreads);
-
-int main(int argc, char* argv[]) {
-        bfs::path featureFile(argv[1]);
-        bfs::path expressionFile(argv[2]);
-        double estimatedReadLength = atod(argv[3]);
-        double kmersPerRead = atod(argv[4]);
-        uint64_t mappedKmers = atol(argv[5]);
-        uint32_t mappedKmers = atoi(argv[6]);
-        bfs::path outputFile(argv[7]);
-        size_t numThreads = atoi(argv[8]);
-
-        performBiasCorrection(featureFile, expressionFile, estimatedReadLength, kmersPerRead,
-                              mappedKmers, merLen, outputFile, numThreads);
-}
diff --git a/src/BuildSalmonIndex.cpp b/src/BuildSalmonIndex.cpp
index 1255d5d..ca65ade 100644
--- a/src/BuildSalmonIndex.cpp
+++ b/src/BuildSalmonIndex.cpp
@@ -43,9 +43,10 @@
 
 #include "Transcript.hpp"
 #include "SalmonUtils.hpp"
+#include "SalmonIndex.hpp"
 #include "GenomicFeature.hpp"
-#include "format.h"
 #include "spdlog/spdlog.h"
+#include "spdlog/details/format.h"
 
 using my_mer = jellyfish::mer_dna_ns::mer_base_static<uint64_t, 1>;
 
@@ -56,88 +57,7 @@ extern "C" {
 int bwa_index(int argc, char* argv[]);
 }
 
-int computeBiasFeatures(
-    std::vector<std::string>& transcriptFiles,
-    boost::filesystem::path outFilePath,
-    bool useStreamingParser,
-    size_t numThreads);
-
-/*
-bool buildAuxKmerIndex(boost::filesystem::path& outputPrefix, uin32_t k,
-                       std::shared_ptr<spdlog::logger>& logger){
-    namespace bfs = boost::filesystem;
-
-    bfs::path indexPath = outputPrefix / "bwaindex";
-    // Load the bwa index
-    bwaidx_t *idx_{nullptr};
-    {
-        logger->info("Reading BWT index from file");
-        if ((idx_ = bwa_idx_load(indexPath.string().c_str(), BWA_IDX_BWT|BWA_IDX_BNS|BWA_IDX_PAC)) == 0) {
-            logger->error("Couldn't open index [{}] --- ", indexPath);
-            logger->error("Please make sure that 'salmon index' has been run successfully");
-            std::exit(1);
-        }
-    }
-
-    size_t numRecords = idx_->bns->n_seqs;
-    std::vector<Transcript> transcripts_tmp;
-    { // Load transcripts from file
-
-            logger->info("Index contained {} targets; loading them", numRecords);
-            //transcripts_.resize(numRecords);
-            for (auto i : boost::irange(size_t(0), numRecords)) {
-                uint32_t id = i;
-                char* name = idx_->bns->anns[i].name;
-                uint32_t len = idx_->bns->anns[i].len;
-                // copy over the length, then we're done.
-                transcripts_tmp.emplace_back(id, name, len);
-            }
-
-            std::sort(transcripts_tmp.begin(), transcripts_tmp.end(),
-                    [](const Transcript& t1, const Transcript& t2) -> bool {
-                    return t1.id < t2.id;
-                    });
-            double alpha = 0.005;
-            char nucTab[256];
-            nucTab[0] = 'A'; nucTab[1] = 'C'; nucTab[2] = 'G'; nucTab[3] = 'T';
-            for (size_t i = 4; i < 256; ++i) { nucTab[i] = 'N'; }
-
-            // Load the transcript sequence from file
-            for (auto& t : transcripts_tmp) {
-                transcripts_.emplace_back(t.id, t.RefName.c_str(), t.RefLength, alpha);
-                // from BWA
-                uint8_t* rseq = nullptr;
-                int64_t tstart, tend, compLen, l_pac = idx_->bns->l_pac;
-                tstart  = idx_->bns->anns[t.id].offset;
-                tend = tstart + t.RefLength;
-                rseq = bns_get_seq(l_pac, idx_->pac, tstart, tend, &compLen);
-                if (compLen != t.RefLength) {
-                    fmt::print(stderr,
-                               "For transcript {}, stored length ({}) != computed length ({}) --- index may be corrupt. exiting\n",
-                               t.RefName, compLen, t.RefLength);
-                    std::exit(1);
-                }
-
-                std::string seq(t.RefLength, ' ');
-                if (rseq != 0) {
-                    for (size_t i = 0; i < compLen; ++i) {
-                        seq[i] = rseq[i];
-                                 //nst_nt4_table[static_cast<int>(nucTab[rseq[i]])];
-                    }
-                }
-                transcripts_.back().Sequence = salmon::stringtools::encodeSequenceInSAM(seq.c_str(), t.RefLength);
-                free(rseq);
-                // end BWA code
-            }
-            // Since we have the de-coded reference sequences, we no longer need
-            // the encoded sequences, so free them.
-            free(idx_->pac); idx_->pac = nullptr;
-            transcripts_tmp.clear();
-            // ====== Done loading the transcripts from file
-}
-*/
-
-// Cool way to do this from 
+// Cool way to do this from
 // http://stackoverflow.com/questions/108318/whats-the-simplest-way-to-test-whether-a-number-is-a-power-of-2-in-c
 bool isPowerOfTwo(uint32_t n) {
   return (n > 0 and (n & (n-1)) == 0);
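
[Editorial note, not part of the sources.] A brief worked illustration of the bit trick: a power of two has exactly one set bit, so subtracting one clears that bit and sets every lower bit, making the AND zero.

// n = 8:  n = 0b1000, n-1 = 0b0111, n & (n-1) = 0b0000  -> power of two
// n = 6:  n = 0b0110, n-1 = 0b0101, n & (n-1) = 0b0100  -> not a power of two
// n = 0:  rejected by the (n > 0) guard                 -> not a power of two
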
@@ -151,18 +71,25 @@ int salmonIndex(int argc, char* argv[]) {
 
     bool useStreamingParser = true;
 
+    string indexTypeStr = "fmd";
     uint32_t saSampInterval = 1;
+    uint32_t auxKmerLen = 0;
     uint32_t maxThreads = std::thread::hardware_concurrency();
     uint32_t numThreads;
+    bool useQuasi{false};
 
     po::options_description generic("Command Line Options");
     generic.add_options()
     ("version,v", "print version string")
     ("help,h", "produce help message")
     ("transcripts,t", po::value<string>()->required(), "Transcript fasta file.")
+    ("kmerLen,k", po::value<uint32_t>(&auxKmerLen)->default_value(31)->required(),
+                    "The size of k-mers that should be used for the quasi index.")
     ("index,i", po::value<string>()->required(), "Salmon index.")
     ("threads,p", po::value<uint32_t>(&numThreads)->default_value(maxThreads)->required(),
                             "Number of threads to use (only used for computing bias features)")
+    ("type", po::value<string>(&indexTypeStr)->default_value("quasi")->required(), "The type of index to build; options are \"fmd\" and \"quasi\" "
+    							   			   "\"quasi\" is recommended, and \"fmd\" may be removed in the future")
     ("sasamp,s", po::value<uint32_t>(&saSampInterval)->default_value(1)->required(),
                             "The interval at which the suffix array should be sampled. "
                             "Smaller values are faster, but produce a larger index. "
@@ -188,16 +115,22 @@ Creates a salmon index.
         }
         po::notify(vm);
 
-	uint32_t sasamp = vm["sasamp"].as<uint32_t>();
-	if (!isPowerOfTwo(sasamp)) {
-	  fmt::MemoryWriter errWriter;
-	  errWriter << "Error: The suffix array sampling interval must be "
-		       "a power of 2. The value provided, " << sasamp << ", is not.";
-	  throw(std::logic_error(errWriter.str()));
-	}
-
-        fmt::MemoryWriter optWriter;
-        optWriter << vm["sasamp"].as<uint32_t>();
+        if (!(indexTypeStr == "quasi" or indexTypeStr == "fmd")) {
+            fmt::MemoryWriter errWriter;
+            errWriter << "Error: The index type must be either "
+                "\"fmd\" or \"quasi\", but " << indexTypeStr << ", was "
+                "provided.";
+            throw(std::logic_error(errWriter.str()));
+        }
+        bool useQuasi = (indexTypeStr == "quasi");
+
+        uint32_t sasamp = vm["sasamp"].as<uint32_t>();
+        if (!isPowerOfTwo(sasamp) and !useQuasi) {
+          fmt::MemoryWriter errWriter;
+          errWriter << "Error: The suffix array sampling interval must be "
+                       "a power of 2. The value provided, " << sasamp << ", is not.";
+          throw(std::logic_error(errWriter.str()));
+        }
 
         string transcriptFile = vm["transcripts"].as<string>();
         bfs::path indexDirectory(vm["index"].as<string>());
@@ -206,7 +139,7 @@ Creates a salmon index.
         if (!bfs::exists(indexDirectory)) {
             std::cerr << "index [" << indexDirectory << "] did not previously exist "
                       << " . . . creating it\n";
-            bfs::create_directory(indexDirectory);
+            bfs::create_directories(indexDirectory);
         }
 
         bfs::path logPath = indexDirectory / "indexing.log";
@@ -219,45 +152,52 @@ Creates a salmon index.
         auto fileLog = spdlog::create("fileLog", {fileSink});
         auto jointLog = spdlog::create("jointLog", {fileSink, consoleSink});
 
-        // First, compute the transcript features in case the user
-        // ever wants to bias-correct his / her results
-        // NOTE: Currently, we're using the same bias correction technique here that
-        // we use in Sailfish. In the future, test more "traditional" bias correction
-        // techniques to see if we should adopt them instead
-        bfs::path transcriptBiasFile(indexDirectory); transcriptBiasFile /= "bias_feats.txt";
-
         std::vector<std::string> transcriptFiles = {transcriptFile};
         fmt::MemoryWriter infostr;
-        infostr << "computeBiasFeatures( {";
-        for (auto& tf : transcriptFiles) {
-            infostr << "[" << tf << "] ";
-        }
-        infostr << ", " << transcriptBiasFile.c_str() << ", " << useStreamingParser << ", " << numThreads << ")\n";
-        jointLog->info() << infostr.str();
-        computeBiasFeatures(transcriptFiles, transcriptBiasFile, useStreamingParser, numThreads);
-        // ==== finished computing bias fetures
-
-        bfs::path outputPrefix = indexDirectory / "bwaidx";
 
-        std::vector<char const*> bwaArgVec{ "index",
-                                    "-s",
-                                    optWriter.str().c_str(),
-                                    "-p",
-                                    outputPrefix.string().c_str(),
-                                    transcriptFile.c_str() };
-
-        char* bwaArgv[] = { const_cast<char*>(bwaArgVec[0]),
-                            const_cast<char*>(bwaArgVec[1]),
-                            const_cast<char*>(bwaArgVec[2]),
-                            const_cast<char*>(bwaArgVec[3]),
-                            const_cast<char*>(bwaArgVec[4]),
-                            const_cast<char*>(bwaArgVec[5]) };
-        int bwaArgc = 6;
-
-        ret = bwa_index(bwaArgc, bwaArgv);
+        bfs::path outputPrefix;
+        std::unique_ptr<std::vector<std::string>> argVec(new std::vector<std::string>);
+	fmt::MemoryWriter optWriter;
+
+        std::unique_ptr<SalmonIndex> sidx = nullptr;
+        // Build a quasi-mapping index
+        if (useQuasi) {
+            outputPrefix = indexDirectory;
+            argVec->push_back("dummy");
+            argVec->push_back("-k");
+
+            if (auxKmerLen == 0) {
+                jointLog->info("You cannot have a k-mer length of 0 with the quasi-index.");
+                jointLog->info("Setting to the default value of 31.");
+                auxKmerLen = 31;
+            }
 
-        jointLog->info("done building BWT Index");
+            optWriter << auxKmerLen;
+            argVec->push_back(optWriter.str());
+            argVec->push_back("-t");
+            argVec->push_back(transcriptFile);
+            argVec->push_back("-i");
+            argVec->push_back(outputPrefix.string());
+            sidx.reset(new SalmonIndex(jointLog, SalmonIndexType::QUASI));
+        } else {
+            // Build the FMD-based index
+            bfs::path outputPrefix = indexDirectory / "bwaidx";
+            std::cerr << "outputPrefix = " << outputPrefix << "\n";
+            argVec->push_back("index");
+            argVec->push_back("-s");
+	        optWriter << vm["sasamp"].as<uint32_t>();
+            argVec->push_back(optWriter.str());
+            argVec->push_back("-p");
+            argVec->push_back(outputPrefix.string());
+            argVec->push_back(transcriptFile);
+            sidx.reset(new SalmonIndex(jointLog, SalmonIndexType::FMD));
+    	    // Disable the auxiliary k-mer index for now
+	        auxKmerLen = 0;
+        }
 
+        jointLog->info("building index");
+	    sidx->build(indexDirectory, *(argVec.get()), auxKmerLen);
+        jointLog->info("done building index");
         // If we want to build the auxiliary k-mer index, do it here.
         /*
         uint32_t k = 15;
@@ -276,4 +216,3 @@ Creates a salmon index.
     }
     return ret;
 }
-
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index c2ad83e..1190e95 100755
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,55 +1,69 @@
+include_directories(
+${GAT_SOURCE_DIR}/include
+${GAT_SOURCE_DIR}/include/eigen3
+${GAT_SOURCE_DIR}/external
+${GAT_SOURCE_DIR}/external/cereal/include
+${GAT_SOURCE_DIR}/external/install/include
+${GAT_SOURCE_DIR}/external/install/include/jellyfish-2.2.3
+${GAT_SOURCE_DIR}/external/install/include/bwa
+${ZLIB_INCLUDE_DIR}
+${TBB_INCLUDE_DIRS}
+${Boost_INCLUDE_DIRS}
+${GAT_SOURCE_DIR}/external/install/include/rapmap
+)
+
 set ( SALMON_MAIN_SRCS
 QSufSort.c
 is.c
 bwt_gen.c
 bwtindex.c
+xxhash.c
 CollapsedEMOptimizer.cpp
 CollapsedGibbsSampler.cpp
 Salmon.cpp
 BuildSalmonIndex.cpp
 SalmonQuantify.cpp
-FragmentLengthDistribution.cpp 
+FragmentLengthDistribution.cpp
 FragmentStartPositionDistribution.cpp
 SequenceBiasModel.cpp
 StadenUtils.cpp
 TranscriptGroup.cpp
+GZipWriter.cpp
+${GAT_SOURCE_DIR}/external/install/src/rapmap/RapMapFileSystem.cpp
+${GAT_SOURCE_DIR}/external/install/src/rapmap/RapMapSAIndexer.cpp
+${GAT_SOURCE_DIR}/external/install/src/rapmap/RapMapSAIndex.cpp
+${GAT_SOURCE_DIR}/external/install/src/rapmap/RapMapSAMapper.cpp
+${GAT_SOURCE_DIR}/external/install/src/rapmap/RapMapUtils.cpp
+${GAT_SOURCE_DIR}/external/install/src/rapmap/HitManager.cpp
+${GAT_SOURCE_DIR}/external/install/src/rapmap/rank9b.cpp
+${GAT_SOURCE_DIR}/external/install/src/rapmap/bit_array.c
+#${GAT_SOURCE_DIR}/external/install/src/rapmap/sais.c
 )
 
 set (SALMON_ALIGN_SRCS
-FASTAParser.cpp 
-ErrorModel.cpp 
+FASTAParser.cpp
+ErrorModel.cpp
 AlignmentModel.cpp
-FragmentLengthDistribution.cpp 
-SalmonQuantifyAlignments.cpp 
+FragmentLengthDistribution.cpp
+SalmonQuantifyAlignments.cpp
 )
 
 set (SALMON_LIB_SRCS
+BWAUtils.cpp
 LibraryFormat.cpp
 GenomicFeature.cpp
 VersionChecker.cpp
 SalmonUtils.cpp
 SalmonStringUtils.cpp
-ComputeBiasFeatures.cpp
-PerformBiasCorrection.cpp
-cokus.cpp
-merge_files.cc
-format.cc
 )
 
-include_directories( 
-${GAT_SOURCE_DIR}/include
-${GAT_SOURCE_DIR}/include/eigen3
-${GAT_SOURCE_DIR}/external
-${GAT_SOURCE_DIR}/external/cereal/include
-${GAT_SOURCE_DIR}/external/install/include
-${GAT_SOURCE_DIR}/external/install/include/jellyfish-2.1.3
-${GAT_SOURCE_DIR}/external/install/include/bwa
-${ZLIB_INCLUDE_DIR}
-${TBB_INCLUDE_DIRS}
-${Boost_INCLUDE_DIRS}
+set ( UNIT_TESTS_SRCS
+    ${GAT_SOURCE_DIR}/tests/UnitTests.cpp
+    FragmentLengthDistribution.cpp
 )
 
-link_directories( 
+
+link_directories(
 ${GAT_SOURCE_DIR}/lib
 ${GAT_SOURCE_DIR}/external/install/lib
 ${Boost_LIBRARY_DIRS}
@@ -61,10 +75,10 @@ ${BLAS_LIBRARY_DIR}
 message("TBB_LIBRARIES = ${TBB_LIBRARIES}")
 message("Boost_LIBRARIES = ${Boost_LIBRARIES}")
 
-# Set the RPATH 
+# Set the RPATH
 if (APPLE)
-    ## This DOES NOT do what I / any one sane, expects.  Setting the 
-    ## linker path on OSX is messed up.  Just tell the user to use 
+    ## This DOES NOT do what I / any one sane, expects.  Setting the
+    ## linker path on OSX is messed up.  Just tell the user to use
     ## DYLD_FALLBACK_LIBRARY_PATH for now
     set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
 else()
@@ -79,6 +93,8 @@ add_library(salmon_core STATIC ${SALMON_LIB_SRCS} )
 # Build the salmon executable
 add_executable(salmon ${SALMON_MAIN_SRCS} ${SALMON_ALIGN_SRCS})
 
+add_executable(unitTests ${UNIT_TESTS_SRCS})
+
 #add_executable(salmon-read ${SALMON_READ_SRCS})
 #set_target_properties(salmon-read PROPERTIES COMPILE_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_LIBPTHREAD -D_PBGZF_USE -fopenmp"
 #    LINK_FLAGS "-DHAVE_LIBPTHREAD -D_PBGZF_USE -fopenmp")
@@ -97,19 +113,26 @@ if (APPLE)
     endif()
 
 else()
-    # related to complete static linking --- on hold	
+    # related to complete static linking --- on hold
     set (BOOST_THREAD_LIBRARY)
 endif()
 
+# our suffix array construction libraries
+set (SUFFARRAY_LIB ${GAT_SOURCE_DIR}/external/install/lib/libdivsufsort.a)
+set (SUFFARRAY64_LIB ${GAT_SOURCE_DIR}/external/install/lib/libdivsufsort64.a)
+
+
 # Link the executable
 target_link_libraries(salmon
-    salmon_core 
+    salmon_core
     gff
     ${PTHREAD_LIB}
-    ${Boost_LIBRARIES} 
+    ${Boost_LIBRARIES}
     ${GAT_SOURCE_DIR}/external/install/lib/libstaden-read.a
-    ${ZLIB_LIBRARY} 
-    ${GAT_SOURCE_DIR}/external/install/lib/libjellyfish-2.0.a 
+    ${ZLIB_LIBRARY}
+    ${SUFFARRAY_LIB}
+    ${SUFFARRAY64_LIB}
+    ${GAT_SOURCE_DIR}/external/install/lib/libjellyfish-2.0.a
     ${GAT_SOURCE_DIR}/external/install/lib/libbwa.a
     m
     ${LIBLZMA_LIBRARIES}
@@ -122,17 +145,62 @@ target_link_libraries(salmon
 
 add_dependencies(salmon libbwa)
 
+# Link the executable
+target_link_libraries(unitTests
+    salmon_core
+    gff
+    ${PTHREAD_LIB}
+    ${Boost_LIBRARIES}
+    ${GAT_SOURCE_DIR}/external/install/lib/libstaden-read.a
+    ${ZLIB_LIBRARY}
+    ${SUFFARRAY_LIB}
+    ${SUFFARRAY64_LIB}
+    ${GAT_SOURCE_DIR}/external/install/lib/libjellyfish-2.0.a
+    ${GAT_SOURCE_DIR}/external/install/lib/libbwa.a
+    m
+    ${LIBLZMA_LIBRARIES}
+    ${BZIP2_LIBRARIES}
+    ${TBB_LIBRARIES}
+    ${LIBSALMON_LINKER_FLAGS}
+    ${NON_APPLECLANG_LIBS}
+    ${FAST_MALLOC_LIB}
+    )
+
 ##
 #  This ensures that the salmon executable should work with or without `make install`
 ##
+# Grumble grumble . . . OSX
 if (APPLE)
-	add_custom_command(TARGET salmon
-		POST_BUILD
-		COMMAND install_name_tool -add_rpath ${GAT_SOURCE_DIR}/external/install/lib salmon 
-		COMMAND install_name_tool -add_rpath @executable_path/../lib salmon 
-		)
+    # only attempt install_name_tool for tbb if we installed it
+    if (${TBB_LIBRARY_DIRS} MATCHES ${GAT_SOURCE_DIR}/external/install/lib)
+        add_custom_command(TARGET salmon
+            POST_BUILD
+            COMMAND install_name_tool -change libtbb.dylib @rpath/libtbb.dylib ${GAT_SOURCE_DIR}/build/src/salmon
+            COMMAND install_name_tool -change libtbbmalloc.dylib @rpath/libtbbmalloc.dylib ${GAT_SOURCE_DIR}/build/src/salmon
+            COMMAND install_name_tool -change libtbbmalloc_proxy.dylib @rpath/libtbbmalloc_proxy.dylib ${GAT_SOURCE_DIR}/build/src/salmon
+            COMMAND install_name_tool -add_rpath  ${GAT_SOURCE_DIR}/external/install/lib ${GAT_SOURCE_DIR}/build/src/salmon
+            )
+        add_custom_command(TARGET unitTests
+            POST_BUILD
+            COMMAND install_name_tool -change libtbb.dylib @rpath/libtbb.dylib ${GAT_SOURCE_DIR}/build/src/unitTests
+            COMMAND install_name_tool -change libtbbmalloc.dylib @rpath/libtbbmalloc.dylib ${GAT_SOURCE_DIR}/build/src/unitTests
+            COMMAND install_name_tool -change libtbbmalloc_proxy.dylib @rpath/libtbbmalloc_proxy.dylib ${GAT_SOURCE_DIR}/build/src/unitTests
+            COMMAND install_name_tool -add_rpath  ${GAT_SOURCE_DIR}/external/install/lib ${GAT_SOURCE_DIR}/build/src/unitTests
+            )
+    endif()
+else()
+    # related to complete static linking --- on hold    
+    set (BOOST_THREAD_LIBRARY)
 endif()
 
+#if (APPLE)
+#	add_custom_command(TARGET salmon
+#		POST_BUILD
+#		COMMAND install_name_tool -add_rpath ${GAT_SOURCE_DIR}/external/install/lib salmon
+#	COMMAND install_name_tool -add_rpath @executable_path/../lib salmon
+#		)
+#endif()
+
 ##### ======================================
 
 IF(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
@@ -145,43 +213,41 @@ set(INSTALL_LIB_DIR lib )
 set(INSTALL_BIN_DIR bin )
 set(INSTALL_INCLUDE_DIR include )
 
-install(DIRECTORY 
-        ${GAT_SOURCE_DIR}/external/install/lib/ 
+install(DIRECTORY
+        ${GAT_SOURCE_DIR}/external/install/lib/
         DESTINATION ${INSTALL_LIB_DIR}
 	    FILES_MATCHING PATTERN "libtbb*.${SHARED_LIB_EXTENSION}*"
     )
-if (APPLE)
-install(DIRECTORY 
-        ${GAT_SOURCE_DIR}/external/install/lib/ 
-        DESTINATION ${INSTALL_LIB_DIR}
-	    FILES_MATCHING PATTERN "libcmph*.${SHARED_LIB_EXTENSION}*"
-       )
-endif()
-
 
 # install(FILES ${Boost_LIBRARIES}
 # 	           DESTINATION ${INSTALL_LIB_DIR})
 
 install(TARGETS salmon salmon_core
-                RUNTIME DESTINATION bin 
+                RUNTIME DESTINATION bin
                 LIBRARY DESTINATION lib
                 ARCHIVE DESTINATION lib
         )
 
+install(TARGETS unitTests
+        RUNTIME DESTINATION tests
+)
+
 set(POST_INSTALL_SCRIPT ${GAT_SOURCE_DIR}/cmake/PostInstall.cmake)
 
 install(
-    CODE 
+    CODE
     "
     execute_process(COMMAND \"${CMAKE_COMMAND}\"
                             -DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}
-                            -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX} 
+                            -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
                             -P \"${POST_INSTALL_SCRIPT}\")
     "
 )
 
 include(InstallRequiredSystemLibraries)
-add_test( NAME salmon_read_test COMMAND ${CMAKE_COMMAND} -DTOPLEVEL_DIR=${GAT_SOURCE_DIR} -P ${GAT_SOURCE_DIR}/cmake/TestSalmon.cmake )
+add_test( NAME unit_tests COMMAND ${CMAKE_COMMAND} -DTOPLEVEL_DIR=${CMAKE_INSTALL_PREFIX} -P ${GAT_SOURCE_DIR}/cmake/UnitTests.cmake )
+add_test( NAME salmon_read_test_fmd COMMAND ${CMAKE_COMMAND} -DTOPLEVEL_DIR=${GAT_SOURCE_DIR} -P ${GAT_SOURCE_DIR}/cmake/TestSalmonFMD.cmake )
+add_test( NAME salmon_read_test_quasi COMMAND ${CMAKE_COMMAND} -DTOPLEVEL_DIR=${GAT_SOURCE_DIR} -P ${GAT_SOURCE_DIR}/cmake/TestSalmonQuasi.cmake )
 
 ####
 #
@@ -195,7 +261,7 @@ add_test( NAME salmon_read_test COMMAND ${CMAKE_COMMAND} -DTOPLEVEL_DIR=${GAT_SO
 
 # # when building, don't use the install RPATH already
 # # (but later on when installing)
-# SET(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE) 
+# SET(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
 # MESSAGE( "LINK DIRECTORIES : ${CMAKE_LIBRARY_PATH}")
 # SET(CMAKE_INSTALL_RPATH "${CMAKE_LIBRARY_PATH}")
 
@@ -211,24 +277,24 @@ add_test( NAME salmon_read_test COMMAND ${CMAKE_COMMAND} -DTOPLEVEL_DIR=${GAT_SO
 # ENDIF("${isSystemDir}" STREQUAL "-1")
 
 
-# set( BUILD_LUT_SRCS 
+# set( BUILD_LUT_SRCS
 # BuildLUT.cpp
 # IndexedCounter.cpp
 # PerfectHashIndexer.cpp
 # )
 
 # add_executable( BuildLUT ${BUILD_LUT_SRCS} )
-# target_link_libraries( BuildLUT 
-# 	${Boost_LIBRARIES} ${ZLIB_LIBRARY} 
+# target_link_libraries( BuildLUT
+# 	${Boost_LIBRARIES} ${ZLIB_LIBRARY}
 # 	cmph # perfect hashing library
-# 	jellyfish-2.0 pthread 
+# 	jellyfish-2.0 pthread
 #     gomp lib_activeobject lib_g2logger m tbb)
 
 # INSTALL(TARGETS BuildLUT DESTINATION ${PROJECT_SOURCE_DIR}/bin COMPONENT comp_buildlut)
 
 # set ( BUILD_INDEX_SRCS Indexer.cpp )
 
-# set( ANALYZE_TRANSCRIPT_GRAPH 
+# set( ANALYZE_TRANSCRIPT_GRAPH
 # AnalyzeTranscriptGraph.cpp
 # IndexedCounter.cpp
 # PerfectHashIndexer.cpp
@@ -243,17 +309,17 @@ add_test( NAME salmon_read_test COMMAND ${CMAKE_COMMAND} -DTOPLEVEL_DIR=${GAT_SO
 
 # add_executable( ComputeTranscriptGraph ${COMPUTE_TGRAPH_SRCS} )
 # target_link_libraries( ComputeTranscriptGraph
-# 	${Boost_LIBRARIES} ${ZLIB_LIBRARY} 
+# 	${Boost_LIBRARIES} ${ZLIB_LIBRARY}
 # 	cmph # perfect hashing library
-# 	jellyfish pthread 
+# 	jellyfish pthread
 #     gomp lib_activeobject lib_g2logger blas m tbb nnls)
 
 
 # add_executable( AnalyzeTranscriptGraph ${ANALYZE_TRANSCRIPT_GRAPH} )
-# target_link_libraries( AnalyzeTranscriptGraph 
-# 	${Boost_LIBRARIES} ${ZLIB_LIBRARY} 
+# target_link_libraries( AnalyzeTranscriptGraph
+# 	${Boost_LIBRARIES} ${ZLIB_LIBRARY}
 # 	cmph # perfect hashing library
-# 	jellyfish pthread 
+# 	jellyfish pthread
 #     gomp lib_activeobject lib_g2logger blas m tbb)
 
 
@@ -263,4 +329,3 @@ add_test( NAME salmon_read_test COMMAND ${CMAKE_COMMAND} -DTOPLEVEL_DIR=${GAT_SO
 
 #add_executable( TestTsnnls ${TEST_SRCS} )
 #target_link_libraries( TestTsnnls ${Boost_LIBRARIES} ${ZLIB_LIBRARY} blas lapack gomp lib_activeobject lib_g2logger tsnnls )
-
diff --git a/src/CollapsedEMOptimizer.cpp b/src/CollapsedEMOptimizer.cpp
index 1fcba96..1ae4bab 100644
--- a/src/CollapsedEMOptimizer.cpp
+++ b/src/CollapsedEMOptimizer.cpp
@@ -13,7 +13,7 @@
 #include <boost/math/special_functions/digamma.hpp>
 
 // C++ string formatting library
-#include "format.h"
+#include "spdlog/details/format.h"
 
 #include "cuckoohash_map.hh"
 #include "Eigen/Dense"
@@ -26,7 +26,8 @@
 #include "ReadPair.hpp"
 #include "UnpairedRead.hpp"
 #include "ReadExperiment.hpp"
-
+#include "MultinomialSampler.hpp"
+#include "BootstrapWriter.hpp"
 
 using BlockedIndexRange =  tbb::blocked_range<size_t>;
 
@@ -54,22 +55,150 @@ double normalize(std::vector<tbb::atomic<double>>& vec) {
     return sum;
 }
 
-/*
- * Use atomic compare-and-swap to update val to
- * val + inc.  Update occurs in a loop in case other
- * threads update in the meantime.
+
+template <typename VecT>
+double truncateCountVector(VecT& alphas, double cutoff) {
+    // Truncate tiny expression values
+    double alphaSum = 0.0;
+
+    for (size_t i = 0; i < alphas.size(); ++i) {
+        if (alphas[i] <= cutoff) { alphas[i] = 0.0; }
+        alphaSum += alphas[i];
+    }
+    return alphaSum;
+}
+
+/**
+ * Single-threaded EM-update routine for use in bootstrapping
+ */
+template <typename VecT>
+void EMUpdate_(
+        std::vector<std::vector<uint32_t>>& txpGroupLabels,
+        std::vector<std::vector<double>>& txpGroupCombinedWeights,
+        std::vector<uint64_t>& txpGroupCounts,
+        std::vector<Transcript>& transcripts,
+        Eigen::VectorXd& effLens,
+        const VecT& alphaIn,
+        VecT& alphaOut) {
+
+    assert(alphaIn.size() == alphaOut.size());
+
+    size_t numEqClasses = txpGroupLabels.size();
+    for (size_t eqID = 0; eqID < numEqClasses; ++eqID) {
+        uint64_t count = txpGroupCounts[eqID];
+        // for each transcript in this class
+        const std::vector<uint32_t>& txps = txpGroupLabels[eqID];
+        const auto& auxs = txpGroupCombinedWeights[eqID];
+
+        double denom = 0.0;
+        size_t groupSize = txps.size();
+        // If this is a single-transcript group,
+        // then it gets the full count.  Otherwise,
+        // update according to the EM rule.
+        if (BOOST_LIKELY(groupSize > 1)) {
+           for (size_t i = 0; i < groupSize; ++i) {
+               auto tid = txps[i];
+               auto aux = auxs[i]; 
+               double v = alphaIn[tid] * aux;
+               denom += v;
+            }
+
+            if (denom <= ::minEQClassWeight) {
+                // tgroup.setValid(false);
+            } else {
+                double invDenom = count / denom;
+                for (size_t i = 0; i < groupSize; ++i) {
+                    auto tid = txps[i];
+                    auto aux = auxs[i]; 
+                    double v = alphaIn[tid] * aux;
+                    if (!std::isnan(v)) {
+                        salmon::utils::incLoop(alphaOut[tid], v * invDenom);
+                    }
+                }
+            }
+        } else {
+            salmon::utils::incLoop(alphaOut[txps.front()], count);
+        }
+    }
+}
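
[Editorial note, not part of the sources.] A toy illustration of the responsibility split performed above, with made-up numbers: a class holding 10 fragments shared by two transcripts is divided in proportion to alphaIn[tid] * aux.

#include <cstdio>

int main() {
    double alphaIn[2] = {4.0, 1.0};   // current abundance estimates
    double aux[2]     = {0.5, 1.0};   // combined weights for this class
    double count      = 10.0;         // fragments in the equivalence class

    double denom  = alphaIn[0] * aux[0] + alphaIn[1] * aux[1];   // 3.0
    double share0 = count * (alphaIn[0] * aux[0]) / denom;       // ~6.67
    double share1 = count * (alphaIn[1] * aux[1]) / denom;       // ~3.33
    std::printf("%.2f + %.2f = %.2f\n", share0, share1, share0 + share1);
    return 0;
}
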
+
+/**
+ * Single-threaded VBEM-update routine for use in bootstrapping
  */
-void incLoop(tbb::atomic<double>& val, double inc) {
-        double oldMass = val.load();
-        double returnedMass = oldMass;
-        double newMass{oldMass + inc};
-        do {
-            oldMass = returnedMass;
-            newMass = oldMass + inc;
-            returnedMass = val.compare_and_swap(newMass, oldMass);
-        } while (returnedMass != oldMass);
+template <typename VecT>
+void VBEMUpdate_(
+		std::vector<std::vector<uint32_t>>& txpGroupLabels,
+		std::vector<std::vector<double>>& txpGroupCombinedWeights,
+		std::vector<uint64_t>& txpGroupCounts,
+		std::vector<Transcript>& transcripts,
+		Eigen::VectorXd& effLens,
+		double priorAlpha,
+		double totLen,
+		const VecT& alphaIn,
+		VecT& alphaOut,
+		VecT& expTheta) {
+
+	assert(alphaIn.size() == alphaOut.size());
+
+	size_t numEQClasses = txpGroupLabels.size();
+	double alphaSum = {0.0};
+	for (auto& e : alphaIn) { alphaSum += e; }
+
+	double logNorm = boost::math::digamma(alphaSum);
+
+
+	double prior = priorAlpha;
+	double priorNorm = prior * totLen;
+
+	for (size_t i = 0; i < transcripts.size(); ++i) {
+	  if (alphaIn[i] > ::minWeight) {
+	    expTheta[i] = std::exp(boost::math::digamma(alphaIn[i]) - logNorm);
+	  } else {
+	    expTheta[i] = 0.0;
+	  }
+	  alphaOut[i] = prior;
+	}
+
+	for (size_t eqID = 0; eqID < numEQClasses; ++eqID) {
+	  uint64_t count = txpGroupCounts[eqID];
+	  const std::vector<uint32_t>& txps = txpGroupLabels[eqID];
+	  const auto& auxs = txpGroupCombinedWeights[eqID];
+
+	  double denom = 0.0;
+	  size_t groupSize = txps.size();
+	  // If this is a single-transcript group,
+	  // then it gets the full count.  Otherwise,
+	  // update according to our VBEM rule.
+	  if (BOOST_LIKELY(groupSize > 1)) {
+	    for (size_t i = 0; i < groupSize; ++i) {
+	      auto tid = txps[i];
+	      auto aux = auxs[i]; 
+	      if (expTheta[tid] > 0.0) {
+		double v = expTheta[tid] * aux;
+		denom += v;
+	      }
+	    }
+	    if (denom <= ::minEQClassWeight) {
+	      // tgroup.setValid(false);
+	    } else {
+	      double invDenom = count / denom;
+	      for (size_t i = 0; i < groupSize; ++i) {
+		auto tid = txps[i];
+		auto aux = auxs[i];
+		if (expTheta[tid] > 0.0) {
+		  double v = expTheta[tid] * aux;
+		  salmon::utils::incLoop(alphaOut[tid], v * invDenom);
+		}
+	      }
+	    }
+
+	  } else {
+	    salmon::utils::incLoop(alphaOut[txps.front()], count);
+	  }
+	}
 }
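
[Editorial note, not part of the sources.] In the code's own variable names, the variational update implemented above is:

// expTheta[t] = exp( digamma(alphaIn[t]) - digamma(sum_j alphaIn[j]) )   (0 if alphaIn[t] is tiny)
// alphaOut[t] = priorAlpha
//             + sum over classes e of
//               count_e * (expTheta[t] * aux[e][t]) / (sum_{j in e} expTheta[j] * aux[e][j])

with single-transcript classes assigned their full count directly. This is the same responsibility split as the EM rule, but with exp(digamma(alpha)) in place of alpha: for large alpha the two nearly coincide, while very small alpha values are suppressed much more strongly, so weakly supported transcripts stay close to the prior.
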
 
+
 /*
  * Use the "standard" EM algorithm over equivalence
  * classes to estimate the latent variables (alphaOut)
@@ -85,7 +214,7 @@ void EMUpdate_(
     assert(alphaIn.size() == alphaOut.size());
 
     tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(eqVec.size())),
-            [&eqVec, &alphaIn, &alphaOut](const BlockedIndexRange& range) -> void {
+            [&eqVec, &alphaIn, &effLens, &alphaOut](const BlockedIndexRange& range) -> void {
             for (auto eqID : boost::irange(range.begin(), range.end())) {
             auto& kv = eqVec[eqID];
 
@@ -93,42 +222,39 @@ void EMUpdate_(
             // for each transcript in this class
             const TranscriptGroup& tgroup = kv.first;
             if (tgroup.valid) {
-                const std::vector<uint32_t>& txps = tgroup.txps;
-                const std::vector<double>& auxs = kv.second.weights;
+            const std::vector<uint32_t>& txps = tgroup.txps;
+            const auto& auxs = kv.second.combinedWeights;
+
+            double denom = 0.0;
+            size_t groupSize = txps.size();
+            // If this is a single-transcript group,
+            // then it gets the full count.  Otherwise,
+            // update according to the EM rule.
+            if (BOOST_LIKELY(groupSize > 1)) {
+            for (size_t i = 0; i < groupSize; ++i) {
+            auto tid = txps[i];
+            auto aux = auxs[i]; 
+            double v = alphaIn[tid] * aux;
+            denom += v;
+            }
 
-                double denom = 0.0;
-                size_t groupSize = txps.size();
-                // If this is a single-transcript group,
-                // then it gets the full count.  Otherwise,
-                // update according to our VBEM rule.
-                if (BOOST_LIKELY(groupSize > 1)) {
-                    for (size_t i = 0; i < groupSize; ++i) {
+            if (denom <= ::minEQClassWeight) {
+                // tgroup.setValid(false);
+            } else {
+                double invDenom = count / denom;
+                for (size_t i = 0; i < groupSize; ++i) {
                     auto tid = txps[i];
-                    auto aux = auxs[i];
-                    //double el = effLens(tid);
-                    //if (el <= 0) { el = 1.0; }
+                    auto aux = auxs[i]; 
                     double v = alphaIn[tid] * aux;
-                    denom += v;
+                    if (!std::isnan(v)) {
+                        salmon::utils::incLoop(alphaOut[tid], v * invDenom);
                     }
-
-                    if (denom <= ::minEQClassWeight) {
-                        // tgroup.setValid(false);
-                    } else {
-
-                        double invDenom = count / denom;
-                        for (size_t i = 0; i < groupSize; ++i) {
-                            auto tid = txps[i];
-                            auto aux = auxs[i];
-                            double v = alphaIn[tid] * aux;
-                            if (!std::isnan(v)) {
-                                incLoop(alphaOut[tid], v * invDenom);
-                            }
-                        }
-                    }
-                } else {
-                    incLoop(alphaOut[txps.front()], count);
                 }
             }
+            } else {
+                salmon::utils::incLoop(alphaOut[txps.front()], count);
+            }
+            }
     }
     });
 
@@ -175,7 +301,8 @@ void VBEMUpdate_(
 
     tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(eqVec.size())),
             [&eqVec, &alphaIn,
-             &alphaOut, &expTheta]( const BlockedIndexRange& range) -> void {
+             &alphaOut, &effLens, 
+	     &expTheta]( const BlockedIndexRange& range) -> void {
             for (auto eqID : boost::irange(range.begin(), range.end())) {
             auto& kv = eqVec[eqID];
 
@@ -184,7 +311,7 @@ void VBEMUpdate_(
             const TranscriptGroup& tgroup = kv.first;
             if (tgroup.valid) {
                 const std::vector<uint32_t>& txps = tgroup.txps;
-                const std::vector<double>& auxs = kv.second.weights;
+                const auto& auxs = kv.second.combinedWeights;
 
                 double denom = 0.0;
                 size_t groupSize = txps.size();
@@ -194,7 +321,7 @@ void VBEMUpdate_(
                 if (BOOST_LIKELY(groupSize > 1)) {
                     for (size_t i = 0; i < groupSize; ++i) {
                         auto tid = txps[i];
-                        auto aux = auxs[i];
+                        auto aux = auxs[i]; 
                         if (expTheta[tid] > 0.0) {
                             double v = expTheta[tid] * aux;
                             denom += v;
@@ -209,22 +336,24 @@ void VBEMUpdate_(
                             auto aux = auxs[i];
                             if (expTheta[tid] > 0.0) {
                               double v = expTheta[tid] * aux;
-                              incLoop(alphaOut[tid], v * invDenom);
+			      salmon::utils::incLoop(alphaOut[tid], v * invDenom);
                             }
                         }
                     }
 
                 } else {
-                    incLoop(alphaOut[txps.front()], count);
+                    salmon::utils::incLoop(alphaOut[txps.front()], count);
                 }
             }
         }});
 
 }
 
+template <typename VecT>
 size_t markDegenerateClasses(
         std::vector<std::pair<const TranscriptGroup, TGValue>>& eqVec,
-        CollapsedEMOptimizer::VecType& alphaIn,
+        VecT& alphaIn,
+        Eigen::VectorXd& effLens,
         std::shared_ptr<spdlog::logger> jointLog,
         bool verbose=false) {
 
@@ -235,12 +364,12 @@ size_t markDegenerateClasses(
         // for each transcript in this class
         const TranscriptGroup& tgroup = kv.first;
         const std::vector<uint32_t>& txps = tgroup.txps;
-        const std::vector<double>& auxs = kv.second.weights;
+        const auto& auxs = kv.second.combinedWeights;
 
         double denom = 0.0;
         for (size_t i = 0; i < txps.size(); ++i) {
             auto tid = txps[i];
-            auto aux = auxs[i];
+            auto aux = auxs[i]; 
             double v = alphaIn[tid] * aux;
             if (!std::isnan(v)) {
                 denom += v;
@@ -274,7 +403,10 @@ size_t markDegenerateClasses(
             errstream << "}\n";
             errstream << "============================\n\n";
 
-            jointLog->info(errstream.str());
+            bool verbose{false};
+            if (verbose) {
+                jointLog->info(errstream.str());
+            }
             ++numDropped;
             kv.first.setValid(false);
         }
@@ -285,51 +417,251 @@ size_t markDegenerateClasses(
 
 CollapsedEMOptimizer::CollapsedEMOptimizer() {}
 
+
+bool doBootstrap(
+        std::vector<std::vector<uint32_t>>& txpGroups,
+        std::vector<std::vector<double>>& txpGroupCombinedWeights,
+        std::vector<Transcript>& transcripts,
+        Eigen::VectorXd& effLens,
+        std::vector<double>& sampleWeights,
+        uint64_t totalNumFrags,
+        uint64_t numMappedFrags,
+        double uniformTxpWeight,
+        std::atomic<uint32_t>& bsNum,
+        SalmonOpts& sopt,
+        std::function<bool(const std::vector<double>&)>& writeBootstrap,
+        double relDiffTolerance,
+        uint32_t maxIter) {
+
+    uint32_t minIter = 50;
+
+    // Determine up front if we're going to use scaled counts.
+    bool useScaledCounts = !(sopt.useQuasi or sopt.allowOrphans);
+    bool useVBEM{sopt.useVBOpt};
+    size_t numClasses = txpGroups.size();
+    CollapsedEMOptimizer::SerialVecType alphas(transcripts.size(), 0.0);
+    CollapsedEMOptimizer::SerialVecType alphasPrime(transcripts.size(), 0.0);
+    CollapsedEMOptimizer::SerialVecType expTheta(transcripts.size(), 0.0);
+    std::vector<uint64_t> sampCounts(numClasses, 0);
+
+    uint32_t numBootstraps = sopt.numBootstraps;
+
+    auto& jointLog = sopt.jointLog;
+
+    std::random_device rd;
+    MultinomialSampler msamp(rd);
+
+    while (bsNum++ < numBootstraps) {
+        // Do a new bootstrap
+        msamp(sampCounts.begin(), totalNumFrags, numClasses, sampleWeights.begin());
+
+	double totalLen{0.0};
+        for (size_t i = 0; i < transcripts.size(); ++i) {
+            alphas[i] = transcripts[i].getActive() ? uniformTxpWeight * totalNumFrags : 0.0;
+            totalLen += effLens(i);
+        }
+
+        bool converged{false};
+        double maxRelDiff = -std::numeric_limits<double>::max();
+        size_t itNum = 0;
+
+        // If we use VBEM, we'll need the prior parameters
+        double priorAlpha = 0.01;
+        double minAlpha = 1e-8;
+        double alphaCheckCutoff = 1e-2;
+        double cutoff = (useVBEM) ? (priorAlpha + minAlpha) : minAlpha;
+
+        while (itNum < minIter or (itNum < maxIter and !converged)) {
+
+            if (useVBEM) {
+                VBEMUpdate_(txpGroups, txpGroupCombinedWeights, sampCounts, transcripts,
+                        effLens, priorAlpha, totalLen, alphas, alphasPrime, expTheta);
+            } else {
+                EMUpdate_(txpGroups, txpGroupCombinedWeights, sampCounts, transcripts,
+                        effLens, alphas, alphasPrime);
+            }
+
+            converged = true;
+            maxRelDiff = -std::numeric_limits<double>::max();
+            for (size_t i = 0; i < transcripts.size(); ++i) {
+                if (alphasPrime[i] > alphaCheckCutoff) {
+                    double relDiff = std::abs(alphas[i] - alphasPrime[i]) / alphasPrime[i];
+                    maxRelDiff = (relDiff > maxRelDiff) ? relDiff : maxRelDiff;
+                    if (relDiff > relDiffTolerance) {
+                        converged = false;
+                    }
+                }
+                alphas[i] = alphasPrime[i];
+                alphasPrime[i] = 0.0;
+            }
+
+            ++itNum;
+        }
+
+        double alphaSum = truncateCountVector(alphas, cutoff);
+
+        if (alphaSum < minWeight) {
+            jointLog->error("Total alpha weight was too small! "
+                    "Make sure you ran salmon correclty.");
+            return false;
+        }
+
+        if (useScaledCounts) {
+            double mappedFragsDouble = static_cast<double>(numMappedFrags);
+            double alphaSum = 0.0;
+            for (auto a : alphas) { alphaSum += a; }
+            if (alphaSum > ::minWeight) {
+                double scaleFrac = 1.0 / alphaSum;
+                // scaleFrac converts alpha to nucleotide fraction,
+                // and multiplying by numMappedFrags scales by the total
+                // number of mapped fragments to provide an estimated count.
+                for (auto& a : alphas) { a = mappedFragsDouble * (a * scaleFrac); }
+            } else { // This shouldn't happen!
+                sopt.jointLog->error("Bootstrap had insufficient number of fragments!"
+                                     "Something is probably wrong; please check that you "
+                                     "have run salmon correctly and report this to GitHub.");
+            }
+        }
+        writeBootstrap(alphas);
+    }
+    return true;
+}
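
[Editorial note, not part of the sources.] The resampling step at the top of the loop draws a full multinomial sample over equivalence classes. The code uses MultinomialSampler for this; the following equivalent-in-spirit sketch, with hypothetical names, only shows the distribution being drawn from.

#include <cstdint>
#include <random>
#include <vector>

// Draw totalNumFrags fragments; each lands in class i with probability
// proportional to sampleWeights[i] (that class's share of the original counts).
std::vector<uint64_t> resampleClassCounts(const std::vector<double>& sampleWeights,
                                          uint64_t totalNumFrags,
                                          std::mt19937& gen) {
    std::discrete_distribution<size_t> pick(sampleWeights.begin(), sampleWeights.end());
    std::vector<uint64_t> counts(sampleWeights.size(), 0);
    for (uint64_t n = 0; n < totalNumFrags; ++n) {
        ++counts[pick(gen)];
    }
    return counts;
}
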
+
 template <typename ExpT>
-bool CollapsedEMOptimizer::optimize(ExpT& readExp,
+bool CollapsedEMOptimizer::gatherBootstraps(
+        ExpT& readExp,
         SalmonOpts& sopt,
+        std::function<bool(const std::vector<double>&)>& writeBootstrap,
         double relDiffTolerance,
         uint32_t maxIter) {
 
-    tbb::task_scheduler_init tbbScheduler(sopt.numThreads);
     std::vector<Transcript>& transcripts = readExp.transcripts();
-
-    using VecT = CollapsedEMOptimizer::VecType;
+    using VecT = CollapsedEMOptimizer::SerialVecType;
     // With atomics
-    VecType alphas(transcripts.size(), 0.0);
-    VecType alphasPrime(transcripts.size(), 0.0);
-    VecType expTheta(transcripts.size());
+    VecT alphas(transcripts.size(), 0.0);
+    VecT alphasPrime(transcripts.size(), 0.0);
+    VecT expTheta(transcripts.size());
     Eigen::VectorXd effLens(transcripts.size());
 
+    bool scaleCounts = (!sopt.useQuasi and !sopt.allowOrphans);
+
+    auto& fragStartDists = readExp.fragmentStartPositionDistributions();
+    uint64_t numMappedFrags = scaleCounts ? readExp.upperBoundHits() : readExp.numMappedFragments();
+
+    uint32_t numBootstraps = sopt.numBootstraps;
+
     std::vector<std::pair<const TranscriptGroup, TGValue>>& eqVec =
         readExp.equivalenceClassBuilder().eqVec();
 
+    std::unordered_set<uint32_t> activeTranscriptIDs;
+    for (auto& kv : eqVec) {
+        auto& tg = kv.first;
+        for (auto& t : tg.txps) {
+            transcripts[t].setActive();
+            activeTranscriptIDs.insert(t);
+        }
+    }
+
     bool useVBEM{sopt.useVBOpt};
     // If we use VBEM, we'll need the prior parameters
     double priorAlpha = 0.01;
 
     auto jointLog = sopt.jointLog;
 
-    double totalNumFrags{static_cast<double>(readExp.numMappedReads())};
+    jointLog->info("Will draw {} bootstrap samples", numBootstraps);
+    jointLog->info("Optimizing over {} equivalence classes", eqVec.size());
+
+    double totalNumFrags{static_cast<double>(readExp.numMappedFragments())};
     double totalLen{0.0};
 
+    if (activeTranscriptIDs.size() == 0) {
+        jointLog->error("It seems that no transcripts are expressed; something is likely wrong!");
+        std::exit(1);
+    }
+
+    double scale = 1.0 / activeTranscriptIDs.size();
     for (size_t i = 0; i < transcripts.size(); ++i) {
-        double m = transcripts[i].mass(false);
-        if (std::isnan(m)) {
-            std::cerr << "FOUND NAN for txp " << i << "\n";
-        }
-        alphas[i] = (m == salmon::math::LOG_0) ? 0.0 : m;
-        effLens(i) = std::exp(transcripts[i].getCachedEffectiveLength());
+        //double m = transcripts[i].mass(false);
+        alphas[i] = transcripts[i].getActive() ? scale * totalNumFrags : 0.0;
+        effLens(i) = (sopt.noEffectiveLengthCorrection) ?
+                      transcripts[i].RefLength :
+					  std::exp(transcripts[i].getCachedLogEffectiveLength());
         totalLen += effLens(i);
     }
 
-    // If the user requested *not* to use "rich" equivalence classes,
-    // then wipe out all of the weight information here and simply replace
-    // the weights with the effective length terms (here, the *inverse* of
-    // the effective length).
-    if (sopt.noRichEqClasses) {
-        tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(eqVec.size())),
-                [&eqVec, &effLens]( const BlockedIndexRange& range) -> void {
+    auto numRemoved = markDegenerateClasses(eqVec, alphas, effLens, sopt.jointLog);
+    sopt.jointLog->info("Marked {} weighted equivalence classes as degenerate",
+            numRemoved);
+
+    size_t itNum{0};
+    double minAlpha = 1e-8;
+    double cutoff = (useVBEM) ? (priorAlpha + minAlpha) : minAlpha;
+
+    // Since we will use the same weights and transcript groups for each
+    // of the bootstrap samples (only the count vector will change), it
+    // makes sense to keep only one copy of these.
+    using TGroupLabelT = std::vector<uint32_t>;
+    using TGroupWeightVec = std::vector<double>;
+    std::vector<TGroupLabelT> txpGroups;
+    std::vector<TGroupWeightVec> txpGroupCombinedWeights;
+    std::vector<uint64_t> origCounts;
+    uint64_t totalCount{0};
+
+    for (auto& kv : eqVec) {
+        uint64_t count = kv.second.count;
+        // for each transcript in this class
+        const TranscriptGroup& tgroup = kv.first;
+        if (tgroup.valid) {
+            const std::vector<uint32_t>& txps = tgroup.txps;
+            const auto& auxs = kv.second.combinedWeights;
+            txpGroups.push_back(txps);
+	    // Convert to non-atomic
+            txpGroupCombinedWeights.emplace_back(auxs.begin(), auxs.end());
+            origCounts.push_back(count);
+            totalCount += count;
+        }
+    }
+
+    double floatCount = totalCount;
+    std::vector<double> samplingWeights(txpGroups.size(), 0.0);
+    for (size_t i = 0; i < origCounts.size(); ++i) {
+        samplingWeights[i] = origCounts[i] / floatCount;
+    }
+
+    size_t numWorkerThreads{1};
+    if (sopt.numThreads > 1 and numBootstraps > 1) {
+        numWorkerThreads = std::min(sopt.numThreads - 1, numBootstraps - 1);
+    }
+
+    std::atomic<uint32_t> bsCounter{0};
+    std::vector<std::thread> workerThreads;
+    for (size_t tn = 0; tn < numWorkerThreads; ++tn) {
+        workerThreads.emplace_back(doBootstrap,
+                std::ref(txpGroups),
+                std::ref(txpGroupCombinedWeights),
+                std::ref(transcripts),
+                std::ref(effLens),
+                std::ref(samplingWeights),
+                totalCount,
+                numMappedFrags,
+                scale,
+                std::ref(bsCounter),
+                std::ref(sopt),
+                std::ref(writeBootstrap),
+                relDiffTolerance,
+                maxIter);
+    }
+
+    for (auto& t : workerThreads) {
+        t.join();
+    }
+    return true;
+}
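
The resampling performed by the doBootstrap workers launched above (doBootstrap is defined elsewhere in CollapsedEMOptimizer.cpp) amounts to a multinomial draw over the cached equivalence-class counts. A minimal sketch of one such draw, using only the samplingWeights and totalCount prepared above; the helper name is illustrative, not part of the source:

    #include <cstddef>
    #include <cstdint>
    #include <random>
    #include <vector>

    std::vector<uint64_t> drawBootstrapCounts(const std::vector<double>& samplingWeights,
                                              uint64_t totalCount,
                                              std::mt19937_64& gen) {
        // One multinomial draw, realized as totalCount independent categorical draws
        // over the equivalence classes, weighted by their original count fractions.
        std::discrete_distribution<std::size_t> classDist(samplingWeights.begin(),
                                                          samplingWeights.end());
        std::vector<uint64_t> newCounts(samplingWeights.size(), 0);
        for (uint64_t i = 0; i < totalCount; ++i) {
            ++newCounts[classDist(gen)];
        }
        return newCounts;
    }
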
+
+void updateEqClassWeights(std::vector<std::pair<const TranscriptGroup, TGValue>>& eqVec,
+                          Eigen::VectorXd& posWeightInvDenoms) {
+    tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(eqVec.size())),
+            [&eqVec, &posWeightInvDenoms]( const BlockedIndexRange& range) -> void {
                 // For each index in the equivalence class vector
                 for (auto eqID : boost::irange(range.begin(), range.end())) {
                     // The vector entry
@@ -343,29 +675,185 @@ bool CollapsedEMOptimizer::optimize(ExpT& readExp,
 
                     // Iterate over each weight and set it equal to
                     // 1 / effLen of the corresponding transcript
+                    double wsum{0.0};
                     for (size_t i = 0; i < classSize; ++i) {
-                        double el = effLens(k.txps[i]);
-                        v.weights[i] = (el <= 0.0) ? 1.0 : (1.0 / el);
+                        auto tid = k.txps[i];
+                        v.combinedWeights[i] = kv.second.count * (v.weights[i] * v.posWeights[i] * posWeightInvDenoms[tid]);
+                        wsum += v.combinedWeights[i];
+                    }
+                    double wnorm = 1.0 / wsum;
+                    for (size_t i = 0; i < classSize; ++i) {
+                        v.combinedWeights[i] *= wnorm;
                     }
                 }
-        });
+            });
+}
+
+template <typename ExpT>
+bool CollapsedEMOptimizer::optimize(ExpT& readExp,
+        SalmonOpts& sopt,
+        double relDiffTolerance,
+        uint32_t maxIter) {
+
+    tbb::task_scheduler_init tbbScheduler(sopt.numThreads);
+    std::vector<Transcript>& transcripts = readExp.transcripts();
+
+    uint32_t minIter = 50;
+    bool doBiasCorrect = sopt.biasCorrect;
+    auto& expectedDist = readExp.expectedBias();
+
+    using VecT = CollapsedEMOptimizer::VecType;
+    // With atomics
+    VecType alphas(transcripts.size(), 0.0);
+    VecType alphasPrime(transcripts.size(), 0.0);
+    VecType expTheta(transcripts.size());
+
+    Eigen::VectorXd effLens(transcripts.size());
+    Eigen::VectorXd posWeightInvDenoms(transcripts.size());
+
+    std::vector<std::pair<const TranscriptGroup, TGValue>>& eqVec =
+        readExp.equivalenceClassBuilder().eqVec();
+
+    bool noRichEq = sopt.noRichEqClasses;
+    bool useFSPD{sopt.useFSPD};
+    bool useVBEM{sopt.useVBOpt};
+
+    // If we use VBEM, we'll need the prior parameters
+    double priorAlpha = 0.01;
+
+    auto jointLog = sopt.jointLog;
+
+    auto& fragStartDists = readExp.fragmentStartPositionDistributions();
+    double totalNumFrags{static_cast<double>(readExp.numMappedFragments())};
+    double totalLen{0.0};
+
+    // If effective length correction isn't turned off, then use effective
+    // lengths rather than reference lengths.
+    bool useEffectiveLengths = !sopt.noEffectiveLengthCorrection;
+
+    double uniformPrior = 1.0 / transcripts.size();
+
+    for (size_t i = 0; i < transcripts.size(); ++i) {
+        auto& txp = transcripts[i];
+        alphas[i] = txp.projectedCounts;
+        effLens(i) = useEffectiveLengths ? std::exp(txp.getCachedLogEffectiveLength()) : txp.RefLength;
+        txp.EffectiveLength = effLens(i);
+
+        if (noRichEq or !useFSPD) {
+            posWeightInvDenoms(i) = 1.0;
+        } else {
+            auto& fragStartDist = fragStartDists[txp.lengthClassIndex()];
+            double denomFactor = fragStartDist.evalCDF(static_cast<int32_t>(txp.EffectiveLength), txp.RefLength);
+            posWeightInvDenoms(i) = (denomFactor >= salmon::math::LOG_EPSILON) ?
+                std::exp(-denomFactor) : (1e-5);
+        }
+
+        totalLen += effLens(i);
     }
 
-    auto numRemoved = markDegenerateClasses(eqVec, alphas, sopt.jointLog);
+    // If the user requested *not* to use "rich" equivalence classes,
+    // then wipe out all of the weight information here and simply replace
+    // the weights with the effective length terms (here, the *inverse* of
+    // the effective length).  Otherwise, combine the existing weight terms
+    // with the positional term (the inverse effective length, adjusted by the
+    // fragment start-position distribution when it is in use).
+    tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(eqVec.size())),
+            [&eqVec, &effLens, &posWeightInvDenoms, useFSPD, noRichEq]( const BlockedIndexRange& range) -> void {
+            // For each index in the equivalence class vector
+            for (auto eqID : boost::irange(range.begin(), range.end())) {
+                // The vector entry
+                auto& kv = eqVec[eqID];
+                // The label of the equivalence class
+                const TranscriptGroup& k = kv.first;
+                // The size of the label
+                size_t classSize = k.txps.size();
+                // The weights of the label
+                TGValue& v = kv.second;
+
+                // Iterate over each weight and set it
+                double wsum{0.0};
+
+                // If we don't have positional weights, then
+                // create them here.
+                bool createdPosWeights{false};
+                if (v.weights.size() != v.posWeights.size()) {
+                    createdPosWeights = true;
+                    v.posWeights = std::vector<tbb::atomic<double>>(v.weights.size());
+                }
+
+                for (size_t i = 0; i < classSize; ++i) {
+                    auto tid = k.txps[i];
+                    double el = effLens(tid);
+                    if (el <= 1.0) { el = 1.0; }
+                    if (noRichEq) {
+                        // Keep length factor separate for the time being
+                        v.weights[i] = 1.0;
+                        // Pos weight
+                        v.posWeights[i] = 1.0 / el;
+                    } else if (createdPosWeights or !useFSPD) {
+                        // If the positional weights are new, then give them
+                        // meaningful values.
+                        v.posWeights[i] = 1.0 / el;
+                    }
+
+                    // combined weight
+                    v.combinedWeights.push_back(
+                        v.weights[i].load() * (v.posWeights[i].load() * posWeightInvDenoms[tid]));
+                    wsum += v.combinedWeights.back();
+                }
+
+                double wnorm = 1.0 / wsum;
+                for (size_t i = 0; i < classSize; ++i) {
+                  v.combinedWeights[i] = v.combinedWeights[i] * wnorm;
+                }
+            }
+    });
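
To make the weight combination above concrete, here is a small stand-alone sketch with hypothetical values: each transcript's auxiliary weight is scaled by its positional term (1 / effective length when no fragment start-position distribution is used), and the class is then renormalized so the combined weights sum to one.

    #include <cstdio>
    #include <vector>

    int main() {
        std::vector<double> auxWeights = {0.7, 0.3};      // "rich" eq-class weights (hypothetical)
        std::vector<double> effLens    = {1500.0, 500.0}; // effective lengths (hypothetical)
        std::vector<double> combined(auxWeights.size());
        double wsum = 0.0;
        for (size_t i = 0; i < combined.size(); ++i) {
            combined[i] = auxWeights[i] * (1.0 / effLens[i]);
            wsum += combined[i];
        }
        for (auto& w : combined) { w /= wsum; }           // normalize within the class
        std::printf("%.2f %.2f\n", combined[0], combined[1]); // prints roughly 0.44 0.56
        return 0;
    }
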
+
+    auto numRemoved = markDegenerateClasses(eqVec, alphas, effLens, sopt.jointLog);
     sopt.jointLog->info("Marked {} weighted equivalence classes as degenerate",
             numRemoved);
 
     size_t itNum{0};
     double minAlpha = 1e-8;
+    double alphaCheckCutoff = 1e-2;
     double cutoff = (useVBEM) ? (priorAlpha + minAlpha) : minAlpha;
 
+    // Iterations in which we will allow re-computing the effective lengths
+    // if bias-correction is enabled.
+    std::vector<uint32_t> recomputeIt{50, 500, 1000};
 
     bool converged{false};
     double maxRelDiff = -std::numeric_limits<double>::max();
-    while (itNum < maxIter and !converged) {
+    while (itNum < minIter or (itNum < maxIter and !converged)) {
+        if (doBiasCorrect and
+            (find(recomputeIt.begin(), recomputeIt.end(), itNum) != recomputeIt.end())) {
+
+            jointLog->info("iteration {}, recomputing effective lengths", itNum);
+            effLens = salmon::utils::updateEffectiveLengths(
+                    readExp,
+                    effLens,
+                    alphas,
+                    expectedDist
+                    );
+            // Check for strangeness with the lengths.
+            for (size_t i = 0; i < effLens.size(); ++i) {
+                if (effLens(i) <= 0.0) {
+                    jointLog->warn("Transcript {} had effective length {}", i, effLens(i));
+                }
+                if (noRichEq or !useFSPD) {
+                    posWeightInvDenoms(i) = 1.0;
+                } else {
+                    auto& txp = transcripts[i];
+                    auto& fragStartDist = fragStartDists[txp.lengthClassIndex()];
+                    double denomFactor = fragStartDist.evalCDF(static_cast<int32_t>(effLens(i)), txp.RefLength);
+                    posWeightInvDenoms(i) = (denomFactor >= salmon::math::LOG_EPSILON) ?
+                        std::exp(-denomFactor) : 1e-5;
+                }
+            }
+            updateEqClassWeights(eqVec, posWeightInvDenoms);
+        }
 
         if (useVBEM) {
-            VBEMUpdate_(eqVec, transcripts, effLens,
+            VBEMUpdate_(eqVec, transcripts, effLens, 
                         priorAlpha, totalLen, alphas, alphasPrime, expTheta);
         } else {
             EMUpdate_(eqVec, transcripts, effLens, alphas, alphasPrime);
@@ -374,7 +862,7 @@ bool CollapsedEMOptimizer::optimize(ExpT& readExp,
         converged = true;
         maxRelDiff = -std::numeric_limits<double>::max();
         for (size_t i = 0; i < transcripts.size(); ++i) {
-            if (alphas[i] > cutoff) {
+            if (alphasPrime[i] > alphaCheckCutoff) {
                 double relDiff = std::abs(alphas[i] - alphasPrime[i]) / alphasPrime[i];
                 maxRelDiff = (relDiff > maxRelDiff) ? relDiff : maxRelDiff;
                 if (relDiff > relDiffTolerance) {
@@ -397,14 +885,7 @@ bool CollapsedEMOptimizer::optimize(ExpT& readExp,
                     itNum, maxRelDiff);
 
     // Truncate tiny expression values
-    double alphaSum = 0.0;
-
-    alphaSum = 0.0;
-    for (size_t i = 0; i < alphas.size(); ++i) {
-      if (alphas[i] <= cutoff) { alphas[i] = 0.0; }
-      alphaSum += alphas[i];
-    }
-
+    double alphaSum = truncateCountVector(alphas, cutoff);
 
     if (alphaSum < minWeight) {
         jointLog->error("Total alpha weight was too small! "
@@ -417,9 +898,11 @@ bool CollapsedEMOptimizer::optimize(ExpT& readExp,
     for (size_t i = 0; i < transcripts.size(); ++i) {
         // Set the mass to the normalized (after truncation)
         // relative abundance
+        // If we changed the effective lengths, copy them over here
+        if (doBiasCorrect) { transcripts[i].EffectiveLength = effLens(i); }
+        transcripts[i].setSharedCount(alphas[i]);
         transcripts[i].setMass(alphas[i] / alphaSum);
     }
-
     return true;
 }
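
The call to truncateCountVector above replaces the inlined loop that was removed; judging from that removed code, its effect is to zero out any component at or below the cutoff and return the sum of the surviving entries. A sketch consistent with the removed loop (truncateCountVector itself is defined elsewhere in this file; the helper name below is illustrative):

    // Zero out tiny abundance estimates and return the remaining total mass.
    template <typename VecT>
    double truncateAndSum(VecT& alphas, double cutoff) {
        double alphaSum = 0.0;
        for (size_t i = 0; i < alphas.size(); ++i) {
            if (alphas[i] <= cutoff) { alphas[i] = 0.0; }
            alphaSum += alphas[i];
        }
        return alphaSum;
    }
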
 
@@ -445,5 +928,31 @@ bool CollapsedEMOptimizer::optimize<AlignmentLibrary<ReadPair>>(
         uint32_t maxIter);
 
 
+template
+bool CollapsedEMOptimizer::gatherBootstraps<ReadExperiment>(
+        ReadExperiment& readExp,
+        SalmonOpts& sopt,
+        std::function<bool(const std::vector<double>&)>& writeBootstrap,
+        double relDiffTolerance,
+        uint32_t maxIter);
+
+
+template
+bool CollapsedEMOptimizer::gatherBootstraps<AlignmentLibrary<UnpairedRead>>(
+        AlignmentLibrary<UnpairedRead>& readExp,
+        SalmonOpts& sopt,
+        std::function<bool(const std::vector<double>&)>& writeBootstrap,
+        double relDiffTolerance,
+        uint32_t maxIter);
+
+
+template
+bool CollapsedEMOptimizer::gatherBootstraps<AlignmentLibrary<ReadPair>>(
+        AlignmentLibrary<ReadPair>& readExp,
+        SalmonOpts& sopt,
+        std::function<bool(const std::vector<double>&)>& writeBootstrap,
+        double relDiffTolerance,
+        uint32_t maxIter);
+
 // Unused / old
 
diff --git a/src/CollapsedGibbsSampler.cpp b/src/CollapsedGibbsSampler.cpp
index 48d3567..56aa007 100644
--- a/src/CollapsedGibbsSampler.cpp
+++ b/src/CollapsedGibbsSampler.cpp
@@ -15,7 +15,7 @@
 #include <boost/filesystem.hpp>
 
 // C++ string formatting library
-#include "format.h"
+#include "spdlog/details/format.h"
 
 #include "cuckoohash_map.hh"
 #include "Eigen/Dense"
@@ -29,6 +29,7 @@
 #include "UnpairedRead.hpp"
 #include "ReadExperiment.hpp"
 #include "MultinomialSampler.hpp"
+#include "BootstrapWriter.hpp"
 
 using BlockedIndexRange =  tbb::blocked_range<size_t>;
 
@@ -39,12 +40,13 @@ constexpr double minWeight = std::numeric_limits<double>::denorm_min();
 
 void initCountMap_(
         std::vector<std::pair<const TranscriptGroup, TGValue>>& eqVec,
-	std::vector<Transcript>& transcriptsIn,
-	double priorAlpha,
+        std::vector<Transcript>& transcriptsIn,
+        double priorAlpha,
         MultinomialSampler& msamp,
-        std::vector<int>& countMap,
+        std::vector<uint64_t>& countMap,
         std::vector<double>& probMap,
-	std::vector<int>& txpCounts) {
+        Eigen::VectorXd& effLens,
+        std::vector<int>& txpCounts) {
 
     size_t offset{0};
     for (auto& eqClass : eqVec) {
@@ -55,7 +57,7 @@ void initCountMap_(
         const size_t groupSize = tgroup.txps.size();
         if (tgroup.valid) {
             const std::vector<uint32_t>& txps = tgroup.txps;
-            const std::vector<double>& auxs = eqClass.second.weights;
+            const auto& auxs = eqClass.second.combinedWeights;
 
             double denom = 0.0;
             if (BOOST_LIKELY(groupSize > 1)) {
@@ -100,8 +102,9 @@ void initCountMap_(
 
 void sampleRound_(
         std::vector<std::pair<const TranscriptGroup, TGValue>>& eqVec,
-        std::vector<int>& countMap,
+        std::vector<uint64_t>& countMap,
         std::vector<double>& probMap,
+        Eigen::VectorXd& effLens,
         double priorAlpha,
         std::vector<int>& txpCount,
         MultinomialSampler& msamp) {
@@ -113,7 +116,7 @@ void sampleRound_(
     // Choose a fraction of this class to re-sample
 
     // The count subtracted from each transcript
-    std::vector<int> txpResamp;
+    std::vector<uint64_t> txpResamp;
 
     for (auto& eqClass : eqVec) {
         uint64_t classCount = eqClass.second.count;
@@ -124,64 +127,64 @@ void sampleRound_(
         const size_t groupSize = tgroup.txps.size();
         if (tgroup.valid) {
             const std::vector<uint32_t>& txps = tgroup.txps;
-            const std::vector<double>& auxs = eqClass.second.weights;
+            const auto& auxs = eqClass.second.combinedWeights;
 
             double denom = 0.0;
             // If this is a single-transcript group,
             // then it gets the full count --- otherwise,
-	    // sample!
+            // sample!
             if (BOOST_LIKELY(groupSize > 1)) {
 
                 // Subtract some fraction of the current equivalence
                 // class' contribution from each transcript.
-		uint64_t numResampled{0};
-		if (groupSize > txpResamp.size()) {
-			txpResamp.resize(groupSize, 0);
-		}
+                uint64_t numResampled{0};
+                if (groupSize > txpResamp.size()) {
+                    txpResamp.resize(groupSize, 0);
+                }
 
-		// For each transcript in the group
+                // For each transcript in the group
                 for (size_t i = 0; i < groupSize; ++i) {
                     auto tid = txps[i];
                     auto aux = auxs[i];
                     auto currCount = countMap[offset + i];
-		    uint64_t currResamp = std::round(sampleFrac * currCount);
-		    numResampled += currResamp;
-		    txpResamp[i] = currResamp;
+                    uint64_t currResamp = std::round(sampleFrac * currCount);
+                    numResampled += currResamp;
+                    txpResamp[i] = currResamp;
                     txpCount[tid] -= currResamp;
                     countMap[offset + i] -= currResamp;
                     denom += (priorAlpha + txpCount[tid]) * aux;
                 }
 
-		if (denom > ::minEQClassWeight) {
-			// Get the multinomial probabilities
-			double norm = 1.0 / denom;
-			for (size_t i = 0; i < groupSize; ++i) {
-			    auto tid = txps[i];
-			    auto aux = auxs[i];
-			    probMap[offset + i] = norm * ((priorAlpha + txpCount[tid]) * aux);
-			}
-
-			// re-sample
-			msamp(txpResamp.begin(),        // count array to fill in
-			      numResampled,		// multinomial n
-			      groupSize,		// multinomial k
-			      probMap.begin() + offset  // where to find multinomial probs
-			      );
-
-			for (size_t i = 0; i < groupSize; ++i) {
-				auto tid = txps[i];
-				countMap[offset + i] += txpResamp[i];
-				txpCount[tid] += txpResamp[i];
-			}
-
-		} else { // We didn't sample
-			// add back to txp-count!
-			for (size_t i = 0; i < groupSize; ++i) {
-			    auto tid = txps[i];
-			    txpCount[tid] += txpResamp[i];
-			    countMap[offset + i] += txpResamp[i];
-			}
-		}
+                if (denom > ::minEQClassWeight) {
+                    // Get the multinomial probabilities
+                    double norm = 1.0 / denom;
+                    for (size_t i = 0; i < groupSize; ++i) {
+                        auto tid = txps[i];
+                        auto aux = auxs[i];
+                        probMap[offset + i] = norm * ((priorAlpha + txpCount[tid]) * aux);
+                    }
+
+                    // re-sample
+                    msamp(txpResamp.begin(),        // count array to fill in
+                            numResampled,		// multinomial n
+                            groupSize,		// multinomial k
+                            probMap.begin() + offset  // where to find multinomial probs
+                         );
+
+                    for (size_t i = 0; i < groupSize; ++i) {
+                        auto tid = txps[i];
+                        countMap[offset + i] += txpResamp[i];
+                        txpCount[tid] += txpResamp[i];
+                    }
+
+                } else { // We didn't sample
+                    // add back to txp-count!
+                    for (size_t i = 0; i < groupSize; ++i) {
+                        auto tid = txps[i];
+                        txpCount[tid] += txpResamp[i];
+                        countMap[offset + i] += txpResamp[i];
+                    }
+                }
             }
 
             offset += groupSize;
@@ -203,12 +206,17 @@ class DistStats {
 template <typename ExpT>
 bool CollapsedGibbsSampler::sample(ExpT& readExp,
         SalmonOpts& sopt,
+        std::function<bool(const std::vector<int>&)>& writeBootstrap,
         uint32_t numSamples) {
 
     namespace bfs = boost::filesystem;
+    auto& jointLog = sopt.jointLog;
     tbb::task_scheduler_init tbbScheduler(sopt.numThreads);
     std::vector<Transcript>& transcripts = readExp.transcripts();
 
+    // Fill in the effective length vector
+    Eigen::VectorXd effLens(transcripts.size());
+
     std::vector<std::pair<const TranscriptGroup, TGValue>>& eqVec =
         readExp.equivalenceClassBuilder().eqVec();
 
@@ -217,16 +225,20 @@ bool CollapsedGibbsSampler::sample(ExpT& readExp,
     std::vector<std::vector<int>> allSamples(numSamples,
                                         std::vector<int>(transcripts.size(),0));
     double priorAlpha = 1e-8;
-    auto numMappedReads = readExp.numMappedReads();
+    bool useScaledCounts = (!sopt.useQuasi and !sopt.allowOrphans);
+    auto numMappedFragments = (useScaledCounts) ? readExp.upperBoundHits() : readExp.numMappedFragments();
 
 
-    for (auto& txp : transcripts) {
-        txp.setMass(priorAlpha + (txp.mass(false) * numMappedReads));
+    for (size_t i = 0; i < transcripts.size(); ++i) {
+        auto& txp = transcripts[i];
+        txp.setMass(priorAlpha + (txp.mass(false) * numMappedFragments));
+        effLens(i) = txp.EffectiveLength;
     }
 
     tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(numSamples)),
-                [&eqVec, &transcripts, priorAlpha,
-                 &allSamples]( const BlockedIndexRange& range) -> void {
+                [&eqVec, &transcripts, priorAlpha, &effLens,
+                 &allSamples, &writeBootstrap, useScaledCounts,
+                 &jointLog, numMappedFragments]( const BlockedIndexRange& range) -> void {
 
 
                 std::random_device rd;
@@ -235,40 +247,104 @@ bool CollapsedGibbsSampler::sample(ExpT& readExp,
                 size_t countMapSize{0};
                 for (size_t i = 0; i < eqVec.size(); ++i) {
                     if (eqVec[i].first.valid) {
-                        countMapSize += eqVec[i].first.txps.size();
+                        countMapSize += eqVec[i].first.txps.size();
                     }
                 }
 
-                std::vector<int> countMap(countMapSize, 0);
+                size_t numTranscripts{transcripts.size()};
+
+                // will hold estimated counts
+                std::vector<int> alphas(numTranscripts, 0);
+                std::vector<uint64_t> countMap(countMapSize, 0);
                 std::vector<double> probMap(countMapSize, 0.0);
 
-                initCountMap_(eqVec, transcripts, priorAlpha, ms, countMap, probMap, allSamples[range.begin()]);
+                initCountMap_(eqVec, transcripts, priorAlpha, ms, countMap, probMap, effLens, allSamples[range.begin()]);
 
                 // For each sample this thread should generate
                 bool isFirstSample{true};
-		bool numInternalRounds = 10;
+                size_t numInternalRounds = 10;
                 for (auto sampleID : boost::irange(range.begin(), range.end())) {
                     if (sampleID % 100 == 0) {
                         std::cerr << "gibbs sampling " << sampleID << "\n";
                     }
+
                     if (!isFirstSample) {
                         // the counts start at what they were last round.
                         allSamples[sampleID] = allSamples[sampleID-1];
                     }
-		    for (size_t i = 0; i < numInternalRounds; ++i){
-			    sampleRound_(eqVec, countMap, probMap, priorAlpha,
-					 allSamples[sampleID], ms);
-		    }
+
+                    // Thin the chain by a factor of (numInternalRounds)
+                    for (size_t i = 0; i < numInternalRounds; ++i){
+                        sampleRound_(eqVec, countMap, probMap, effLens, priorAlpha,
+                                allSamples[sampleID], ms);
+                    }
+
+                    // If we're scaling the counts, do it here.
+                    if (useScaledCounts) {
+                        double numMappedFrags = static_cast<double>(numMappedFragments);
+                        double alphaSum = 0.0;
+                        for (auto c : allSamples[sampleID]) { alphaSum += static_cast<double>(c); }
+                        if (alphaSum > ::minWeight) {
+                            double scaleFrac = 1.0 / alphaSum;
+                            // scaleFrac converts alpha to nucleotide fraction,
+                            // and multiplying by numMappedFrags scales by the total
+                            // number of mapped fragments to provide an estimated count.
+                            for (size_t tn = 0; tn < numTranscripts; ++tn) {
+                                alphas[tn] = static_cast<int>(
+                                        std::round(
+                                            numMappedFrags *
+                                            (static_cast<double>(allSamples[sampleID][tn]) * scaleFrac)));
+                            }
+                        } else { // This shouldn't happen!
+                            jointLog->error("Gibbs sampler had an insufficient number of fragments! "
+                                    "Something is probably wrong; please check that you "
+                                    "have run salmon correctly and report this to GitHub.");
+                        }
+                    } else { // otherwise, just copy over from the sampled counts
+                        for (size_t tn = 0; tn < numTranscripts; ++tn) {
+                            alphas[tn] = static_cast<int>(allSamples[sampleID][tn]);
+                        }
+                    }
+
+                    writeBootstrap(alphas);
+                    //bootstrapWriter->writeBootstrap(alphas);
                     isFirstSample = false;
                 }
     });
+    return true;
+}
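
A worked sketch (hypothetical numbers) of the count scaling performed inside the sampler above: each Gibbs-sampled count is converted to a fraction of the total and then rescaled by the number of mapped fragments to produce an estimated read count.

    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main() {
        std::vector<int> sampled = {30, 70};   // Gibbs-sampled counts (hypothetical)
        double numMappedFrags = 1000.0;        // upper-bound hit count (hypothetical)
        double alphaSum = 0.0;
        for (int c : sampled) { alphaSum += c; }
        double scaleFrac = 1.0 / alphaSum;     // converts a count to a fraction of the total
        for (int c : sampled) {
            int est = static_cast<int>(std::round(numMappedFrags * (c * scaleFrac)));
            std::printf("%d ", est);           // prints 300 700
        }
        std::printf("\n");
        return 0;
    }
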
+
+template
+bool CollapsedGibbsSampler::sample<ReadExperiment>(ReadExperiment& readExp,
+        SalmonOpts& sopt,
+        std::function<bool(const std::vector<int>&)>& writeBootstrap,
+        uint32_t maxIter);
+
+template
+bool CollapsedGibbsSampler::sample<AlignmentLibrary<UnpairedRead>>(
+        AlignmentLibrary<UnpairedRead>& readExp,
+        SalmonOpts& sopt,
+        std::function<bool(const std::vector<int>&)>& writeBootstrap,
+        uint32_t maxIter);
+
+
+template
+bool CollapsedGibbsSampler::sample<AlignmentLibrary<ReadPair>>(
+        AlignmentLibrary<ReadPair>& readExp,
+        SalmonOpts& sopt,
+        std::function<bool(const std::vector<int>&)>& writeBootstrap,
+        uint32_t maxIter);
+
 
+
+/*
+    // Deprecated Gibbs output code
     auto numTranscripts = transcripts.size();
     std::vector<DistStats> ds(numTranscripts);
 
     // get posterior means
     tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(numTranscripts)),
-                [&allSamples, &transcripts, &ds, numMappedReads,
+                [&allSamples, &transcripts, &ds, numMappedFragments,
                  numSamples]( const BlockedIndexRange& range) -> void {
 
                 // For each sample this thread should generate
@@ -280,7 +356,7 @@ bool CollapsedGibbsSampler::sample(ExpT& readExp,
                       if (val > ds[tid].maxVal) { ds[tid].maxVal = val; }
                       meanNumReads += (1.0 / numSamples) * val;
                     }
-		    ds[tid].meanVal = meanNumReads;
+        		    ds[tid].meanVal = meanNumReads;
                     transcripts[tid].setMass(ds[tid].meanVal);
                 }
     });
@@ -295,11 +371,6 @@ bool CollapsedGibbsSampler::sample(ExpT& readExp,
 	    statStream << transcripts[i].RefName;
         for (size_t s = 0; s < allSamples.size(); ++s) {
             statStream << '\t' << allSamples[s][i];
-            /*
-		   << ds[i].meanVal << '\t'
-		   << ds[i].minVal << '\t'
-		   << ds[i].maxVal << '\n';
-           */
         }
         statStream << '\n';
     }
@@ -322,24 +393,4 @@ bool CollapsedGibbsSampler::sample(ExpT& readExp,
         transcripts[i].setMass(transcripts[i].mass(false) / txpSumTrunc);
     }
 
-    return true;
-}
-
-template
-bool CollapsedGibbsSampler::sample<ReadExperiment>(ReadExperiment& readExp,
-        SalmonOpts& sopt,
-        uint32_t maxIter);
-
-template
-bool CollapsedGibbsSampler::sample<AlignmentLibrary<UnpairedRead>>(
-        AlignmentLibrary<UnpairedRead>& readExp,
-        SalmonOpts& sopt,
-        uint32_t maxIter);
-
-
-template
-bool CollapsedGibbsSampler::sample<AlignmentLibrary<ReadPair>>(
-        AlignmentLibrary<ReadPair>& readExp,
-        SalmonOpts& sopt,
-        uint32_t maxIter);
-
+*/
diff --git a/src/ComputeBiasFeatures.cpp b/src/ComputeBiasFeatures.cpp
deleted file mode 100644
index 175587f..0000000
--- a/src/ComputeBiasFeatures.cpp
+++ /dev/null
@@ -1,225 +0,0 @@
-
-/**
->HEADER
-    Copyright (c) 2013 Rob Patro robp at cs.cmu.edu
-
-    This file is part of Salmon.
-
-    Salmon is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    Salmon is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with Salmon.  If not, see <http://www.gnu.org/licenses/>.
-<HEADER
-**/
-
-
-#include <boost/thread/thread.hpp>
-
-#include <iostream>
-#include <vector>
-#include <array>
-#include <atomic>
-#include <thread>
-
-#include "jellyfish/stream_manager.hpp"
-#include "jellyfish/whole_sequence_parser.hpp"
-#include "jellyfish/mer_dna.hpp"
-
-#include "tbb/concurrent_queue.h"
-
-#include <boost/range/irange.hpp>
-#include <boost/filesystem.hpp>
-
-#include "CommonTypes.hpp"
-
-// holding 2-mers as a uint64_t is a waste of space,
-// but using Jellyfish makes life so much easier, so
-// we'll live with it for now.
-using Kmer = uint64_t;
-using Sailfish::TranscriptFeatures;
-namespace bfs = boost::filesystem;
-
-template <typename ParserT>
-bool computeBiasFeaturesHelper(ParserT& parser,
-                               tbb::concurrent_bounded_queue<TranscriptFeatures>& featQueue,
-                               size_t& numComplete, size_t numThreads) {
-
-    using stream_manager = jellyfish::stream_manager<char**>;
-    using sequence_parser = jellyfish::whole_sequence_parser<stream_manager>;
-
-    size_t merLen = 2;
-    Kmer lshift(2 * (merLen - 1));
-    Kmer masq((1UL << (2 * merLen)) - 1);
-    std::atomic<size_t> readNum{0};
-
-    size_t numActors = numThreads;
-    std::vector<std::thread> threads;
-    auto tstart = std::chrono::steady_clock::now();
-
-    for (auto i : boost::irange(size_t{0}, numActors)) {
-        threads.push_back(std::thread(
-	        [&featQueue, &numComplete, &parser, &readNum, &tstart, lshift, masq, merLen, numActors]() -> void {
-
-                size_t cmlen, numKmers;
-                jellyfish::mer_dna_ns::mer_base_dynamic<uint64_t> kmer(merLen);
-
-                // while there are transcripts left to process
-                while (true) { //producer.nextRead(s)) {
-                    sequence_parser::job j(parser);
-                    // If this job is empty, then we're done
-                    if (j.is_empty()) { return; }
-
-                    for (size_t i=0; i < j->nb_filled; ++i) {
-                        ++readNum;
-                        if (readNum % 100 == 0) {
-                            auto tend = std::chrono::steady_clock::now();
-                            auto sec = std::chrono::duration_cast<std::chrono::seconds>(tend-tstart);
-                            auto nsec = sec.count();
-                            auto rate = (nsec > 0) ? readNum / sec.count() : 0;
-                            std::cerr << "processed " << readNum << " transcripts (" << rate << ") transcripts/s\r\r";
-                        }
-
-                        // we iterate over the entire read
-                        const char* start     = j->data[i].seq.c_str();
-                        uint32_t readLen      = j->data[i].seq.size();
-                        const char* const end = start + readLen;
-
-                        TranscriptFeatures tfeat{};
-
-                        // reset all of the counts
-                        numKmers = 0;
-                        cmlen = 0;
-                        kmer.polyA();
-
-                        // the maximum number of kmers we'd have to store
-                        uint32_t maxNumKmers = (readLen >= merLen) ? readLen - merLen + 1 : 0;
-                        if (maxNumKmers == 0) { featQueue.push(tfeat); continue; }
-
-                        // The transcript name
-                        std::string fullHeader(j->data[i].header);
-                        tfeat.name = fullHeader.substr(0, fullHeader.find(' '));
-                        tfeat.length = readLen;
-                        auto nfact = 1.0 / readLen;
-
-                        // iterate over the read base-by-base
-                        size_t offset{0};
-                        size_t numChars{j->data[i].seq.size()};
-                        while (offset < numChars) {
-                            auto c = jellyfish::mer_dna::code(j->data[i].seq[offset]);
-                            kmer.shift_left(c);
-                            if (jellyfish::mer_dna::not_dna(c)) {
-                                cmlen = 0;
-                                ++offset;
-                                continue;
-                            }
-                            if (++cmlen >= merLen) {
-                                size_t twomer = kmer.get_bits(0, 2*merLen);
-                                tfeat.diNucleotides[twomer]++;
-                                switch(c) {
-                                    case jellyfish::mer_dna::CODE_G:
-                                    case jellyfish::mer_dna::CODE_C:
-                                        tfeat.gcContent += nfact;
-                                        break;
-                                }
-                            }
-                            ++offset;
-                        } // end while
-
-                        char lastBase = j->data[i].seq.back();
-                        auto c = jellyfish::mer_dna::code(lastBase);
-                        switch(c) {
-                            case jellyfish::mer_dna::CODE_G:
-                            case jellyfish::mer_dna::CODE_C:
-                                tfeat.gcContent += nfact;
-                                break;
-                        }
-
-                        featQueue.push(tfeat);
-                    } // end job
-                } // end while(true)
-            } // end lambda
-            ));
-
-        } // actor loop
-
-        for (auto& t : threads) { t.join(); ++numComplete; }
-        return true;
-}
-
-int computeBiasFeatures(
-    std::vector<std::string>& transcriptFiles,
-    bfs::path outFilePath,
-    bool useStreamingParser,
-    size_t numThreads) {
-
-    using std::string;
-    using std::vector;
-    using std::cerr;
-
-    size_t numActors = numThreads;
-    size_t numComplete = 0;
-    tbb::concurrent_bounded_queue<TranscriptFeatures> featQueue;
-
-    std::ofstream ofile(outFilePath.string());
-
-    auto outputThread = std::thread(
-         [&ofile, &numComplete, &featQueue, numActors]() -> void {
-             TranscriptFeatures tf{};
-             while( numComplete < numActors or !featQueue.empty() ) {
-                 while(featQueue.try_pop(tf)) {
-                     ofile << tf.name << '\t';
-                     ofile << tf.length << '\t';
-                     ofile << tf.gcContent << '\t';
-                     for (auto i : boost::irange(size_t{0}, tf.diNucleotides.size())) {
-                         ofile << tf.diNucleotides[i];
-                         char end = (i == tf.diNucleotides.size() - 1) ? '\n' : '\t';
-                         ofile << end;
-                     }
-                 }
-                 boost::this_thread::sleep_for(boost::chrono::milliseconds(100));
-
-             }
-             ofile.close();
-         });
-
-    for( auto rf : transcriptFiles) {
-        std::cerr << "readFile: " << rf << ", ";
-    }
-    std::cerr << "\n";
-
-    for (auto& readFile : transcriptFiles) {
-        std::cerr << "file " << readFile << ": \n";
-
-        //namespace bfs = boost::filesystem;
-        //bfs::path filePath(readFile);
-
-        char* pc = new char[readFile.size() + 1];
-        std::strcpy(pc, readFile.c_str());
-        char* fnames[] = {pc};
-
-        // Create a jellyfish parser
-        const int concurrentFile{1};
-
-        using stream_manager = jellyfish::stream_manager<char**>;
-        using sequence_parser = jellyfish::whole_sequence_parser<stream_manager>;
-        stream_manager streams(fnames, fnames + 1, concurrentFile);
-
-        size_t maxReadGroupSize{100};
-        sequence_parser parser(1*numActors, maxReadGroupSize, concurrentFile, streams);
-        computeBiasFeaturesHelper<sequence_parser>(
-                parser, featQueue, numComplete, numActors);
-        delete pc;
-    }
-
-    std::cerr << "\n";
-    outputThread.join();
-    return 0;
-}
diff --git a/src/FASTAParser.cpp b/src/FASTAParser.cpp
index 0da7e60..b5409cc 100644
--- a/src/FASTAParser.cpp
+++ b/src/FASTAParser.cpp
@@ -2,7 +2,7 @@
 #include <cstdio>
 #include <iostream>
 #include <unordered_map>
-
+#include <random>
 
 #include "jellyfish/mer_dna.hpp"
 #include "jellyfish/stream_manager.hpp"
@@ -32,6 +32,13 @@ void FASTAParser::populateTargets(std::vector<Transcript>& refs) {
     stream_manager streams(readFiles.cbegin(), readFiles.cend(), concurrentFile);
     single_parser parser(4, maxReadGroup, concurrentFile, streams);
 
+    constexpr char bases[] = {'A', 'C', 'G', 'T'};
+    // Create a random uniform distribution
+    std::random_device rd;
+    std::default_random_engine eng(rd());
+    std::uniform_int_distribution<> dis(0, 3);
+    uint64_t numNucleotidesReplaced{0};
+
     while(true) {
         typename single_parser::job j(parser); // Get a job from the parser: a bunch of read (at most max_read_group)
         if(j.is_empty()) break;           // If got nothing, quit
@@ -44,11 +51,36 @@ void FASTAParser::populateTargets(std::vector<Transcript>& refs) {
             if (it == nameToID.end()) {
                 std::cerr << "WARNING: Transcript " << name << " appears in the reference but did not appear in the BAM\n";
             } else {
-                std::string& seq = j->data[i].seq;
-                refs[it->second].Sequence = salmon::stringtools::encodeSequenceInSAM(seq.c_str(), seq.size());
+
+                std::string& seq = j->data[i].seq;
+                size_t readLen = seq.length();
+
+                refs[it->second].SAMSequence = salmon::stringtools::encodeSequenceInSAM(seq.c_str(), readLen);
+
+                // Replace non-ACGT bases
+                for (size_t b = 0; b < readLen; ++b) {
+                    seq[b] = ::toupper(seq[b]);
+                    int c = jellyfish::mer_dna::code(seq[b]);
+                    // Replace non-ACGT bases with pseudo-random bases
+                    if (jellyfish::mer_dna::not_dna(c)) {
+                        char rbase = bases[dis(eng)];
+                        c = jellyfish::mer_dna::code(rbase);
+                        seq[b] = rbase;
+                        ++numNucleotidesReplaced;
+                    }
+                }
+
+                // allocate space for the new copy
+                char* seqCopy = new char[seq.length()+1];
+                std::strcpy(seqCopy, seq.c_str());
+                refs[it->second].Sequence = seqCopy;
+                refs[it->second].freeSeqOnDestruct = true;
+                // seqCopy will only be freed when the transcript is destructed!
             }
         }
     }
 
+    std::cerr << "replaced " << numNucleotidesReplaced << " non-ACGT nucleotides with random nucleotides\n";
+
 }
 
diff --git a/src/FragmentLengthDistribution.cpp b/src/FragmentLengthDistribution.cpp
index cb2cfc3..53e486b 100644
--- a/src/FragmentLengthDistribution.cpp
+++ b/src/FragmentLengthDistribution.cpp
@@ -84,7 +84,7 @@ void FragmentLengthDistribution::addVal(size_t len, double mass) {
     //assert(!isnan(mass));
     //assert(kernel_.size());
 
-//  len /= binSize_;
+  len /= binSize_;
 
   if (len > maxVal()) {
       len = maxVal();
@@ -126,6 +126,9 @@ void FragmentLengthDistribution::addVal(size_t len, double mass) {
   }
 }
 
+/**
+ * Returns the *LOG* probability of observing a fragment of length *len*.
+ */
 double FragmentLengthDistribution::pmf(size_t len) const {
     len /= binSize_;
     if (len > maxVal()) {
@@ -134,6 +137,24 @@ double FragmentLengthDistribution::pmf(size_t len) const {
     return hist_[len]-totMass_;
 }
 
+/**
+ * Dumps the PMF to the provided vector.
+ */
+void FragmentLengthDistribution::dumpPMF(
+        std::vector<double>& pmfOut,
+        size_t& minV,
+        size_t& maxV) const {
+
+    minV = minVal();
+    maxV = maxVal();
+    pmfOut.clear();
+    pmfOut.reserve(maxV - minV + 1);
+    for (size_t i = minV; i <= maxV; ++i) {
+        pmfOut.push_back(pmf(i));
+    }
+}
+
+
 double FragmentLengthDistribution::cmf(size_t len) const {
     if(haveCachedCMF_) {
         return cachedCMF_[len];
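
A small consumer-side sketch for the PMF dumped above: since pmf() returns log probabilities over binned lengths, a reader exponentiates, renormalizes, and can then compute summaries such as the mean fragment length. The helper below is illustrative only; it assumes pmfOut/minV were filled by dumpPMF as shown above.

    #include <cmath>
    #include <vector>

    double meanFragmentLength(const std::vector<double>& logPMF, size_t minV) {
        double norm = 0.0;
        double mean = 0.0;
        for (size_t i = 0; i < logPMF.size(); ++i) {
            double p = std::exp(logPMF[i]);                // pmf() values are in log space
            norm += p;
            mean += p * static_cast<double>(minV + i);     // entry i corresponds to length minV + i
        }
        return (norm > 0.0) ? mean / norm : 0.0;
    }
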
diff --git a/src/FragmentStartPositionDistribution.cpp b/src/FragmentStartPositionDistribution.cpp
index c9342aa..dc9a535 100644
--- a/src/FragmentStartPositionDistribution.cpp
+++ b/src/FragmentStartPositionDistribution.cpp
@@ -24,11 +24,15 @@ FragmentStartPositionDistribution::FragmentStartPositionDistribution(uint32_t nu
       pmf_(numBins+2),
       cmf_(numBins+2),
       totMass_(salmon::math::LOG_1),
-      isUpdated_(false) {
+      isUpdated_(false),
+      allowUpdates_(true),
+      performingUpdate_(0){
 
   using salmon::math::logAdd;
   double uniMass = log(1.0 / numBins_);
-  for (auto i = 1; i <= numBins_; ++i) {
+  pmf_[0] = salmon::math::LOG_0;
+  cmf_[0] = salmon::math::LOG_0;
+  for (size_t i = 1; i <= numBins_; ++i) {
     pmf_[i] = uniMass;
     cmf_[i] = log(static_cast<double>(i)) + uniMass;
   }
@@ -51,42 +55,60 @@ void FragmentStartPositionDistribution::addVal(
         uint32_t txpLen,
         double mass) {
 
-    using salmon::math::logAdd;
-    if (hitPos >= txpLen) return; // hit should happen within the transcript
-
-    if (hitPos < 0) { hitPos = 0; }
-    double logLen = log(txpLen);
+    ++performingUpdate_;
+    {
+        if (!allowUpdates_) {
+            --performingUpdate_;
+            return;
+        }
 
-    // Modified from: https://github.com/deweylab/RSEM/blob/master/RSPD.h
-    int i;
-    // Fraction along the transcript where this hit occurs
-    double a = hitPos * 1.0 / txpLen;
-    double b;
+        using salmon::math::logAdd;
+        if (hitPos >= static_cast<int32_t>(txpLen)) {
+            --performingUpdate_;
+            return; // hit should happen within the transcript
+        }
 
-    for (i = ((long long)hitPos) * numBins_ / txpLen + 1;
-         i < (((long long)hitPos + 1) * numBins_ - 1) / txpLen + 1; i++) {
-        b = i * 1.0 / numBins_;
+        if (hitPos < 0) { hitPos = 0; }
+        double logLen = log(txpLen);
+
+        // Modified from: https://github.com/deweylab/RSEM/blob/master/RSPD.h
+        uint32_t i;
+        // Fraction along the transcript where this hit occurs
+        double a = hitPos * 1.0 / txpLen;
+        double b;
+
+        for (i = ((long long)hitPos) * numBins_ / txpLen + 1;
+             i < (((long long)hitPos + 1) * numBins_ - 1) / txpLen + 1; i++) {
+            b = i * 1.0 / numBins_;
+            double updateMass = log(b - a) + logLen + mass;
+            logAddMass(pmf_[i], updateMass);
+            logAddMass(totMass_, updateMass);
+            a = b;
+        }
+        b = (hitPos + 1.0) / txpLen;
         double updateMass = log(b - a) + logLen + mass;
         logAddMass(pmf_[i], updateMass);
         logAddMass(totMass_, updateMass);
-        a = b;
     }
-    b = (hitPos + 1.0) / txpLen;
-    double updateMass = log(b - a) + logLen + mass;
-    logAddMass(pmf_[i], updateMass);
-    logAddMass(totMass_, updateMass);
+    --performingUpdate_;
 }
 
 double FragmentStartPositionDistribution::evalCDF(int32_t hitPos, uint32_t txpLen) {
-    int i = (static_cast<long long>(hitPos) * numBins_) / txpLen;
-    double val = hitPos * 1.0 / txpLen * numBins_;
-    return salmon::math::logAdd(cmf_[i], std::log(val - i) + pmf_[i+1]);
+    int i = static_cast<int>((static_cast<double>(hitPos) * numBins_) / txpLen);
+    double val = hitPos * (1.0 / txpLen) * numBins_;
+    return (val - i < 1e-7) ? cmf_[i].load() :
+            salmon::math::logAdd(cmf_[i], std::log(val - i) + pmf_[i+1]);
 }
 
 void FragmentStartPositionDistribution::update() {
+    if (isUpdated_) { return; }
+    // TODO: Is this (thread)-safe yet?
+    allowUpdates_ = false;
     std::lock_guard<std::mutex> lg(fspdMut_);
+    // Make sure an update isn't being performed
+    while (performingUpdate_) {}
     if (!isUpdated_) {
-        for (int i = 1; i <= numBins_; i++) {
+        for (uint32_t i = 1; i <= numBins_; i++) {
             pmf_[i] = pmf_[i] - totMass_;
             cmf_[i] = salmon::math::logAdd(cmf_[i - 1], pmf_[i]);
         }
@@ -98,7 +120,7 @@ double FragmentStartPositionDistribution::operator()(
         int32_t hitPos,
         uint32_t txpLen,
         double logEffLen) {
-    
+
     if (hitPos < 0) { hitPos = 0; }
     assert(hitPos < txpLen);
     if (hitPos >= txpLen) {
@@ -106,9 +128,9 @@ double FragmentStartPositionDistribution::operator()(
 	    return salmon::math::LOG_0;
     }
     // If we haven't updated the CDF yet, then
-    // just return log(1 / effLen);
+    // just return the log of the uniform density, log(1 / effLen);
     if (!isUpdated_) {
-        return -logEffLen;
+        return -logEffLen; 
     }
 
     double a = hitPos * (1.0 / txpLen);
@@ -117,12 +139,56 @@ double FragmentStartPositionDistribution::operator()(
     if (effLen >= txpLen) {
 	    effLen = txpLen - 1;
     }
-    double denom = evalCDF(static_cast<int32_t>(effLen), txpLen);
+
+    double denom = evalCDF(static_cast<int32_t>(effLen), txpLen); // cmf_[numBins_];
+    double cdfNext = evalCDF(hitPos + 1, txpLen);
+    double cdfCurr = evalCDF(hitPos, txpLen);
+
     return ((denom >= salmon::math::LOG_EPSILON) ?
-            salmon::math::logSub(evalCDF(hitPos + 1, txpLen), evalCDF(hitPos, txpLen)) -
-	    denom : 0.0); 
+            salmon::math::logSub(cdfNext, cdfCurr) - denom :
+            salmon::math::LOG_0);
+}
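
In linear space, the quantity returned by operator() above is the probability that a fragment starts in the one-base window at hitPos, conditioned on starting within the effective length; the log-space subtraction and binned-CDF interpolation are implementation details. A condensed sketch of that ratio (illustrative helper, not part of the source):

    // cdfNext, cdfCurr and cdfEffLen are CDF values in linear (non-log) space.
    double startProbLinear(double cdfNext, double cdfCurr, double cdfEffLen) {
        // Return 0 when the denominator is too small to be meaningful,
        // mirroring the LOG_EPSILON guard in the log-space code above.
        if (cdfEffLen <= 1e-12) { return 0.0; }
        return (cdfNext - cdfCurr) / cdfEffLen;
    }
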
+
+
+bool FragmentStartPositionDistribution::logNumDenomMass(
+        int32_t hitPos,
+        uint32_t txpLen,
+        double logEffLen,
+        double& logNum,
+        double& logDenom) {
+
+    if (hitPos < 0) { hitPos = 0; }
+    assert(hitPos < txpLen);
+    if (hitPos >= txpLen) {
+        std::cerr << "\n\nhitPos = " << hitPos << ", txpLen = " << txpLen << "!!\n\n\n";
+        logNum = salmon::math::LOG_0;
+        logDenom = salmon::math::LOG_0;
+        return false;
+    }
+    // If we haven't updated the CDF yet, then fall back to a uniform
+    // start-position probability of 1 / txpLen.
+    if (!isUpdated_) {
+        logNum = std::log(1.0 / static_cast<double>(txpLen));
+        logDenom = salmon::math::LOG_1;
+        return true;
+    }
 
-    //return salmon::math::logSub(evalCDF(hitPos + 1, txpLen), evalCDF(hitPos, txpLen));
+    double effLen = std::exp(logEffLen);
+    if (effLen >= txpLen) { effLen = txpLen - 1; }
+
+    double denom = evalCDF(static_cast<int32_t>(effLen), txpLen); 
+    double cdfNext = evalCDF(hitPos + 1, txpLen);
+    double cdfCurr = evalCDF(hitPos, txpLen);
+
+    if (denom >= salmon::math::LOG_EPSILON) {
+        logNum = salmon::math::logSub(cdfNext, cdfCurr);
+        logDenom = denom;
+        return true;
+    } else {
+        logNum = salmon::math::LOG_0;
+        logDenom = salmon::math::LOG_0;
+        return false;
+    }
 }
 
 double FragmentStartPositionDistribution::totMass() const {
diff --git a/src/GZipWriter.cpp b/src/GZipWriter.cpp
new file mode 100644
index 0000000..651d452
--- /dev/null
+++ b/src/GZipWriter.cpp
@@ -0,0 +1,253 @@
+#include <ctime>
+#include <fstream>
+
+#include "cereal/archives/json.hpp"
+
+#include "GZipWriter.hpp"
+#include "SalmonOpts.hpp"
+#include "ReadExperiment.hpp"
+#include "AlignmentLibrary.hpp"
+#include "ReadPair.hpp"
+#include "UnpairedRead.hpp"
+
+GZipWriter::GZipWriter(const boost::filesystem::path path, std::shared_ptr<spdlog::logger> logger) :
+  path_(path), logger_(logger) {
+}
+
+GZipWriter::~GZipWriter() {
+  if (bsStream_) {
+    bsStream_->reset();
+  }
+}
+
+/**
+ * Creates a new gzipped file (path) and writes the contents
+ * of the vector (vec) to the file in binary.
+ */
+template <typename T>
+bool writeVectorToFile(boost::filesystem::path path,
+                       const std::vector<T>& vec) {
+
+    {
+        bool binary = std::is_same<T, std::string>::value;
+        auto flags = std::ios_base::out | std::ios_base::binary;
+
+        boost::iostreams::filtering_ostream out;
+        out.push(boost::iostreams::gzip_compressor(6));
+        out.push(boost::iostreams::file_sink(path.string(), flags));
+
+        size_t num = vec.size();
+        size_t elemSize = sizeof(typename std::vector<T>::value_type);
+        // We have to get rid of constness below, but this should be OK
+        out.write(reinterpret_cast<char*>(const_cast<T*>(vec.data())),
+                  num * elemSize);
+        out.reset();
+    }
+    return true;
+}
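
A companion sketch (not part of the diff) for reading back a file produced by writeVectorToFile above: the output is just gzip-compressed raw elements with no header, so the reader must already know the element type and count (or infer the count from the decompressed size).

    #include <string>
    #include <vector>
    #include <boost/iostreams/filtering_stream.hpp>
    #include <boost/iostreams/filter/gzip.hpp>
    #include <boost/iostreams/device/file.hpp>

    template <typename T>
    std::vector<T> readVectorFromFile(const std::string& path, size_t numElements) {
        boost::iostreams::filtering_istream in;
        in.push(boost::iostreams::gzip_decompressor());
        in.push(boost::iostreams::file_source(path, std::ios_base::in | std::ios_base::binary));
        std::vector<T> vec(numElements);
        // The file contains exactly numElements raw values of type T.
        in.read(reinterpret_cast<char*>(vec.data()), numElements * sizeof(T));
        return vec;
    }
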
+
+/**
+ * Write the "main" metadata to file.  Currently this includes:
+ *   -- Names of the target IDs if bootstrapping / Gibbs sampling is performed
+ *   -- The fragment length distribution
+ *   -- The expected and observed bias values
+ *   -- A json file with information about the run
+ */
+template <typename ExpT>
+bool GZipWriter::writeMeta(
+    const SalmonOpts& opts,
+    const ExpT& experiment,
+    const std::string& tstring // the start time of the run
+    ) {
+
+  namespace bfs = boost::filesystem;
+
+  bfs::path auxDir = path_ / "aux";
+  bool auxSuccess = boost::filesystem::create_directories(auxDir);
+
+  auto numBootstraps = opts.numBootstraps;
+  auto numSamples = (numBootstraps > 0) ? numBootstraps : opts.numGibbsSamples;
+  if (numSamples > 0) {
+      bsPath_ = auxDir / "bootstrap";
+      bool bsSuccess = boost::filesystem::create_directories(bsPath_);
+      {
+
+          boost::iostreams::filtering_ostream nameOut;
+          nameOut.push(boost::iostreams::gzip_compressor(6));
+          auto bsFilename = bsPath_ / "names.tsv.gz";
+          nameOut.push(boost::iostreams::file_sink(bsFilename.string(), std::ios_base::out));
+
+          auto& transcripts = experiment.transcripts();
+          size_t numTxps = transcripts.size();
+          if (numTxps == 0) { return false; }
+          for (size_t tn = 0; tn < numTxps; ++tn) {
+              auto& t  = transcripts[tn];
+              nameOut << t.RefName;
+              if (tn < numTxps - 1) {
+                  nameOut << '\t';
+              }
+          }
+          nameOut << '\n';
+          nameOut.reset();
+      }
+
+  }
+
+  bfs::path fldPath = auxDir / "fld.gz";
+  int32_t numFLDSamples{10000};
+  auto fldSamples = salmon::utils::samplesFromLogPMF(
+                        experiment.fragmentLengthDistribution(), numFLDSamples);
+  writeVectorToFile(fldPath, fldSamples);
+
+  bfs::path normBiasPath = auxDir / "expected_bias.gz";
+  writeVectorToFile(normBiasPath, experiment.expectedBias());
+
+  bfs::path obsBiasPath = auxDir / "observed_bias.gz";
+  const auto& bcounts = experiment.readBias().counts;
+  std::vector<int32_t> observedBias(bcounts.size(), 0);
+  std::copy(bcounts.begin(), bcounts.end(), observedBias.begin());
+  writeVectorToFile(obsBiasPath, observedBias);
+
+  bfs::path info = auxDir / "meta_info.json";
+
+  {
+      std::ofstream os(info.string());
+      cereal::JSONOutputArchive oa(os);
+
+      std::string sampType = "none";
+      if (numBootstraps == 0 and numSamples > 0) {
+          sampType = "gibbs";
+      }
+      if (numBootstraps > 0) {
+          sampType = "bootstrap";
+      }
+
+      auto& transcripts = experiment.transcripts();
+      oa(cereal::make_nvp("salmon_version", std::string(salmon::version)));
+      oa(cereal::make_nvp("samp_type", sampType));
+      oa(cereal::make_nvp("frag_dist_length", fldSamples.size()));
+      oa(cereal::make_nvp("bias_correct", opts.biasCorrect));
+      oa(cereal::make_nvp("num_bias_bins", bcounts.size()));
+
+      std::string mapTypeStr = opts.alnMode ? "alignment" : "mapping";
+      oa(cereal::make_nvp("mapping_type", mapTypeStr));
+
+      oa(cereal::make_nvp("num_targets", transcripts.size()));
+      oa(cereal::make_nvp("num_bootstraps", numBootstraps));
+      oa(cereal::make_nvp("num_processed", experiment.numObservedFragments()));
+      oa(cereal::make_nvp("num_mapped", experiment.numMappedFragments()));
+      oa(cereal::make_nvp("percent_mapped", experiment.effectiveMappingRate() * 100.0));
+      oa(cereal::make_nvp("call", std::string("quant")));
+      oa(cereal::make_nvp("start_time", tstring));
+  }
+  return true;
+}
+
+template <typename ExpT>
+bool GZipWriter::writeAbundances(
+    const SalmonOpts& sopt,
+    ExpT& readExp) {
+
+  namespace bfs = boost::filesystem;
+
+  using salmon::math::LOG_0;
+  using salmon::math::LOG_1;
+
+  // If we're using lightweight-alignment (FMD)
+  // and not allowing orphans.
+  bool useScaledCounts = (!sopt.useQuasi and sopt.allowOrphans == false);
+  bfs::path fname = path_ / "quant.sf";
+
+  std::unique_ptr<std::FILE, int (*)(std::FILE *)> output(std::fopen(fname.c_str(), "w"), std::fclose);
+
+  fmt::print(output.get(), "Name\tLength\tEffectiveLength\tTPM\tNumReads\n");
+
+  double numMappedFrags = readExp.upperBoundHits();
+
+  std::vector<Transcript>& transcripts_ = readExp.transcripts();
+  for (auto& transcript : transcripts_) {
+      transcript.projectedCounts = useScaledCounts ?
+          (transcript.mass(false) * numMappedFrags) : transcript.sharedCount();
+  }
+
+  double tfracDenom{0.0};
+  for (auto& transcript : transcripts_) {
+      double refLength = transcript.EffectiveLength;
+      tfracDenom += (transcript.projectedCounts / numMappedFrags) / refLength;
+  }
+
+  double million = 1000000.0;
+  // Now posterior has the transcript fraction
+  for (auto& transcript : transcripts_) {
+      double count = transcript.projectedCounts;
+      double npm = (transcript.projectedCounts / numMappedFrags);
+      double effLength = transcript.EffectiveLength;
+      double tfrac = (npm / effLength) / tfracDenom;
+      double tpm = tfrac * million;
+      fmt::print(output.get(), "{}\t{}\t{}\t{}\t{}\n",
+              transcript.RefName, transcript.RefLength, effLength,
+              tpm, count);
+  }
+  return true;
+}
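
A worked sketch (hypothetical numbers) of the TPM formula used above: TPM_i = 1e6 * ((c_i / N) / l_i) / sum_j ((c_j / N) / l_j), where c_i is the projected count, N the mapped-fragment total, and l_i the effective length.

    #include <cstdio>
    #include <vector>

    int main() {
        std::vector<double> counts  = {100.0, 300.0};   // projected counts (hypothetical)
        std::vector<double> effLens = {1000.0, 2000.0}; // effective lengths (hypothetical)
        double numMappedFrags = 400.0;
        double tfracDenom = 0.0;
        for (std::size_t i = 0; i < counts.size(); ++i) {
            tfracDenom += (counts[i] / numMappedFrags) / effLens[i];
        }
        double million = 1000000.0;
        for (std::size_t i = 0; i < counts.size(); ++i) {
            double npm = counts[i] / numMappedFrags;
            double tpm = ((npm / effLens[i]) / tfracDenom) * million;
            std::printf("TPM[%zu] = %.1f\n", i, tpm);   // 400000.0 and 600000.0 (sums to 1e6)
        }
        return 0;
    }
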
+
+template <typename T>
+bool GZipWriter::writeBootstrap(const std::vector<T>& abund) {
+#if defined __APPLE__
+    spin_lock::scoped_lock sl(writeMutex_);
+#else
+    std::lock_guard<std::mutex> lock(writeMutex_);
+#endif
+    if (!bsStream_) {
+        bsStream_.reset(new boost::iostreams::filtering_ostream);
+        bsStream_->push(boost::iostreams::gzip_compressor(6));
+        auto bsFilename = bsPath_ / "bootstraps.gz";
+        bsStream_->push(
+            boost::iostreams::file_sink(bsFilename.string(),
+                                        std::ios_base::out | std::ios_base::binary));
+    }
+
+    boost::iostreams::filtering_ostream& ofile = *bsStream_;
+    size_t num = abund.size();
+    size_t elSize = sizeof(typename std::vector<T>::value_type);
+    ofile.write(reinterpret_cast<char*>(const_cast<T*>(abund.data())),
+                elSize * num);
+    logger_->info("wrote {} bootstraps", numBootstrapsWritten_.load()+1);
+    ++numBootstrapsWritten_;
+    return true;
+}
+
+template
+bool GZipWriter::writeBootstrap<double>(const std::vector<double>& abund);
+
+template
+bool GZipWriter::writeBootstrap<int>(const std::vector<int>& abund);
+
+template
+bool GZipWriter::writeAbundances<ReadExperiment>(const SalmonOpts& sopt,
+                                                 ReadExperiment& readExp);
+template
+bool GZipWriter::writeAbundances<AlignmentLibrary<UnpairedRead>>(const SalmonOpts& sopt,
+                                                 AlignmentLibrary<UnpairedRead>& readExp);
+template
+bool GZipWriter::writeAbundances<AlignmentLibrary<ReadPair>>(const SalmonOpts& sopt,
+                                                 AlignmentLibrary<ReadPair>& readExp);
+
+template
+bool GZipWriter::writeMeta<ReadExperiment>(
+    const SalmonOpts& opts,
+    const ReadExperiment& experiment,
+    const std::string& tstring);
+
+template
+bool GZipWriter::writeMeta<AlignmentLibrary<UnpairedRead>>(
+    const SalmonOpts& opts,
+    const AlignmentLibrary<UnpairedRead>& experiment,
+    const std::string& tstring);
+
+template
+bool GZipWriter::writeMeta<AlignmentLibrary<ReadPair>>(
+    const SalmonOpts& opts,
+    const AlignmentLibrary<ReadPair>& experiment,
+    const std::string& tstring);
+
diff --git a/src/PerformBiasCorrection.cpp b/src/PerformBiasCorrection.cpp
deleted file mode 100644
index f4c9e63..0000000
--- a/src/PerformBiasCorrection.cpp
+++ /dev/null
@@ -1,349 +0,0 @@
-/**
->HEADER
-    Copyright (c) 2013 Rob Patro robp at cs.cmu.edu
-
-    This file is part of Salmon.
-
-    Salmon is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    Salmon is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with Salmon.  If not, see <http://www.gnu.org/licenses/>.
-<HEADER
-**/
-
-
-#include <iostream>
-#include <fstream>
-#include <istream>
-#include <vector>
-#include <array>
-#include <unordered_map>
-#include <limits>
-#include <cmath>
-#include <cstdint>
-#include <numeric>
-
-#include <boost/filesystem.hpp>
-#include <boost/range/irange.hpp>
-#include <boost/multiprecision/cpp_dec_float.hpp>
-
-#include "Eigen/Dense"
-#include "PCA.hpp"
-
-#include "tensemble/TypeDef.h"
-#include "tensemble/RandomForestRegressor.h"
-#include "tensemble/RandomForestClassifier.h"
-#include "tensemble/GBMRegressor.h"
-#include "tensemble/GBMClassifier.h"
-#include "tensemble/ReadData.h"
-
-#include "CommonTypes.hpp"
-
-#define DEFAULT_N_TREES 100
-#define DEFAULT_N_JOBS 1
-#define DEFAULT_MAX_FEATURES_RATIO 1.0
-#define DEFAULT_MIN_SAMPLE_LEAF 5
-#define DEFAULT_MAX_DEPTH   4
-#define DEFAULT_SUBSAMPLE   1.0
-#define DEFAULT_SPLIT_CRITERION CRITERION_MSE
-#define DEFAULT_LOSS SQUARE_LOSS
-#define DEFAULT_LEARN_RATE 0.1
-#define DEFAULT_OOB 1
-#define DEFAULT_VERBOSE 0
-#define DEFAULT_BOOTSTRAP 1
-#define DEFAULT_COMPUTE_IMPORTANCE 0
-
-namespace bfs = boost::filesystem;
-using Kmer = ::uint64_t;
-using Sailfish::TranscriptFeatures;
-using mpdec = boost::multiprecision::cpp_dec_float_100;
-
-TranscriptFeatures parseFeature(std::ifstream& ifs) {
-        TranscriptFeatures tf{};
-        ifs >> tf.name;
-        ifs >> tf.length;
-        ifs >> tf.gcContent;
-        for (auto i : boost::irange(size_t{0}, tf.diNucleotides.size())) {
-                ifs >> tf.diNucleotides[i];
-        }
-        // eat the newline
-        char junk;
-        ifs.get(junk);
-        return tf;
-}
-
-std::vector<TranscriptFeatures> parseFeatureFile(const bfs::path& featureFile) {
-        std::ifstream ifile(featureFile.string());
-        std::vector<TranscriptFeatures> feats;
-        while (!ifile.eof()) {
-                feats.emplace_back( parseFeature(ifile) );
-                if (ifile.peek() == EOF) { break; }
-        }
-        ifile.close();
-        return feats;
-}
-
-struct TranscriptResult{
-        size_t length;
-        double tpm;
-        double rpkm;
-        double kpkm;
-        double approxKmerCount;
-        double approxCount;
-};
-
-struct ExpressionResults {
-        std::vector<std::string> comments;
-        std::unordered_map<std::string, TranscriptResult> expressions;
-};
-
-ExpressionResults parseSalmonFile(const bfs::path& expFile, double& numMappedReads) {
-        numMappedReads = 0.0;
-
-        std::ifstream ifile(expFile.string());
-        ExpressionResults res;
-        while(!ifile.eof()) {
-
-                if (ifile.peek() == '#') {
-                        std::string comment;
-                        std::getline(ifile, comment);
-                        res.comments.emplace_back(comment);
-                } else {
-                        std::string tname;
-                        TranscriptResult tr;
-                        ifile >> tname;
-                        ifile >> tr.length;
-                        ifile >> tr.tpm;
-                        ifile >> tr.approxCount;
-                        numMappedReads += tr.approxCount;
-                        res.expressions[tname] = tr;
-                        // eat the newline
-                        char nline; ifile.get(nline);
-                }
-
-                if (ifile.peek() == EOF) { break; }
-        }
-
-        return res;
-}
-
-int performBiasCorrectionSalmon(
-        bfs::path featureFile,
-        bfs::path expressionFile,
-        bfs::path outputFile,
-        size_t numThreads) {
-
-        auto features = parseFeatureFile(featureFile);
-        std::cerr << "parsed " << features.size() << " features\n";
-
-        double numMappedReads = 0.0;
-        auto salmonRes = parseSalmonFile(expressionFile, numMappedReads);
-        std::cerr << "parsed " << salmonRes.expressions.size() << " expression values\n";
-
-        auto numFeatureVectors = features.size();
-        auto numSamples = salmonRes.expressions.size();
-
-        bool skipBiasCorrection{false};
-
-        if (numFeatureVectors != numSamples) {
-            std::cerr << "The size of the feature map didn't match the "
-                      << "number of transcripts.  Bias correction will not "
-                      << "be performed\n";
-            skipBiasCorrection = true;
-        }
-
-        uint32_t minSamples{1000};
-        if (numSamples <= minSamples) {
-            std::cerr << "There are an insufficient number of transcripts "
-                      << "for post-hoc bias correction.  It will not be performed\n";
-            skipBiasCorrection = true;
-        }
-
-        if (skipBiasCorrection) {
-            std::ofstream ofile(outputFile.string());
-            for (auto& c : salmonRes.comments) {
-                ofile << c << "\n";
-            }
-            for (auto& kv : salmonRes.expressions) {
-                auto& name = kv.first;
-                auto& expRecord = kv.second;
-                ofile << name << '\t'
-                    << expRecord.length << '\t'
-                    << expRecord.tpm << '\t'
-                    << expRecord.approxCount << '\n';
-            }
-            ofile.close();
-            return 0;
-        }
-
-
-        std::vector<size_t> retainedRows;
-        std::vector<double> retainedTPMs;
-        std::vector<std::string> retainedNames;
-
-        double minLTPM, maxLTPM;
-        minLTPM = std::numeric_limits<double>::max();
-        maxLTPM = -minLTPM;
-
-        for (auto i : boost::irange(size_t{0}, features.size())) {
-                auto& tname = features[i].name;
-                auto tpm = salmonRes.expressions[tname].tpm; //
-                double v;
-
-                if ( tpm >= 1.0 ) {
-                        retainedRows.emplace_back(i);
-                        retainedNames.push_back(tname);
-                        v = std::log(tpm);
-                        retainedTPMs.push_back(v);
-                        minLTPM = std::min(minLTPM, v);
-                        maxLTPM = std::max(maxLTPM, v);
-                }
-        }
-
-
-        std::vector<float> pcavec;
-        std::vector<double> featData;
-        Eigen::MatrixXd featMat(retainedRows.size(), 17);
-        size_t fnum = 0;
-        size_t linearIndex{0};
-        for (auto r : retainedRows) {
-                auto& f = features[r];
-                pcavec.push_back(f.gcContent);
-                featMat(fnum, 0) = f.gcContent;
-                for (auto i : boost::irange(size_t{0}, f.diNucleotides.size())) {
-                        featMat(fnum, i) = f.diNucleotides[i];
-                        pcavec.push_back(f.diNucleotides[i]);
-                }
-                ++fnum;
-        }
-
-        PCA pca(featMat);
-
-        std::cerr << "Performing PCA decomposition\n";
-        pca.performDecomposition();
-
-        auto encodedXSub = pca.projectedData(0.95, true);
-
-        Data train;
-        size_t numCols = encodedXSub.cols();
-        train.set_size(retainedRows.size(), numCols+1);
-
-        size_t c = 0;
-        for (auto r : retainedRows) {
-                train.X[c][0] = std::log(static_cast<double>(features[r].length));
-
-                for (auto j : boost::irange(size_t{1}, numCols+1)) {
-                        train.X[c][j] = encodedXSub(c, j-1);
-                }
-                train.y[c] = retainedTPMs[c];
-                ++c;
-        }
-
-        /** Random Forest Regression **/
-        size_t minDepth = 5;
-        auto reg = std::unique_ptr<RandomForestRegressor>(new RandomForestRegressor(
-                500,
-                train.n_features,
-                5, // max tree depth
-                1, // min_samples_leaf
-                1.0, // features ratio
-                true, // bootstrap
-                true, //out-of-bag
-                true, // compute importance
-                0, // random seed
-                numThreads, // num jobs
-                true // verbose
-        ));
-
-        std::cerr << "there are " << train.n_samples << " samples\n";
-        std::cerr << "there are " << train.n_features << " features\n";
-        reg->build(train.X, train.y, train.n_samples);
-
-        std::vector<REAL> pred(train.n_samples, 0.0);
-        reg->predict(train.X, &pred[0], train.n_samples, train.n_features);
-
-        REAL trn_rmse=rmse(&pred[0], train.y, train.n_samples);
-        REAL trn_r2=R2(&pred[0], train.y, train.n_samples);
-        std::cerr << "Train RMSE=" << trn_rmse << ", Correlation Coefficient=" << trn_r2 << "\n";
-
-        double grandMean = 0.0;
-        size_t ntrain = train.n_samples;
-        for (auto i : boost::irange(size_t{0}, ntrain)) {
-                grandMean += retainedTPMs[i];
-        }
-        grandMean /= train.n_samples;
-
-        for (auto i : boost::irange(size_t{0}, ntrain)) {
-                pred[i] = grandMean + (retainedTPMs[i] - pred[i]);
-        }
-
-        trn_rmse=rmse(&pred[0], train.y, train.n_samples);
-        trn_r2=R2(&pred[0], train.y, train.n_samples);
-        std::cerr << "Train RMSE=" << trn_rmse << ", Correlation Coefficient=" << trn_r2 << "\n";
-
-        std::ofstream ofile(outputFile.string());
-        for (auto& c : salmonRes.comments) {
-                ofile << c << "\n";
-        }
-
-        size_t retainedCnt = 0;
-        vector<mpdec> tpms(features.size());
-        double tpmSum{0.0};
-        for (auto i : boost::irange(size_t{0}, size_t{features.size()})) {
-          auto& name = features[i].name;
-          auto& r = salmonRes.expressions[name];
-          if (i == retainedRows[retainedCnt]) {
-            double v = std::exp(pred[retainedCnt]);
-            tpms[i] = v;
-            tpmSum += v;
-            ++retainedCnt;
-          } else {
-              tpms[i] = r.tpm;
-              tpmSum += r.tpm;
-          }
-        }
-
-        double tpmNorm = 1000000.0 / tpmSum;
-        for (auto i : boost::irange(size_t{0}, size_t{features.size()})) {
-            tpms[i] *= tpmNorm;
-        }
-
-        vector<mpdec> estNumReads(features.size());
-
-        // use TPM estimates to computed estimated read counts
-        mpdec mpzero = 0;
-        mpdec totalNucDenom = 0;
-        for (auto i : boost::irange(size_t{0},  size_t{features.size()})) {
-            double len = features[i].length;
-            totalNucDenom += tpms[i] * len;
-        }
-
-        for (auto i : boost::irange(size_t{0},  size_t{features.size()})) {
-            mpdec len = features[i].length;
-            estNumReads[i] += ((tpms[i] * len) / totalNucDenom) * numMappedReads;
-        }
-
-        for (auto i : boost::irange(size_t{0}, size_t{features.size()})) {
-          auto& name = features[i].name;
-          auto& r = salmonRes.expressions[name];
-          auto length = r.length;
-          ofile << name << '\t'
-                << r.length << '\t'
-                << tpms[i] << '\t'
-                << estNumReads[i] << '\n';
-        }
-        std::cerr << "retainedCnt = " << retainedCnt << ", nsamps = " << train.n_samples << "\n";
-
-        ofile.close();
-    return 0;
-}
-
-
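For reference, the file deleted above finishes by converting the bias-corrected TPM values back into estimated read counts as estReads[i] = (tpm[i] * len[i] / sum_j tpm[j] * len[j]) * numMappedReads. A self-contained sketch of just that step, dropping the cpp_dec_float_100 multiprecision type (the function name is illustrative):

    #include <cstddef>
    #include <vector>

    // tpm[i] and len[i] describe transcript i; numMappedReads is the library size.
    std::vector<double> tpmToEstimatedCounts(const std::vector<double>& tpm,
                                             const std::vector<double>& len,
                                             double numMappedReads) {
        double totalNucDenom = 0.0;
        for (std::size_t i = 0; i < tpm.size(); ++i) { totalNucDenom += tpm[i] * len[i]; }
        std::vector<double> est(tpm.size(), 0.0);
        for (std::size_t i = 0; i < tpm.size(); ++i) {
            est[i] = (tpm[i] * len[i] / totalNucDenom) * numMappedReads;
        }
        return est;
    }
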
diff --git a/src/PerformBiasCorrection_old.cpp b/src/PerformBiasCorrection_old.cpp
deleted file mode 100644
index f5d7696..0000000
--- a/src/PerformBiasCorrection_old.cpp
+++ /dev/null
@@ -1,420 +0,0 @@
-/**
->HEADER
-    Copyright (c) 2013 Rob Patro robp at cs.cmu.edu
-
-    This file is part of Salmon.
-
-    Salmon is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    Salmon is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with Salmon.  If not, see <http://www.gnu.org/licenses/>.
-<HEADER
-**/
-
-
-#include <iostream>
-#include <fstream>
-#include <istream>
-#include <vector>
-#include <array>
-#include <unordered_map>
-#include <limits>
-
-#include <boost/filesystem.hpp>
-#include <boost/range/irange.hpp>
-
-#include "shark/Data/Dataset.h"
-#include "shark/Algorithms/Trainers/PCA.h"
-#include "shark/Algorithms/Trainers/RFTrainer.h"
-#include "shark/Algorithms/Trainers/CARTTrainer.h"
-
-#include "tensemble/TypeDef.h"
-#include "tensemble/RandomForestRegressor.h"
-#include "tensemble/RandomForestClassifier.h"
-#include "tensemble/GBMRegressor.h"
-#include "tensemble/GBMClassifier.h"
-#include "tensemble/ReadData.h"
-
-#define DEFAULT_N_TREES 100
-#define DEFAULT_N_JOBS 1
-#define DEFAULT_MAX_FEATURES_RATIO 1.0
-#define DEFAULT_MIN_SAMPLE_LEAF 10
-#define DEFAULT_MAX_DEPTH   4
-#define DEFAULT_SUBSAMPLE   1.0
-#define DEFAULT_SPLIT_CRITERION CRITERION_MSE
-#define DEFAULT_LOSS SQUARE_LOSS
-#define DEFAULT_LEARN_RATE 0.1
-#define DEFAULT_OOB 1
-#define DEFAULT_VERBOSE 0
-#define DEFAULT_BOOTSTRAP 1
-#define DEFAULT_COMPUTE_IMPORTANCE 0
-
-namespace bfs = boost::filesystem;
-using Kmer = uint64_t;
-
-struct TranscriptFeatures{
-	std::string name;
-	size_t length;
-	double gcContent;
-	std::array<Kmer, 16> diNucleotides;
-};
-
-TranscriptFeatures parseFeature(std::ifstream& ifs) {
-	TranscriptFeatures tf{};
-	ifs >> tf.name;
-	ifs >> tf.length;
-	ifs >> tf.gcContent;
-	for (auto i : boost::irange(size_t{0}, tf.diNucleotides.size())) {
-		ifs >> tf.diNucleotides[i];
-	}
-	// eat the newline
-	char junk;
-	ifs.get(junk);
-	return tf;
-}
-
-std::vector<TranscriptFeatures> parseFeatureFile(const bfs::path& featureFile) {
-	std::ifstream ifile(featureFile.string());
-	std::vector<TranscriptFeatures> feats;
-	while (!ifile.eof()) {
-		feats.emplace_back( parseFeature(ifile) );
-		if (ifile.peek() == EOF) { break; }
-	}
-	ifile.close();
-	return feats;
-}
-
-struct TranscriptResult{
-	size_t length;
-	double tpm;
-	double rpkm;
-};
-
-struct ExpressionResults {
-	std::vector<std::string> comments;
-	std::unordered_map<std::string, TranscriptResult> expressions;
-};
-
-ExpressionResults parseSailfishFile(const bfs::path& expFile) {
-	std::ifstream ifile(expFile.string());
-	ExpressionResults res;
-	while(!ifile.eof()) {
-
-		if (ifile.peek() == '#') {
-			std::string comment;
-			std::getline(ifile, comment);
-			res.comments.emplace_back(comment);
-		} else {
-			std::string tname;
-			TranscriptResult tr;
-			ifile >> tname;
-			ifile >> tr.length;
-			ifile >> tr.tpm;
-			ifile >> tr.rpkm;
-			res.expressions[tname] = tr;
-			// eat the newline
-			char nline; ifile.get(nline);
-		}
-
-		if (ifile.peek() == EOF) { break; }
-	}
-
-	return res;
-}
-
-int main(int argc, char* argv[]) {
-
-	using shark::PCA;
-	//PCA pca(data)
-	bfs::path featureFile = bfs::path(argv[1]);
-	auto features = parseFeatureFile(featureFile);
-	std::cerr << "parsed " << features.size() << " features\n";
-
-	bfs::path expressionFile = bfs::path(argv[2]);
-	auto sfres = parseSailfishFile(expressionFile);
-	std::cerr << "parsed " << sfres.expressions.size() << " expression values\n";
-
-	std::vector<shark::RealVector> RPKMLabels;//(features.size(), 1);
-	std::vector<size_t> retainedRows;
-	std::vector<double> retainedRPKMs;
-	std::vector<std::string> retainedNames;
-
-	double minLRPKM, maxLRPKM;
-	minLRPKM = std::numeric_limits<double>::max();
-	maxLRPKM = -minLRPKM;
-	double avgLRPKM = 0.0;
-
-	for (auto i : boost::irange(size_t{0}, features.size())) {
-		auto& tname = features[i].name;
-		auto rpkm = sfres.expressions[tname].rpkm;
-		shark::RealVector v(1);
-
-		if ( rpkm >= 0.001 ) {
-			retainedRows.emplace_back(i);
-			retainedNames.push_back(tname);
-			v(0) = std::log(rpkm);
-			retainedRPKMs.push_back(v(0));
-
-			if (std::fabs(rpkm - std::exp(v(0))) > 1e-2) {
-				std::cerr << "EXP DOES NOT INVERT LOG!!\n";
-			}
-			minLRPKM = std::min(minLRPKM, v(0));
-			maxLRPKM = std::max(maxLRPKM, v(0));
-			avgLRPKM += v(0);
-			RPKMLabels.emplace_back(v);
-		}
-	}
-	avgLRPKM /= retainedRows.size();
-
-
-	std::vector<shark::RealVector> featMat;
-	size_t fnum = 0;
-	for (auto r : retainedRows) {
-		auto& f = features[r];
-		shark::RealVector v(17);
-		v(0) = f.gcContent;
-		for (auto i : boost::irange(size_t{0}, f.diNucleotides.size())) { v(i+1) = f.diNucleotides[i]; }
-		featMat.emplace_back(v);
-		++fnum;
-	}
-
-	shark::UnlabeledData<shark::RealVector> Xsub = shark::createDataFromRange(featMat);
-	PCA pca(Xsub, true);
-	auto evals = pca.eigenvalues();
-	double totalVariance = 0.0;
-	for ( auto e : evals ) { totalVariance += e; }
-	double varCutoff = 0.95;
-	double varSum = 0.0;
-	size_t dimCutoff = 0;
-	size_t currentDim = 0;
-	for ( auto e : evals ) {
-		++currentDim;
-		varSum += e;
-		if (varSum / totalVariance >= varCutoff) {
-			dimCutoff = currentDim;
-			break;
-		}
-		//std::cerr << "ev: " << e <<  "\n";
-	}
-	std::cerr << varCutoff * 100.0 << "% of the variance is explained by " << dimCutoff << " dimensions\n";
-
-	shark::LinearModel<> enc;
-	pca.encoder(enc, dimCutoff);
-	auto encodedXsub = enc(Xsub);
-
-	std::vector<shark::RealVector> X;
-	std::vector<shark::RealVector> labels;
-
-	Data train;
-	Data train2;
-	train.set_size(retainedRows.size(), dimCutoff+1);
-	train2.set_size(retainedRows.size(), dimCutoff+1);
-
-	size_t c = 0;
-	for (auto r : retainedRows) {
-		shark::RealVector v(dimCutoff + 1);
-		shark::RealVector l(1);
-
-		v(0) = std::log(static_cast<double>(features[r].length));
-		train.X[c][0] = std::log(static_cast<double>(features[r].length));
-		train2.X[c][0] = std::log(static_cast<double>(features[r].length));
-		for (auto j : boost::irange(size_t{1}, dimCutoff+1)) {
-			train.X[c][j] = encodedXsub.element(c)(j-1);
-			train2.X[c][j] = encodedXsub.element(c)(j-1);
-			v(j) = encodedXsub.element(c)(j-1);
-			//std::cerr << "Train [" << c <<"][" << j	 << "] = " << train.X[c][j] << "\n";
-		}
-		train.y[c] = retainedRPKMs[c];// std::log(sfres[features[r].name].expressions.rpkm);
-		l(0) = retainedRPKMs[c];
-
-		X.emplace_back(v);
-		labels.emplace_back(l);
-
-		++c;
-	}
-
-	size_t numRetainedSamples = retainedRows.size();
-
-// #define DEFAULT_N_TREES 100
-// #define DEFAULT_N_JOBS 1
-// #define DEFAULT_MAX_FEATURES_RATIO 1.0
-// #define DEFAULT_MIN_SAMPLE_LEAF 2
-// #define DEFAULT_MAX_DEPTH   4
-// #define DEFAULT_SUBSAMPLE   1.0
-// #define DEFAULT_SPLIT_CRITERION CRITERION_MSE
-// #define DEFAULT_LOSS SQUARE_LOSS
-// #define DEFAULT_LEARN_RATE 0.1
-// #define DEFAULT_OOB 1
-// #define DEFAULT_VERBOSE 0
-// #define DEFAULT_BOOTSTRAP 1
-// #define DEFAULT_COMPUTE_IMPORTANCE 0
-
-	/** Random Forest Regression **/
-	/*
-	auto reg = std::unique_ptr<RandomForestRegressor>(new RandomForestRegressor(
-		1000,
-		train.n_features,
-		DEFAULT_MAX_DEPTH,
-		DEFAULT_MIN_SAMPLE_LEAF,
-		DEFAULT_MAX_FEATURES_RATIO,
-		true, // bootstrap
-		true, //out-of-bag
-		false,
-		0,
-		16,
-		true
-	));
-	*/
-
-	/*
-	auto reg = std::unique_ptr<GBMRegressor>(new GBMRegressor(
-		SQUARE_LOSS,
-		500,
-		train.n_features,
-		DEFAULT_MAX_DEPTH,
-		DEFAULT_MIN_SAMPLE_LEAF,
-		DEFAULT_MAX_FEATURES_RATIO,
-		DEFAULT_SUBSAMPLE, // subsample
-		0.25, //learn rate
-		true, //out-of-bag
-		false, // compute imporance
-		34239, // random seed
-		30, // num jobs
-		true // verbose
-	));*/
-
-	/*
-	std::cerr << "there are " << numRetainedSamples << " samples\n";
-	std::cerr << "there are " << train.n_features << " features\n";
-	reg->build(train.X, train.y, numRetainedSamples);
-
-	REAL* pred = new REAL[numRetainedSamples];
-	for (auto i : boost::irange(size_t{0}, size_t{numRetainedSamples})) { pred[i] = 0.0; }
-	reg->predict(train2.X, pred, numRetainedSamples, train.n_features);
-
- REAL trn_rmse=rmse(pred, train.y, numRetainedSamples);
-  REAL trn_r2=R2(pred, train.y, numRetainedSamples);
-  std::cerr << "Train RMSE=" << trn_rmse << ", Correlation Coefficient=" << trn_r2 << "\n";
-	*/
-
-
-	shark::Data<shark::RealVector> input = shark::createDataFromRange(X);
-	shark::Data<shark::RealVector> regLabels = shark::createDataFromRange(labels);
-
-	shark::LabeledData<shark::RealVector, shark::RealVector> regressionData(input, regLabels);
-
-	shark::RFTrainer trainer;
-	shark::RFClassifier model;
-
-	trainer.setNodeSize(20);
-	trainer.setNTrees(500);
-	trainer.setOOBratio(0.001);
-
-	//std::cerr << "number of classes = " << shark::numberOfClasses(regressionSubset) << "\n";
-	//std::cerr << "number of classes = " << shark::numberOfClasses(regressionSubset) << "\n";
-	trainer.train(model, regressionData);
-
-	auto predictionData = model(regressionData.inputs());
-	std::cerr << "inputs\n";
-	std::vector<double> pred(retainedRows.size(), 0.0);
-	//std::cerr << regressionSubset << "\n";
-	std::cerr << predictionData << "\n";
-	size_t ctr = 0;
-	std::cerr << "NUM ELEMENTS: " << predictionData.numberOfElements() << "\n";
-	for (auto i : boost::irange(size_t{0}, predictionData.numberOfElements())) {
-		pred[i] = predictionData.element(i)(0);
-	}
-	std::cerr << "cha!\n";
-
-	//auto& inputLabels = regressionSubset.labels();
-	/*
-	auto mm = std::minmax_element(train.y, train.y + numRetainedSamples);
-	double minLRPKM = std::get<0>(mm);
-	double maxLRPKM = std::get<1>(mm);
-	*/
-
-	for (auto i : boost::irange(size_t{0}, size_t{numRetainedSamples})) {
-		pred[i] = RPKMLabels[i](0) - pred[i];
-	}
-
-	auto mmpred = std::minmax_element(pred.begin(), pred.begin() + numRetainedSamples);
-	double minPred = *(mmpred.first);
-	double maxPred = *(mmpred.second);
-
-	double scale = std::fabs(maxLRPKM - minLRPKM) / std::fabs(maxPred - minPred);
-
-	std::cerr << "min,max LRPKM : " << minLRPKM << ", " << maxLRPKM << "\n";
-	std::cerr << "min, max pred : " << minPred << ", " << maxPred  << "\n";
-	std::cerr << "SCALE: " << scale << "\n";
-
-
-	minPred = std::numeric_limits<double>::max();
-	for (auto i : boost::irange(size_t{0}, size_t{numRetainedSamples})) {
-		pred[i] *= scale;
-		minPred = std::min(minPred, pred[i]);
-	}
-
-	double shift{minLRPKM - minPred};
-	minPred = std::numeric_limits<double>::max();
-	maxPred = -minPred;
-	for (auto i : boost::irange(size_t{0}, size_t{numRetainedSamples})) {
-		pred[i] += shift;
-		minPred = std::min(minPred, pred[i]);
-		maxPred = std::max(maxPred, pred[i]);
-	}
-
-	/*
-   trn_rmse=rmse(pred, train.y, numRetainedSamples);
-   trn_r2=R2(pred, train.y, numRetainedSamples);
-  std::cerr << "Train RMSE=" << trn_rmse << ", Correlation Coefficient=" << trn_r2 << "\n";
-	*/
-
-	std::ofstream ofile(argv[3]);
-	for (auto& c : sfres.comments) {
-		ofile << c << "\n";
-	}
-
-	size_t retainedCnt = 0;
-	for (auto i : boost::irange(size_t{0}, size_t{features.size()})) {
-
-		double rpkm = 0.0;
-		auto& name = features[i].name;
-		auto& r = sfres.expressions[name];
-
-		if (i == retainedRows[retainedCnt]) {
-			//rpkm = std::exp(pred[retainedCnt]);
-			rpkm = std::exp(pred[retainedCnt]);
-			++retainedCnt;
-		} else {
-			rpkm = r.rpkm;
-		}
-		/*
-		if (i == retainedRows[retainedCnt]) {
-			if (retainedNames[retainedCnt] != features[i].name) {
-				std::cerr << "AHHH!!!\n";
-			}
-
-			rpkm = std::exp(RPKMLabels[retainedCnt](0));// pred[retainedCnt]);
-
-			++retainedCnt;
-		} else {
-			rpkm = sfres.expressions[features[i].name].rpkm;
-		}
-		*/
-		//std::cerr << "Feature = " << features[i].name << ", rpkm = " << rpkm << "\n";
-
-		ofile << name << '\t' << r.length << '\t' << r.tpm << '\t' << rpkm << '\n';
-	}
-	std::cerr << "retainedCnt = " << retainedCnt << ", nsamps = " << numRetainedSamples << "\n";
-	//for ( auto i : retainedRows ) { std::cerr << i << " "; }
-	ofile.close();
-
-}
diff --git a/src/Salmon.cpp b/src/Salmon.cpp
index b7ba2d6..201e814 100644
--- a/src/Salmon.cpp
+++ b/src/Salmon.cpp
@@ -51,7 +51,7 @@ int help(int argc, char* argv[]) {
     ===============
 
     Please invoke salmon with one of the following commands {index, quant, swim}.
-    For more inforation on the options for theses particular methods, use the -h
+    For more information on the options for these particular methods, use the -h
     flag along with the method name.  For example:
 
     salmon index -h
diff --git a/src/SalmonQuantify.cpp b/src/SalmonQuantify.cpp
index add7f11..2d9c9f1 100644
--- a/src/SalmonQuantify.cpp
+++ b/src/SalmonQuantify.cpp
@@ -29,6 +29,7 @@
 #include <map>
 #include <vector>
 #include <unordered_set>
+#include <iterator>
 #include <mutex>
 #include <thread>
 #include <sstream>
@@ -36,11 +37,12 @@
 #include <random>
 #include <queue>
 #include <unordered_map>
+#include <functional>
 #include "btree_map.h"
 #include "btree_set.h"
 
 // C++ string formatting library
-#include "format.h"
+#include "spdlog/details/format.h"
 
 // C Includes for BWA
 #include <cstdio>
@@ -70,6 +72,7 @@ extern "C" {
 #include <boost/program_options.hpp>
 #include <boost/lockfree/queue.hpp>
 #include <boost/thread/thread.hpp>
+#include <boost/range/iterator_range.hpp>
 
 // TBB Includes
 #include "tbb/concurrent_unordered_set.h"
@@ -101,7 +104,10 @@ extern "C" {
 #include "ReadLibrary.hpp"
 #include "SalmonConfig.hpp"
 #include "IOUtils.hpp"
+#include "SalmonIndex.hpp"
 
+#include "BWAUtils.hpp"
+#include "KmerIntervalMap.hpp"
 #include "AlignmentGroup.hpp"
 #include "PairSequenceParser.hpp"
 #include "ForgettingMassCalculator.hpp"
@@ -111,102 +117,17 @@ extern "C" {
 #include "EquivalenceClassBuilder.hpp"
 #include "CollapsedEMOptimizer.hpp"
 #include "CollapsedGibbsSampler.hpp"
-
-/* This allows us to use CLASP for optimal MEM
- * chaining.  However, this seems to be neither
- * computationally efficient, nor significantly
- * better than the greedy chaining, so I'm temporarily
- * removing this un-necessary dependency.  If you
- * (other dev or future Rob) re-instate this in the future
- * remember to re-enable the CLASP fetch and build
- * steps in the CMakeLists.txt files
- *
- *#include "FragmentList.hpp"
- */
-
-extern unsigned char nst_nt4_table[256];
-char const* bwa_pg = "cha";
-
-
-/******* STUFF THAT IS STATIC IN BWAMEM THAT WE NEED HERE --- Just re-define it *************/
-#define intv_lt(a, b) ((a).info < (b).info)
-KSORT_INIT(mem_intv, bwtintv_t, intv_lt)
-
-typedef struct {
-    bwtintv_v mem, mem1, *tmpv[2];
-} smem_aux_t;
-
-static smem_aux_t *smem_aux_init()
-{
-    smem_aux_t *a;
-    a = static_cast<smem_aux_t*>(calloc(1, sizeof(smem_aux_t)));
-    a->tmpv[0] = static_cast<bwtintv_v*>(calloc(1, sizeof(bwtintv_v)));
-    a->tmpv[1] = static_cast<bwtintv_v*>(calloc(1, sizeof(bwtintv_v)));
-    return a;
-}
-
-static void smem_aux_destroy(smem_aux_t *a)
-{
-    free(a->tmpv[0]->a); free(a->tmpv[0]);
-    free(a->tmpv[1]->a); free(a->tmpv[1]);
-    free(a->mem.a); free(a->mem1.a);
-    free(a);
-}
-
-static void mem_collect_intv(const SalmonOpts& sopt, const mem_opt_t *opt, const bwt_t *bwt, int len, const uint8_t *seq, smem_aux_t *a)
-{
-    int i, k, x = 0, old_n;
-    int start_width = (opt->flag & MEM_F_SELF_OVLP)? 2 : 1;
-    int split_len = (int)(opt->min_seed_len * opt->split_factor + .499);
-    a->mem.n = 0;
-    // first pass: find all SMEMs
-    while (x < len) {
-        if (seq[x] < 4) {
-            x = bwt_smem1(bwt, len, seq, x, start_width, &a->mem1, a->tmpv);
-            // EDIT
-            // x += 8;
-            for (i = 0; i < a->mem1.n; ++i) {
-                bwtintv_t *p = &a->mem1.a[i];
-                int slen = (uint32_t)p->info - (p->info>>32); // seed length
-                if (slen >= opt->min_seed_len)
-                    kv_push(bwtintv_t, a->mem, *p);
-            }
-        } else ++x;
-    }
-    // second pass: find MEMs inside a long SMEM
-    old_n = a->mem.n;
-    for (k = 0; k < old_n; ++k) {
-        bwtintv_t *p = &a->mem.a[k];
-        int start = p->info>>32, end = (int32_t)p->info;
-        if (end - start < split_len || p->x[2] > opt->split_width) continue;
-        bwt_smem1(bwt, len, seq, (start + end)>>1, p->x[2]+1, &a->mem1, a->tmpv);
-        for (i = 0; i < a->mem1.n; ++i)
-            if ((uint32_t)a->mem1.a[i].info - (a->mem1.a[i].info>>32) >= opt->min_seed_len)
-                kv_push(bwtintv_t, a->mem, a->mem1.a[i]);
-    }
-    // third pass: LAST-like
-    if (sopt.extraSeedPass and opt->max_mem_intv > 0) {
-        x = 0;
-        while (x < len) {
-            if (seq[x] < 4) {
-                if (1) {
-                    bwtintv_t m;
-                    x = bwt_seed_strategy1(bwt, len, seq, x, opt->min_seed_len, opt->max_mem_intv, &m);
-                    if (m.x[2] > 0) kv_push(bwtintv_t, a->mem, m);
-                } else { // for now, we never come to this block which is slower
-                    x = bwt_smem1a(bwt, len, seq, x, start_width, opt->max_mem_intv, &a->mem1, a->tmpv);
-                    for (i = 0; i < a->mem1.n; ++i)
-                        kv_push(bwtintv_t, a->mem, a->mem1.a[i]);
-                }
-            } else ++x;
-        }
-    }
-    // sort
-    // ks_introsort(mem_intv, a->mem.n, a->mem.a);
-}
-
-
-/******* END OF STUFF THAT IS STATIC IN BWAMEM THAT WE NEED HERE --- Just re-define it *************/
+#include "RapMapUtils.hpp"
+#include "HitManager.hpp"
+#include "SASearcher.hpp"
+#include "SACollector.hpp"
+#include "GZipWriter.hpp"
+//#include "TextBootstrapWriter.hpp"
+
+/****** QUASI MAPPING DECLARATIONS *********/
+using MateStatus = rapmap::utils::MateStatus;
+using QuasiAlignment = rapmap::utils::QuasiAlignment;
+/****** QUASI MAPPING DECLARATIONS  *******/
 
 using paired_parser = pair_sequence_parser<char**>;
 using stream_manager = jellyfish::stream_manager<std::vector<std::string>::const_iterator>;
@@ -217,83 +138,40 @@ using TranscriptIDVector = std::vector<TranscriptID>;
 using KmerIDMap = std::vector<TranscriptIDVector>;
 using my_mer = jellyfish::mer_dna_ns::mer_base_static<uint64_t, 1>;
 
-constexpr uint32_t miniBatchSize{1000};
-
-class SMEMAlignment {
-    public:
-        SMEMAlignment() :
-            transcriptID_(std::numeric_limits<TranscriptID>::max()),
-            format_(LibraryFormat::formatFromID(0)),
-            score_(0.0),
-            hitPos_(0),
-            fragLength_(0),
-            logProb(salmon::math::LOG_0),
-            logBias(salmon::math::LOG_0){}
-
-        SMEMAlignment(TranscriptID transcriptIDIn, LibraryFormat format,
-                  double scoreIn = 0.0,
-                  int32_t hitPosIn = 0,
-                  uint32_t fragLengthIn= 0,
-                  double logProbIn = salmon::math::LOG_0) :
-            transcriptID_(transcriptIDIn), format_(format), score_(scoreIn),
-            hitPos_(hitPosIn), fragLength_(fragLengthIn), logProb(logProbIn) {}
-
-        SMEMAlignment(const SMEMAlignment& o) = default;
-        SMEMAlignment(SMEMAlignment&& o) = default;
-        SMEMAlignment& operator=(SMEMAlignment& o) = default;
-        SMEMAlignment& operator=(SMEMAlignment&& o) = default;
-
-
-        inline TranscriptID transcriptID() const { return transcriptID_; }
-        inline uint32_t fragLength() const { return fragLength_; }
-        inline LibraryFormat libFormat() const { return format_; }
-        inline double score() const { return score_; }
-        inline int32_t hitPos() const { return hitPos_; }
-        // inline double coverage() {  return static_cast<double>(kmerCount) / fragLength_; };
-        uint32_t kmerCount;
-        double logProb;
-        double logBias;
-        template <typename Archive>
-        void save(Archive& archive) const {
-            archive(transcriptID_, format_.formatID(), score_, hitPos_, fragLength_);
-        }
+constexpr uint32_t miniBatchSize{5000};
 
-        template <typename Archive>
-        void load(Archive& archive) {
-            uint8_t formatID;
-            archive(transcriptID_, formatID, score_, hitPos_, fragLength_);
-            format_ = LibraryFormat::formatFromID(formatID);
-        }
+template <typename AlnT>
+using AlnGroupVec = std::vector<AlignmentGroup<AlnT>>;
 
-    private:
-        TranscriptID transcriptID_;
-        LibraryFormat format_;
-        double score_;
-        int32_t hitPos_;
-        uint32_t fragLength_;
-};
+template <typename AlnT>
+using AlnGroupVecRange = boost::iterator_range<typename AlnGroupVec<AlnT>::iterator>;
 
 #define __MOODYCAMEL__
 #if defined(__MOODYCAMEL__)
- using AlnGroupQueue = moodycamel::ConcurrentQueue<AlignmentGroup<SMEMAlignment>*>;
+ template <typename AlnT>
+ using AlnGroupQueue = moodycamel::ConcurrentQueue<AlignmentGroup<AlnT>*>;
 #else
- using AlnGroupQueue = tbb::concurrent_queue<AlignmentGroup<SMEMAlignment>*>;
+ template <typename AlnT>
+ using AlnGroupQueue = tbb::concurrent_queue<AlignmentGroup<AlnT>*>;
 #endif
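The conditional above picks the lock-free queue type used to recycle per-read AlignmentGroup buffers between producer and consumer threads. A minimal, self-contained sketch of the two calls involved on the moodycamel side (an int* stands in for the AlignmentGroup<AlnT>* that is actually enqueued; with tbb::concurrent_queue the equivalents are push and try_pop):

    #include "concurrentqueue.h"   // bundled moodycamel::ConcurrentQueue

    void queueSketch() {
        moodycamel::ConcurrentQueue<int*> q;
        int x = 42;
        q.enqueue(&x);             // producer side: hand off a pointer
        int* item = nullptr;
        if (q.try_dequeue(item)) {
            // consumer side: the pointed-to buffer is ours until re-enqueued
        }
    }
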
 
+#include "LightweightAlignmentDefs.hpp"
+
+template <typename AlnT>
 void processMiniBatch(
         ReadExperiment& readExp,
         ForgettingMassCalculator& fmCalc,
         uint64_t firstTimestepOfRound,
         ReadLibrary& readLib,
         const SalmonOpts& salmonOpts,
-        std::vector<AlignmentGroup<SMEMAlignment>*>& batchHits,
+        AlnGroupVecRange<AlnT> batchHits,
         std::vector<Transcript>& transcripts,
         ClusterForest& clusterForest,
         FragmentLengthDistribution& fragLengthDist,
         std::atomic<uint64_t>& numAssignedFragments,
         std::default_random_engine& randEng,
         bool initialRound,
-        bool& burnedIn
+        std::atomic<bool>& burnedIn
         ) {
 
     using salmon::math::LOG_0;
@@ -303,7 +181,6 @@ void processMiniBatch(
     using salmon::math::logSub;
 
     const uint64_t numBurninFrags = salmonOpts.numBurninFrags;
-    bool useMassBanking = (!initialRound and salmonOpts.useMassBanking);
 
     auto log = spdlog::get("jointLog");
     size_t numTranscripts{transcripts.size()};
@@ -318,9 +195,8 @@ void processMiniBatch(
 
     bool updateCounts = initialRound;
     bool useReadCompat = salmonOpts.incompatPrior != salmon::math::LOG_1;
-    bool useFSPD{!salmonOpts.noFragStartPosDist};
+    bool useFSPD{salmonOpts.useFSPD};
     bool useFragLengthDist{!salmonOpts.noFragLengthDist};
-    bool useSequenceBiasModel{!salmonOpts.noSeqBiasModel};
     bool noFragLenFactor{salmonOpts.noFragLenFactor};
 
     const auto expectedLibraryFormat = readLib.format();
@@ -331,211 +207,162 @@ void processMiniBatch(
 
     // Build reverse map from transcriptID => hit id
     using HitID = uint32_t;
-    /* This isn't used anymore!!!
-    btree::btree_map<TranscriptID, std::vector<SMEMAlignment*>> hitsForTranscript;
-    size_t hitID{0};
-    for (auto& hv : batchHits) {
-        for (auto& tid : hv->alignments()) {
-            hitsForTranscript[tid.transcriptID()].push_back(&tid);
-        }
-        ++hitID;
-    }
-    double clustTotal = std::log(batchHits.size()) + logForgettingMass;
-    */
 
     double logForgettingMass{0.0};
     uint64_t currentMinibatchTimestep{0};
+
+    // logForgettingMass and currentMinibatchTimestep are OUT parameters!
     fmCalc.getLogMassAndTimestep(logForgettingMass, currentMinibatchTimestep);
 
     double startingCumulativeMass = fmCalc.cumulativeLogMassAt(firstTimestepOfRound);
-    // BEGIN: DOUBLY-COLLAPSED TESTING
-    struct HitInfo {
-        uint32_t numHits = 0;
-        bool observed = false;
-        double newUniqueMass = LOG_0;
-    };
-
-    std::unordered_map<TranscriptID, HitInfo> hitInfo;
-    // We only need to fill this in if it's not the first round
-    if (useMassBanking) {
-        for (auto& alnGroup : batchHits) {
-            for (auto a : alnGroup->alignments()) {
-                auto transcriptID = a.transcriptID();
-                if (transcriptID < 0 or transcriptID >= transcripts.size()) {
-                    salmonOpts.jointLog->warn("Invalid Transcript ID [{}] encountered", transcriptID);
-                }
-                auto& info = hitInfo[transcriptID];
-                auto& txp = transcripts[transcriptID];
-                if(!info.observed) {
-                    info.observed = true;
-
-                    if (txp.uniqueCount() > 0) {
-                        double dormantInterval = static_cast<double>(currentMinibatchTimestep -
-                                    firstTimestepOfRound + 1);
-                        // The cumulative mass last time this was updated
-                        double prevUpdateMass = startingCumulativeMass;//fmCalc.cumulativeLogMassAt(startTime);
-                        double currentUpdateMass = fmCalc.cumulativeLogMassAt(currentMinibatchTimestep);
-                        double updateFraction = std::log(txp.uniqueUpdateFraction());
-
-                        // The new unique mass to be added to this transcript
-                        double newUniqueMass = salmon::math::logSub(currentUpdateMass, prevUpdateMass) +
-                            updateFraction - std::log(dormantInterval);
-                        info.newUniqueMass = newUniqueMass;
-                    }
-                }
-                info.numHits++;
-            } // end alignments in group
-        } // end batch hits
-    } // end initial round
-    // END: DOUBLY-COLLAPSED TESTING
-
     int i{0};
     {
         // Iterate over each group of alignments (a group consists of all alignments reported
         // for a single read).  Distribute the read's mass to the transcripts
         // where it potentially aligns.
         for (auto& alnGroup : batchHits) {
-            if (alnGroup->size() == 0) { continue; }
+	    // If we had no alignments for this read, then skip it
+            if (alnGroup.size() == 0) { continue; }
 
             // We start out with probability 0
             double sumOfAlignProbs{LOG_0};
+
             // Record whether or not this read is unique to a single transcript.
             bool transcriptUnique{true};
 
-            auto firstTranscriptID = alnGroup->alignments().front().transcriptID();
+            auto firstTranscriptID = alnGroup.alignments().front().transcriptID();
             std::unordered_set<size_t> observedTranscripts;
 
-            // EQCLASS
+            // New incompat. handling.
+            /**
+            // The equivalence class information for
+            // compatible fragments
+            std::vector<uint32_t> txpIDsCompat;
+            std::vector<double> auxProbsCompat;
+	        std::vector<double> posProbsCompat;
+            double auxDenomCompat = salmon::math::LOG_0;
+
+            // The equivalence class information for
+            // all fragments (if there is no compatible fragment)
+            std::vector<uint32_t> txpIDsAll;
+            std::vector<double> auxProbsAll;
+            std::vector<double> posProbsAll;
+            double auxDenomAll = salmon::math::LOG_0;
+
+            std::vector<uint32_t>* txpIDsFinal = nullptr;
+            std::vector<double>* auxProbsFinal = nullptr;
+            std::vector<double>* posProbsFinal = nullptr;
+            double auxDenomFinal = salmon::math::LOG_0;
+            **/
+
             std::vector<uint32_t> txpIDs;
             std::vector<double> auxProbs;
-            double auxDenom = salmon::math::LOG_0;
-	        size_t txpIDsHash{0};
+            std::vector<double> posProbs;
+            double auxDenom = salmon::math::LOG_0;
 
-            double avgLogBias = salmon::math::LOG_0;
             uint32_t numInGroup{0};
+            uint32_t prevTxpID{0};
+
             // For each alignment of this read
-            for (auto& aln : alnGroup->alignments()) {
+            for (auto& aln : alnGroup.alignments()) {
                 auto transcriptID = aln.transcriptID();
                 auto& transcript = transcripts[transcriptID];
                 transcriptUnique = transcriptUnique and (transcriptID == firstTranscriptID);
 
                 double refLength = transcript.RefLength > 0 ? transcript.RefLength : 1.0;
                 double coverage = aln.score();
-                double logFragCov = (coverage > 0) ? std::log(coverage) : LOG_0;
+                double logFragCov = (coverage > 0) ? std::log(coverage) : LOG_1;
 
                 // The alignment probability is the product of a
                 // transcript-level term (based on abundance and) an
                 // alignment-level term.
-                double logRefLength;
+                double logRefLength{salmon::math::LOG_0};
                 if (salmonOpts.noEffectiveLengthCorrection or !burnedIn) {
                     logRefLength = std::log(transcript.RefLength);
                 } else {
-                    logRefLength = transcript.getCachedEffectiveLength();
+                    logRefLength = transcript.getCachedLogEffectiveLength();
                 }
 
                 double transcriptLogCount = transcript.mass(initialRound);
 
-                // BEGIN: DOUBLY-COLLAPSED TESTING
-                // If this is not the initial round, then add the
-                // appropriate proportion of unique read mass for
-                // every ambiguous alignment we encounter. We do
-                // this before the line (below) where we
-                // retrieve this transcript's mass.
-                if (useMassBanking and transcript.uniqueCount() > 0) {
-                    auto txpHitInfo = hitInfo[transcriptID];
-                    transcriptLogCount = salmon::math::logAdd(
-                            transcriptLogCount,
-                            txpHitInfo.newUniqueMass);
-                }
-                // END: DOUBLY-COLLAPSED TESTING
-
+                // If the transcript had a non-zero count (including pseudocount)
+                if (std::abs(transcriptLogCount) != LOG_0 ) {
 
-                if ( transcriptLogCount != LOG_0 ) {
-                    double errLike = salmon::math::LOG_1;
-                    if (burnedIn) {
-                        // TODO: Make error model for smem-based quantification
-                        //errLike = errMod.logLikelihood(aln, transcript);
+                    // The probability of drawing a fragment of this length.
+                    double logFragProb = LOG_1;
+                    if (burnedIn and useFragLengthDist and aln.fragLength() > 0) {
+                        logFragProb = fragLengthDist.pmf(static_cast<size_t>(aln.fragLength()));
                     }
 
-                    double logFragProb = (useFragLengthDist) ?
-                        ((aln.fragLength() > 0) ?
-                         fragLengthDist.pmf(static_cast<size_t>(aln.fragLength())) :
-                         LOG_1) :
-                         LOG_1;
-
                     // TESTING
                     if (noFragLenFactor) { logFragProb = LOG_1; }
 
+                    // TODO: Maybe take the fragment length distribution into account
+                    // for single-end fragments?
 
-                    // TODO: Take the fragment length distribution into account
-                    // for single-end fragments as in the alignment-based code below
-                    /*
-                    if (!salmonOpts.noFragLengthDist) {
-                        if(aln->fragLen() == 0) {
-                            if (aln->isLeft() and transcript.RefLength - aln->left() < fragLengthDist.maxVal()) {
-                                logFragProb = fragLengthDist.cmf(transcript.RefLength - aln->left());
-                            } else if (aln->isRight() and aln->right() < fragLengthDist.maxVal()) {
-                                logFragProb = fragLengthDist.cmf(aln->right());
-                            }
+                    // The probability that the fragments align to the given strands in the
+                    // given orientations.
+                    double logAlignCompatProb =
+                        (useReadCompat) ?
+                        (salmon::utils::logAlignFormatProb(
+                            aln.libFormat(),
+                            expectedLibraryFormat,
+                            static_cast<int32_t>(aln.pos),
+                            aln.fwd, aln.mateStatus, salmonOpts.incompatPrior)
+                         ) : LOG_1;
+
+                    /** New compat handling
+                    // True if the read is compatible with the
+                    // expected library type; false otherwise.
+                    bool compat = ignoreCompat;
+                    if (!compat) {
+                        if (aln.mateStatus == rapmap::utils::MateStatus::PAIRED_END_PAIRED) {
+                            compat = salmon::utils::compatibleHit(
+                                    expectedLibType, observedLibType);
                         } else {
-                            logFragProb = fragLengthDist.pmf(static_cast<size_t>(aln->fragLen()));
+                            int32_t pos = static_cast<int32_t>(aln.pos);
+                            compat = salmon::utils::compatibleHit(
+                                    expectedLibraryFormat, pos,
+                                    aln.fwd, aln.mateStatus);
                         }
                     }
-                    */
-
-                    // The probability that the fragments align to the given strands in the
-                    // given orientations.
-                    double logAlignCompatProb = (useReadCompat) ?
-                                                (salmon::utils::logAlignFormatProb(aln.libFormat(), expectedLibraryFormat, salmonOpts.incompatPrior)) :
-                                                LOG_1;
+                    **/
 
                     // Allow for a non-uniform fragment start position distribution
-                    double startPosProb = -logRefLength;
+                    double startPosProb{-logRefLength};
+                    double fragStartLogNumerator{salmon::math::LOG_1};
+                    double fragStartLogDenominator{salmon::math::LOG_1};
+
                     auto hitPos = aln.hitPos();
                     if (useFSPD and burnedIn and hitPos < refLength) {
-                        auto& fragStartDist =
-                            fragStartDists[transcript.lengthClassIndex()];
-                        startPosProb = fragStartDist(hitPos, refLength, logRefLength);
-                    }
-
-                    double logBiasProb = salmon::math::LOG_1;
-                    if (useSequenceBiasModel and burnedIn) {
-                        double fragLength = aln.fragLength();
-                        if (fragLength > 0) {
-                            int32_t leftHitPos = hitPos;
-                            int32_t rightHitPos = hitPos + fragLength;
-                            logBiasProb = biasModel.biasFactor(transcript,
-                                                               leftHitPos,
-                                                               rightHitPos,
-                                                               aln.libFormat());
-                        } else {
-                            logBiasProb = biasModel.biasFactor(transcript,
-                                                               hitPos,
-                                                               aln.libFormat());
-                        }
-
+                        auto& fragStartDist = fragStartDists[transcript.lengthClassIndex()];
+                        // Get the log(numerator) and log(denominator) for the fragment start position
+                        // probability.
+                        bool nonZeroProb = fragStartDist.logNumDenomMass(hitPos, refLength, logRefLength,
+                                fragStartLogNumerator, fragStartLogDenominator);
+                        // Set the overall probability.
+                        startPosProb = (nonZeroProb) ?
+                            fragStartLogNumerator - fragStartLogDenominator :
+                            salmon::math::LOG_0;
                     }
 
                     // Increment the count of this type of read that we've seen
                     ++libTypeCounts[aln.libFormat().formatID()];
 
-                    double auxProb = startPosProb + logFragProb + logFragCov +
-                                     logAlignCompatProb + logBiasProb;
+                    // The total auxiliary probability is the product (sum in log-space) of
+                    // the fragment length probability,
+                    // the mapping score (coverage) probability, and
+                    // the fragment compatibility probability.
+                    // (The fragment start position probability is added to the
+                    // alignment log-probability separately, just below.)
+                    double auxProb =  logFragProb + logFragCov +
+                                      logAlignCompatProb;
 
-                    aln.logProb = transcriptLogCount + auxProb;
+                    aln.logProb = transcriptLogCount + auxProb + startPosProb;
 
+                    // If this alignment had a zero probability, then skip it
                     if (std::abs(aln.logProb) == LOG_0) { continue; }
 
-                    if (useSequenceBiasModel and burnedIn) {
-                        avgLogBias = salmon::math::logAdd(avgLogBias, logBiasProb);
-                        numInGroup++;
-                        aln.logBias = logBiasProb;
-                    } else {
-                        avgLogBias = salmon::math::logAdd(avgLogBias, logBiasProb);
-                        numInGroup++;
-                        aln.logBias = salmon::math::LOG_1;
-                    }
-
                     sumOfAlignProbs = logAdd(sumOfAlignProbs, aln.logProb);
 
                     if (updateCounts and
@@ -544,12 +371,19 @@ void processMiniBatch(
                         observedTranscripts.insert(transcriptID);
                     }
                     // EQCLASS
+                    if (transcriptID < prevTxpID) { std::cerr << "[ERROR] Transcript IDs are not in sorted order; please report this bug on GitHub!\n"; }
+                    prevTxpID = transcriptID;
                     txpIDs.push_back(transcriptID);
                     auxProbs.push_back(auxProb);
                     auxDenom = salmon::math::logAdd(auxDenom, auxProb);
-    	            boost::hash_combine(txpIDsHash, transcriptID);
+
+                    // If we're using the fragment start position distribution
+                    // remember *the numerator* of (x / cdf(effLen / len)) where
+                    // x = cdf(p+1 / len) - cdf(p / len)
+                    if (useFSPD) { posProbs.push_back(std::exp(fragStartLogNumerator)); }
                 } else {
                     aln.logProb = LOG_0;
+
                 }
             }
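As the comments above spell out, the per-alignment terms are multiplied in probability space by summing their logarithms, and the collected auxProbs are later normalized with exp(p - auxDenom), where auxDenom is accumulated via salmon::math::logAdd. The standard-library sketch below shows the same log-sum-exp pattern in isolation; logSumExp here stands in for the repeated logAdd calls and is not a salmon function:

    #include <algorithm>
    #include <cmath>
    #include <limits>
    #include <vector>

    // Numerically stable log(sum_i exp(logs[i])).
    double logSumExp(const std::vector<double>& logs) {
        if (logs.empty()) { return -std::numeric_limits<double>::infinity(); }
        double m = *std::max_element(logs.begin(), logs.end());
        double s = 0.0;
        for (double v : logs) { s += std::exp(v - m); }
        return m + std::log(s);
    }
    // Normalized weights are then p_i = exp(logs[i] - logSumExp(logs)).
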
 
@@ -562,27 +396,19 @@ void processMiniBatch(
                 ++localNumAssignedFragments;
             }
 
-            if (numInGroup > 0){
-                avgLogBias = avgLogBias - std::log(numInGroup);
-            }
-
             // EQCLASS
-            TranscriptGroup tg(txpIDs, txpIDsHash);
             double auxProbSum{0.0};
             for (auto& p : auxProbs) {
-                //p -= auxDenom;
                 p = std::exp(p - auxDenom);
                 auxProbSum += p;
             }
-            if (std::abs(auxProbSum - 1.0) > 0.01) {
-                std::cerr << "weights had sum of " << auxProbSum
-                          << " but it should be 1!!\n\n";
+            if (txpIDs.size() > 0) {
+               TranscriptGroup tg(txpIDs);
+               eqBuilder.addGroup(std::move(tg), auxProbs, posProbs);
             }
-            eqBuilder.addGroup(std::move(tg), auxProbs);
-
 
             // normalize the hits
-            for (auto& aln : alnGroup->alignments()) {
+            for (auto& aln : alnGroup.alignments()) {
                 if (std::abs(aln.logProb) == LOG_0) { continue; }
                 // Normalize the log-probability of this alignment
                 aln.logProb -= sumOfAlignProbs;
@@ -592,14 +418,7 @@ void processMiniBatch(
 
                 // Add the new mass to this transcript
                 double newMass = logForgettingMass + aln.logProb;
-
-                // If this is not the initial round, and we need to
-                // add "banked" mass for this hit, do it now.
-                if (useMassBanking and transcript.uniqueCount() > 0) {
-                    newMass = salmon::math::logAdd(newMass, hitInfo[transcriptID].newUniqueMass);
-                }
                 transcript.addMass( newMass );
-                transcript.addBias( aln.logBias );
 
                 double r = uni(randEng);
                 if (!burnedIn and r < std::exp(aln.logProb)) {
@@ -611,29 +430,14 @@ void processMiniBatch(
                     }
                     if (useFSPD) {
                         auto hitPos = aln.hitPos();
-                        auto& fragStartDist =
-                            fragStartDists[transcript.lengthClassIndex()];
+                        auto& fragStartDist = fragStartDists[transcript.lengthClassIndex()];
                         fragStartDist.addVal(hitPos,
                                              transcript.RefLength,
                                              logForgettingMass);
                     }
-                    if (useSequenceBiasModel) {
-                        if (fragLength > 0.0) {
-                            int32_t leftPos = aln.hitPos();
-                            int32_t rightPos = leftPos + fragLength;
-                            biasModel.update(transcript, leftPos, rightPos,
-                                             aln.libFormat(), logForgettingMass, LOG_1);
-                        } else {
-                            int32_t hitPos = aln.hitPos();
-                            biasModel.update(transcript, hitPos,
-                                             aln.libFormat(),
-                                             logForgettingMass, LOG_1);
-                        }
-                    }
                 }
             } // end normalize
 
-            double avgBias = std::exp(avgLogBias);
             // update the single target transcript
             if (transcriptUnique) {
                 if (updateCounts) {
@@ -644,9 +448,9 @@ void processMiniBatch(
                         1.0,
                         logForgettingMass, updateCounts);
             } else { // or the appropriate clusters
-                clusterForest.mergeClusters<SMEMAlignment>(alnGroup->alignments().begin(), alnGroup->alignments().end());
+                clusterForest.mergeClusters<AlnT>(alnGroup.alignments().begin(), alnGroup.alignments().end());
                 clusterForest.updateCluster(
-                        alnGroup->alignments().front().transcriptID(),
+                        alnGroup.alignments().front().transcriptID(),
                         1.0,
                         logForgettingMass, updateCounts);
             }
@@ -654,45 +458,13 @@ void processMiniBatch(
             } // end read group
         }// end timer
 
-        double individualTotal = LOG_0;
-        {
-            /*
-            // M-step
-            double totalMass{0.0};
-            for (auto kv = hitsForTranscript.begin(); kv != hitsForTranscript.end(); ++kv) {
-                auto transcriptID = kv->first;
-                // The target must be a valid transcript
-                if (transcriptID >= numTranscripts or transcriptID < 0) {std::cerr << "index " << transcriptID << " out of bounds\n"; }
-
-                auto& transcript = transcripts[transcriptID];
-
-                // The prior probability
-                double hitMass{LOG_0};
-
-                // The set of alignments that match transcriptID
-                auto& hits = kv->second;
-                std::for_each(hits.begin(), hits.end(), [&](SMEMAlignment* aln) -> void {
-                        if (!std::isfinite(aln->logProb)) { std::cerr << "hitMass = " << aln->logProb << "\n"; }
-                        hitMass = logAdd(hitMass, aln->logProb);
-                        });
-
-                double updateMass = logForgettingMass + hitMass;
-                individualTotal = logAdd(individualTotal, updateMass);
-                totalMass = logAdd(totalMass, updateMass);
-                transcript.addMass(updateMass);
-            } // end for
-            */
-        } // end timer
-
-        if (zeroProbFrags > 0) {
+        if (zeroProbFrags > 0) {
             log->warn("Minibatch contained {} "
                       "0 probability fragments", zeroProbFrags);
         }
 
         numAssignedFragments += localNumAssignedFragments;
         if (numAssignedFragments >= numBurninFrags and !burnedIn) {
-            burnedIn = true;
-            for (auto& t : transcripts) {  t.updateEffectiveLength(fragLengthDist); }
             if (useFSPD) {
                 // update all of the fragment start position
                 // distributions
@@ -700,1115 +472,94 @@ void processMiniBatch(
                     fspd.update();
                 }
             }
+            // NOTE: only one thread should succeed here, and that
+            // thread will set burnedIn to true.
+            readExp.updateTranscriptLengthsAtomic(burnedIn);
         }
         if (initialRound) {
             readLib.updateLibTypeCounts(libTypeCounts);
         }
 }
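The end of processMiniBatch relies on the guarantee noted in the comment that only one thread flips burnedIn to true once enough fragments have been assigned; the flip itself happens inside readExp.updateTranscriptLengthsAtomic, which is not shown in this hunk. One common way to provide such a guarantee is a compare-and-exchange on the atomic flag; the sketch below is illustrative only and its function name is hypothetical:

    #include <atomic>

    void maybeFinishBurnIn(std::atomic<bool>& burnedIn) {
        bool expected = false;
        // Exactly one caller observes expected == false and wins the exchange.
        if (burnedIn.compare_exchange_strong(expected, true)) {
            // one-time work (e.g., refreshing cached effective lengths) goes here
        }
    }
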
 
-uint32_t basesCovered(std::vector<uint32_t>& kmerHits) {
-    std::sort(kmerHits.begin(), kmerHits.end());
-    uint32_t covered{0};
-    uint32_t lastHit{0};
-    uint32_t kl{20};
-    for (auto h : kmerHits) {
-        covered += std::min(h - lastHit, kl);
-        lastHit = h;
-    }
-    return covered;
-}
-
-uint32_t basesCovered(std::vector<uint32_t>& posLeft, std::vector<uint32_t>& posRight) {
-    return basesCovered(posLeft) + basesCovered(posRight);
-}
-
-class KmerVote {
-    public:
-        KmerVote(int32_t vp, uint32_t rp, uint32_t vl) : votePos(vp), readPos(rp), voteLen(vl) {}
-        int32_t votePos{0};
-        uint32_t readPos{0};
-        uint32_t voteLen{0};
-        /*
-        std::string str(){
-            return "<" + votePos  + ", "  + readPos  + ", "  + voteLen + ">";
-        }
-        */
-};
-class MatchFragment {
-    public:
-        MatchFragment(uint32_t refStart_, uint32_t queryStart_, uint32_t length_) :
-            refStart(refStart_), queryStart(queryStart_), length(length_) {}
-
-        uint32_t refStart, queryStart, length;
-        uint32_t weight;
-        double score;
-};
-
-bool precedes(const MatchFragment& a, const MatchFragment& b) {
-    return (a.refStart + a.length) < b.refStart and
-           (a.queryStart + a.length) < b.queryStart;
-}
-
-
-class TranscriptHitList {
-    public:
-        int32_t bestHitPos{0};
-        uint32_t bestHitCount{0};
-        double bestHitScore{0.0};
-
-        std::vector<KmerVote> votes;
-        std::vector<KmerVote> rcVotes;
-
-        uint32_t targetID;
-
-        bool isForward_{true};
-
-        void addFragMatch(uint32_t tpos, uint32_t readPos, uint32_t voteLen) {
-            int32_t votePos = static_cast<int32_t>(tpos) - static_cast<int32_t>(readPos);
-            votes.emplace_back(votePos, readPos, voteLen);
-        }
-
-        void addFragMatchRC(uint32_t tpos, uint32_t readPos, uint32_t voteLen, uint32_t readLen) {
-            //int32_t votePos = static_cast<int32_t>(tpos) - (readPos) + voteLen;
-            int32_t votePos = static_cast<int32_t>(tpos) - (readLen - readPos);
-            rcVotes.emplace_back(votePos, readPos, voteLen);
-        }
-
-        uint32_t totalNumHits() { return std::max(votes.size(), rcVotes.size()); }
-
-        bool computeBestLoc_(std::vector<KmerVote>& sVotes, Transcript& transcript,
-                             std::string& read, bool isRC,
-                             int32_t& maxClusterPos, uint32_t& maxClusterCount, double& maxClusterScore) {
-            // Did we update the highest-scoring cluster? This will be set to
-            // true iff we find a cluster whose coverage exceeds the count
-            // currently given in maxClusterCount.
-            bool updatedMaxScore{false};
-
-            if (sVotes.size() == 0) { return updatedMaxScore; }
-
-            struct VoteInfo {
-                uint32_t coverage = 0;
-                int32_t rightmostBase = 0;
-            };
-
-            uint32_t readLen = read.length();
-
-            boost::container::flat_map<uint32_t, VoteInfo> hitMap;
-            int32_t currClust{static_cast<int32_t>(sVotes.front().votePos)};
-
-            for (size_t j = 0; j < sVotes.size(); ++j) {
-
-                int32_t votePos = sVotes[j].votePos;
-                uint32_t readPos = sVotes[j].readPos;
-                uint32_t voteLen = sVotes[j].voteLen;
-
-                if (votePos >= currClust) {
-                    if (votePos - currClust > 10) {
-                        currClust = votePos;
-                    }
-                    auto& hmEntry = hitMap[currClust];
-
-                    hmEntry.coverage += std::min(voteLen, (votePos + readPos + voteLen) - hmEntry.rightmostBase);
-                    hmEntry.rightmostBase = votePos + readPos + voteLen;
-                } else if (votePos < currClust) {
-                    std::cerr << "Should not have votePos = " << votePos << " <  currClust = " << currClust << "\n";
-                    std::exit(1);
-                }
-
-                if (hitMap[currClust].coverage > maxClusterCount) {
-                    maxClusterCount = hitMap[currClust].coverage;
-                    maxClusterPos = currClust;
-                    maxClusterScore = maxClusterCount / static_cast<double>(readLen);
-                    updatedMaxScore = true;
-                }
-
-            }
-            return updatedMaxScore;
-        }
-
-        bool computeBestLoc2_(std::vector<KmerVote>& sVotes, uint32_t tlen,
-                              int32_t& maxClusterPos, uint32_t& maxClusterCount, double& maxClusterScore) {
-
-            bool updatedMaxScore{false};
-
-            if (sVotes.size() == 0) { return updatedMaxScore; }
-
-            double weights[] = { 1.0, 0.983471453822, 0.935506985032,
-                0.860707976425, 0.765928338365, 0.6592406302, 0.548811636094,
-                0.441902209585, 0.344153786865, 0.259240260646,
-                0.188875602838};
-
-            uint32_t maxGap = 4;
-            uint32_t leftmost = (sVotes.front().votePos > maxGap) ? (sVotes.front().votePos - maxGap) : 0;
-            uint32_t rightmost = std::min(sVotes.back().votePos + maxGap, tlen);
-
-            uint32_t span = (rightmost - leftmost);
-            std::vector<double> probAln(span, 0.0);
-            double kwidth = 1.0 / (2.0 * maxGap);
-
-            size_t nvotes = sVotes.size();
-            for (size_t j = 0; j < nvotes; ++j) {
-                uint32_t votePos = sVotes[j].votePos;
-                uint32_t voteLen = sVotes[j].voteLen;
 
-                auto x = j + 1;
-                while (x < nvotes and sVotes[x].votePos == votePos) {
-                    voteLen += sVotes[x].voteLen;
-                    j += 1;
-                    x += 1;
-                }
-
-
-                uint32_t dist{0};
-                size_t start = (votePos >= maxGap) ? (votePos - maxGap - leftmost) : (votePos - leftmost);
-                size_t mid = votePos - leftmost;
-                size_t end = std::min(votePos + maxGap - leftmost, rightmost - leftmost);
-                for (size_t k = start; k < end; k += 1) {
-                    dist = (mid > k) ? mid - k : k - mid;
-                    probAln[k] += weights[dist] * voteLen;
-                    if (probAln[k] > maxClusterScore) {
-                        maxClusterScore = probAln[k];
-                        maxClusterPos = k + leftmost;
-                        updatedMaxScore = true;
-                    }
-                }
-            }
-
-            return updatedMaxScore;
-        }
-
-
-        inline uint32_t numSampledHits_(Transcript& transcript, std::string& readIn,
-                                        int32_t votePos, int32_t posInRead, int32_t voteLen, bool isRC, uint32_t numTries) {
-
-
-            // The read starts at this position in the transcript (may be negative!)
-            int32_t readStart = votePos;
-            // The (uncorrected) length of the read
-            int32_t readLen = readIn.length();
-            // Pointer to the sequence of the read
-            const char* read = readIn.c_str();
-            // Don't mess around with unsigned arithmetic here
-            int32_t tlen = transcript.RefLength;
-
-            // If the read starts before the first base of the transcript,
-            // trim off the initial overhang  and correct the other variables
-            if (readStart < 0) {
-                if (isRC) {
-                    uint32_t correction = -readStart;
-                    //std::cerr << "readLen = " << readLen << ", posInRead = " << posInRead << ", voteLen = " << voteLen << ", correction = " << correction << "\n";
-                    //std::cerr << "tlen = " << tlen << ", votePos = " << votePos << "\n";
-                    read += correction;
-                    readLen -= correction;
-                    posInRead -= correction;
-                    readStart = 0;
-                } else {
-                    uint32_t correction = -readStart;
-                    read += correction;
-                    readLen -= correction;
-                    posInRead -= correction;
-                    readStart = 0;
-                }
-            }
-            // If the read hangs off the end of the transcript,
-            // shorten its effective length.
-            if (readStart + readLen >= tlen) {
-                if (isRC) {
-                    uint32_t correction = (readStart + readLen) - transcript.RefLength + 1;
-                    //std::cerr << "Trimming RC hit: correction = " << correction << "\n";
-                    //std::cerr << "untrimmed read : "  << read << "\n";
-                    read += correction;
-                    readLen -= correction;
-                    if (voteLen > readLen) { voteLen = readLen; }
-                    posInRead = 0;
-                } else {
-                    readLen = tlen - (readStart + 1);
-                    voteLen = std::max(voteLen, readLen - (posInRead + voteLen));
-                }
-            }
-            // Finally, clip any reverse complement reads starting at 0
-            if (isRC) {
-
-                if (voteLen > readStart) {
-                    readLen -= (readLen - (posInRead + voteLen));
-                }
+/// START QUASI
 
-            }
-
-            // If the read is too short, it's not useful
-            if (readLen <= 15) { return 0; }
-            // The step between sample centers (given the number of samples we're going to take)
-            double step = (readLen - 1) / static_cast<double>(numTries-1);
-            // The strand of the transcript from which we'll extract sequence
-            auto dir = (isRC) ? salmon::stringtools::strand::reverse :
-                                salmon::stringtools::strand::forward;
-
-            bool superVerbose{false};
-
-            if (superVerbose) {
-                std::stringstream ss;
-                ss << "Supposed hit " << (isRC ? "RC" : "") << "\n";
-                ss << "info: votePos = " << votePos << ", posInRead = " << posInRead
-                    << ", voteLen = " << voteLen << ", readLen = " << readLen
-                    << ", tran len = " << tlen << ", step = " << step << "\n";
-                if (readStart + readLen > tlen ) {
-                    ss << "ERROR!!!\n";
-                    std::cerr << "[[" << ss.str() << "]]";
-                    std::exit(1);
-                }
-                ss << "Transcript name = " << transcript.RefName << "\n";
-                ss << "T : ";
-                try {
-                    for ( size_t j = 0; j < readLen; ++j) {
-                        if (isRC) {
-                            if (j == posInRead) {
-                                char red[] = "\x1b[30m";
-                                red[3] = '0' + static_cast<char>(fmt::RED);
-                                ss << red;
-                            }
-
-                            if (j == posInRead + voteLen) {
-                                const char RESET_COLOR[] = "\x1b[0m";
-                                ss << RESET_COLOR;
-                            }
-                            ss << transcript.charBaseAt(readStart+readLen-j,dir);
-                        } else {
-                            if (j == posInRead ) {
-                                char red[] = "\x1b[30m";
-                                red[3] = '0' + static_cast<char>(fmt::RED);
-                                ss << red;
-                            }
-
-                            if (j == posInRead + voteLen) {
-                                const char RESET_COLOR[] = "\x1b[0m";
-                                ss << RESET_COLOR;
-                            }
-
-                            ss << transcript.charBaseAt(readStart+j);
-                        }
-                    }
-                    ss << "\n";
-                    char red[] = "\x1b[30m";
-                    red[3] = '0' + static_cast<char>(fmt::RED);
-                    const char RESET_COLOR[] = "\x1b[0m";
-
-                    ss << "R : " << std::string(read, posInRead) << red << std::string(read + posInRead, voteLen) << RESET_COLOR;
-                    if (readLen > posInRead + voteLen) { ss << std::string(read + posInRead + voteLen); }
-                    ss << "\n\n";
-                } catch (std::exception& e) {
-                    std::cerr << "EXCEPTION !!!!!! " << e.what() << "\n";
-                }
-                std::cerr << ss.str() << "\n";
-                ss.clear();
-            }
-
-            // The index of the current sample within the read
-            int32_t readIndex = 0;
-
-            // The number of loci in the subvotes and their
-            // offset patterns
-            size_t lpos = 3;
-            int leftPattern[] = {-4, -2, 0};
-            int rightPattern[] = {0, 2, 4};
-            int centerPattern[] = {-4, 0, 4};
-
-            // The number of subvote hits we've had
-            uint32_t numHits = 0;
-            // Take the samples
-            for (size_t i  = 0; i < numTries; ++i) {
-                // The sample will be centered around this point
-                readIndex = static_cast<uint32_t>(std::round(readStart + i * step)) - readStart;
-
-                // The number of successful sub-votes we have
-                uint32_t subHit = 0;
-                // Select the center sub-vote pattern, unless we're near the end of a read
-                int* pattern = &centerPattern[0];
-                if (readIndex + pattern[0] < 0) {
-                    pattern = &rightPattern[0];
-                } else if (readIndex + pattern[lpos-1] >= readLen) {
-                    pattern = &leftPattern[0];
-                }
-
-                // collect the subvotes
-                for (size_t j = 0; j < lpos; ++j) {
-                    // the pattern offset
-                    int offset = pattern[j];
-                    // and sample position it implies within the read
-                    int readPos = readIndex + offset;
-
-                    if (readStart + readPos >= tlen) {
-                        std::cerr  << "offset = " << offset << ", readPos = " << readPos << ", readStart = " << readStart << ", readStart + readPos = " << readStart + readPos << ", tlen = " << transcript.RefLength << "\n";
-                    }
-
-                    subHit += (isRC) ?
-                        (transcript.charBaseAt(readStart + readLen - readPos, dir) == salmon::stringtools::charCanon[read[readPos]]) :
-                        (transcript.charBaseAt(readStart + readPos               ) == salmon::stringtools::charCanon[read[readPos]]);
-                }
-                // if the entire subvote was successful, this is a hit
-                numHits += (subHit == lpos);
-            }
-            // return the number of hits we had
-            return numHits;
-        }
-
-
-
-        bool computeBestLoc3_(std::vector<KmerVote>& sVotes, Transcript& transcript,
-                              std::string& read, bool isRC,
-                              int32_t& maxClusterPos, uint32_t& maxClusterCount, double& maxClusterScore) {
-
-            bool updatedMaxScore{false};
-
-            if (sVotes.size() == 0) { return updatedMaxScore; }
-
-            struct LocHitCount {
-                int32_t loc;
-                uint32_t nhits;
-            };
-
-            uint32_t numSamp = 15;
-            std::vector<LocHitCount> hitCounts;
-            size_t nvotes = sVotes.size();
-            int32_t prevPos = -std::numeric_limits<int32_t>::max();
-            for (size_t j = 0; j < nvotes; ++j) {
-                int32_t votePos = sVotes[j].votePos;
-                int32_t posInRead = sVotes[j].readPos;
-                int32_t voteLen = sVotes[j].voteLen;
-                if (prevPos == votePos) { continue; }
-                auto numHits = numSampledHits_(transcript, read, votePos, posInRead, voteLen, isRC, numSamp);
-                hitCounts.push_back({votePos, numHits});
-                prevPos = votePos;
-            }
-
-            uint32_t maxGap = 8;
-            uint32_t hitIdx = 0;
-            uint32_t accumHits = 0;
-            int32_t hitLoc = hitCounts[hitIdx].loc;
-            while (hitIdx < hitCounts.size()) {
-                uint32_t idx2 = hitIdx;
-                while (idx2 < hitCounts.size() and std::abs(hitCounts[idx2].loc - hitLoc) <= maxGap) {
-                    accumHits += hitCounts[idx2].nhits;
-                    ++idx2;
-                }
-
-                double score = static_cast<double>(accumHits) / numSamp;
-                if (score > maxClusterScore) {
-                    maxClusterCount = accumHits;
-                    maxClusterScore = score;
-                    maxClusterPos = hitCounts[hitIdx].loc;
-                    updatedMaxScore = true;
-                }
-                accumHits = 0;
-                ++hitIdx;
-                hitLoc = hitCounts[hitIdx].loc;
-            }
-
-            return updatedMaxScore;
-        }
-
-
-        bool computeBestChain(Transcript& transcript, std::string& read) {
-            std::sort(votes.begin(), votes.end(),
-                    [](const KmerVote& v1, const KmerVote& v2) -> bool {
-                        if (v1.votePos == v2.votePos) {
-                            return v1.readPos < v2.readPos;
-                        }
-                        return v1.votePos < v2.votePos;
-                    });
-
-            std::sort(rcVotes.begin(), rcVotes.end(),
-                    [](const KmerVote& v1, const KmerVote& v2) -> bool {
-                        if (v1.votePos == v2.votePos) {
-                            return v1.readPos < v2.readPos;
-                        }
-                        return v1.votePos < v2.votePos;
-                    });
-
-            int32_t maxClusterPos{0};
-            uint32_t maxClusterCount{0};
-            double maxClusterScore{0.0};
-
-            // we don't need the return value from the first call
-            static_cast<void>(computeBestLoc_(votes, transcript, read, false, maxClusterPos, maxClusterCount, maxClusterScore));
-            bool revIsBest = computeBestLoc_(rcVotes, transcript, read, true, maxClusterPos, maxClusterCount, maxClusterScore);
-            isForward_ = not revIsBest;
-
-            bestHitPos = maxClusterPos;
-            bestHitCount = maxClusterCount;
-            bestHitScore = maxClusterScore;
-            return true;
-        }
-
-        bool isForward() { return isForward_; }
-
-};
-
-template <typename CoverageCalculator>
-inline void collectHitsForRead(const bwaidx_t *idx, const bwtintv_v* a, smem_aux_t* auxHits,
-                        mem_opt_t* memOptions, const SalmonOpts& salmonOpts, const uint8_t* read, uint32_t readLen,
-                        std::unordered_map<uint64_t, CoverageCalculator>& hits) {
-
-    mem_collect_intv(salmonOpts, memOptions, idx->bwt, readLen, read, auxHits);
-
-    // For each MEM
-    for (int i = 0; i < auxHits->mem.n; ++i ) {
-        // A pointer to the interval of the MEM's occurrences
-        bwtintv_t* p = &auxHits->mem.a[i];
-        // The start and end positions in the query string (i.e. read) of the MEM
-        int qstart = p->info>>32;
-        uint32_t qend = static_cast<uint32_t>(p->info);
-        int step, count, slen = (qend - qstart); // seed length
-
-        int64_t k;
-        step = p->x[2] > memOptions->max_occ? p->x[2] / memOptions->max_occ : 1;
-        // For every occurrence of the MEM
-        for (k = count = 0; k < p->x[2] && count < memOptions->max_occ; k += step, ++count) {
-            bwtint_t pos;
-            bwtint_t startPos, endPos;
-            int len, isRev, isRevStart, isRevEnd, refID, refIDStart, refIDEnd;
-            int queryStart = qstart;
-            len = slen;
-            uint32_t rlen = readLen;
-
-            // Get the position in the reference index of this MEM occurrence
-            int64_t refStart = bwt_sa(idx->bwt, p->x[0] + k);
-
-            pos = startPos = bns_depos(idx->bns, refStart, &isRevStart);
-            endPos = bns_depos(idx->bns, refStart + slen - 1, &isRevEnd);
-            // If we span the forward/reverse boundary, discard the hit
-            if (isRevStart != isRevEnd) {
-                continue;
-            }
-            // Otherwise, isRevStart = isRevEnd so just assign isRev = isRevStart
-            isRev = isRevStart;
-
-            // If the hit is reversed --- swap the start and end
-            if (isRev) {
-                if (endPos > startPos) {
-                    salmonOpts.jointLog->warn("Hit is supposedly reversed, "
-                                              "but startPos = {} < endPos = {}",
-                                              startPos, endPos);
-                }
-                auto temp = startPos;
-                startPos = endPos;
-                endPos = temp;
-            }
-            // Get the ID of the reference sequence in which it occurs
-            refID = refIDStart = bns_pos2rid(idx->bns, startPos);
-            refIDEnd = bns_pos2rid(idx->bns, endPos);
-
-            if (refID < 0) { continue; } // bridging multiple reference sequences or the forward-reverse boundary;
-
-            auto tlen = idx->bns->anns[refID].len;
-
-            // The reference sequence-relative (e.g. transcript-relative) position of the MEM
-            long hitLoc = static_cast<long>(isRev ? endPos : startPos) - idx->bns->anns[refID].offset;
-
-            if ((refIDStart != refIDEnd)) {
-                // If a seed spans two transcripts
-
-                // If we're not considering splitting such seeds, then
-                // just discard this seed and continue.
-                if (not salmonOpts.splitSpanningSeeds) { continue; }
-
-                //std::cerr << "Seed spans two transcripts! --- attempting to split: \n";
-                if (!isRev) {
-                    // If it's going forward, we have a situation like this
-                    // packed transcripts: t1 ===========|t2|==========>
-                    // hit:                          |==========>
-
-                    // length of hit in t1
-                    auto len1 = tlen - hitLoc;
-                    // length of hit in t2
-                    auto len2 = slen - len1;
-                    if (std::max(len1, len2) < memOptions->min_seed_len) { continue; }
-
-                    /** Keeping this here for now in case I need to debug splitting seeds again
-                    std::cerr << "\t hit is in the forward direction: ";
-                    std::cerr << "t1 part has length " << len1 << ", t2 part has length " << len2 << "\n";
-                    */
-
-                    // If the part in t1 is larger then just cut off the rest
-                    if (len1 >= len2) {
-                        slen = len1;
-                        int32_t votePos = static_cast<int32_t>(hitLoc) - queryStart;
-                        //std::cerr << "\t\t t1 (of length " << tlen << ") has larger hit --- new hit length = " << len1 << "; starts at pos " << queryStart << " in the read (votePos will be " << votePos << ")\n";
-                    } else {
-                        // Otherwise, make the hit be in t2.
-                        // Because the hit spans the boundary where t2 begins,
-                        // the new seed begins matching at position 0 of
-                        // transcript t2
-                        hitLoc = 0;
-                        slen = len2;
-                        // The seed originally started at position q, now it starts  len1 characters to the  right of that
-                        queryStart += len1;
-                        refID = refIDEnd;
-                        int32_t votePos = static_cast<int32_t>(hitLoc) - queryStart;
-                        tlen = idx->bns->anns[refID].len;
-                        //std::cerr << "\t\t t2 (of length " << tlen << ") has larger hit --- new hit length = " << len2 << "; starts at pos " << queryStart << " in the read (votePos will be " << votePos << ")\n";
-                    }
-                } else {
-
-                    // If it's going in the reverse direction, we have a situation like this
-                    // packed transcripts: t1 <===========|t2|<==========
-                    // hit:                          X======Y>======Z>
-                    // Which means we have
-                    // packed transcripts: t1 <===========|t2|<==========
-                    // hit:                          <Z=====Y<======X
-                    // length of hit in t1
-
-                    auto len2 = endPos - idx->bns->anns[refIDEnd].offset;
-                    auto len1 = slen - len2;
-                    if (std::max(len1, len2) < memOptions->min_seed_len) { continue; }
-
-                    /** Keeping this here for now in case I need to debug splitting seeds again
-                    std::cerr << "\t hit is in the reverse direction: ";
-                    std::cerr << "\n\n";
-                    std::cerr << "startPos = " << startPos << ", endPos = " << endPos << ", offset[refIDStart] = "
-                              <<  idx->bns->anns[refIDStart].offset << ", offset[refIDEnd] = " << idx->bns->anns[refIDEnd].offset << "\n";
-                    std::cerr << "\n\n";
-                    std::cerr << "t1 part has length " << len1 << ", t2 part has length " << len2 << "\n\n";
-                    */
-
-                    if (len1 >= len2) {
-                        slen = len1;
-                        hitLoc = tlen - len2;
-                        queryStart += len2;
-                        rlen -= len2;
-                        int32_t votePos = static_cast<int32_t>(hitLoc) - (rlen - queryStart);
-                        //std::cerr << "\t\t t1 (hitLoc: " << hitLoc << ") (of length " << tlen << ") has larger hit --- new hit length = " << len1 << "; starts at pos " << queryStart << " in the read (votePos will be " << votePos << ")\n";
-                    } else {
-                        slen = len2;
-                        refID = bns_pos2rid(idx->bns, endPos);
-                        tlen = idx->bns->anns[refID].len;
-                        hitLoc = len2;
-                        rlen = hitLoc + queryStart;
-                        int32_t votePos = static_cast<int32_t>(hitLoc) - (rlen - queryStart);
-                        //std::cerr << "\t\t t2 (of length " << tlen << ") (hitLoc: " << hitLoc << ") has larger hit --- new hit length = " << len2 << "; starts at pos " << queryStart << " in the read (votePos will be " << votePos << ")\n";
-                    }
-                }
-
-            }
-
-            if (isRev) {
-                hits[refID].addFragMatchRC(hitLoc, queryStart , slen, rlen);
-            } else {
-                hits[refID].addFragMatch(hitLoc, queryStart, slen);
-            }
-        } // for k
-    }
-}
-
-inline bool consistentNames(header_sequence_qual& r) {
-    return true;
-}
-
-bool consistentNames(std::pair<header_sequence_qual, header_sequence_qual>& rp) {
-        auto l1 = rp.first.header.length();
-        auto l2 = rp.second.header.length();
-        char* sptr = static_cast<char*>(memchr(&rp.first.header[0], ' ', l1));
-
-        bool compat = false;
-        // If we didn't find a space in the name of read1
-        if (sptr == NULL) {
-            if (l1 > 1) {
-                compat = (l1 == l2);
-                compat = compat and (memcmp(&rp.first.header[0], &rp.second.header[0], l1-1) == 0);
-                compat = compat and ((rp.first.header[l1-1] == '1' and rp.second.header[l2-1] == '2')
-                                or   (rp.first.header[l1-1] == rp.second.header[l2-1]));
-            } else {
-                compat = (l1 == l2);
-                compat = compat and (rp.first.header[0] == rp.second.header[0]);
-            }
-        } else {
-            size_t offset = sptr - (&rp.first.header[0]);
-
-            // If read2 matches read1 up to and including the space
-            if (offset + 1 < l2) {
-                compat = memcmp(&rp.first.header[0], &rp.second.header[0], offset) == 0;
-                // and after the space, read1 and read2 have an identical character or
-                // read1 has a '1' and read2 has a '2', then this is a consistent pair.
-                compat = compat and ((rp.first.header[offset+1] == rp.second.header[offset+1])
-                                or   (rp.first.header[offset+1] == '1' and rp.second.header[offset+1] == '2'));
-            } else {
-                compat = false;
-            }
-        }
-        return compat;
-}
-
-/**
- *  Returns true if the @hit is within @cutoff bases of the end of
- *  transcript @txp and false otherwise.
- */
-template <typename CoverageCalculator>
-inline bool nearEndOfTranscript(
-            CoverageCalculator& hit,
-            Transcript& txp,
-            int32_t cutoff=std::numeric_limits<int32_t>::max()) {
-	// check if hit appears close to the end of the given transcript
-	auto hitPos = hit.bestHitPos;
-	return (hitPos < cutoff or
-            std::abs(static_cast<int32_t>(txp.RefLength) - hitPos) < cutoff);
-
-}
-
-template <typename CoverageCalculator>
-void getHitsForFragment(std::pair<header_sequence_qual, header_sequence_qual>& frag,
-                        bwaidx_t *idx,
-                        smem_i *itr,
-                        const bwtintv_v *a,
-                        smem_aux_t* auxHits,
-                        mem_opt_t* memOptions,
-                        const SalmonOpts& salmonOpts,
-                        double coverageThresh,
-                        uint64_t& upperBoundHits,
-                        AlignmentGroup<SMEMAlignment>& hitList,
-                        uint64_t& hitListCount,
-                        std::vector<Transcript>& transcripts) {
-
-    std::unordered_map<uint64_t, CoverageCalculator> leftHits;
-
-    std::unordered_map<uint64_t, CoverageCalculator> rightHits;
-
-    uint32_t leftReadLength{0};
-    uint32_t rightReadLength{0};
-
-    /**
-    * As soon as we can decide on an acceptable way to validate read names,
-    * we'll inform the user and quit if we see something inconsistent.  However,
-    * we first need a reasonable way to verify potential naming formats from
-    * many different sources.
-    */
-    /*
-    if (!consistentNames(frag)) {
-        fmt::MemoryWriter errstream;
-
-        errstream << "Inconsistent paired-end reads!\n";
-        errstream << "mate1 : " << frag.first.header << "\n";
-        errstream << "mate2 : " << frag.second.header << "\n";
-        errstream << "Paired-end reads should appear consistently in their respective files.\n";
-        errstream << "Please fix the paire-end input before quantifying with salmon; exiting.\n";
-
-        std::cerr << errstream.str();
-        std::exit(-1);
-    }
-    */
-
-    //---------- End 1 ----------------------//
-    {
-        std::string readStr   = frag.first.seq;
-        uint32_t readLen      = readStr.size();
-
-        leftReadLength = readLen;
-
-        for (int p = 0; p < readLen; ++p) {
-            readStr[p] = nst_nt4_table[static_cast<int>(readStr[p])];
-        }
-
-        collectHitsForRead(idx, a, auxHits,
-                            memOptions,
-                            salmonOpts,
-                            reinterpret_cast<const uint8_t*>(readStr.c_str()),
-                            readLen,
-                            leftHits);
-    }
-
-    //---------- End 2 ----------------------//
-    {
-        std::string readStr   = frag.second.seq;
-        uint32_t readLen      = readStr.size();
-
-        rightReadLength = readLen;
-
-        for (int p = 0; p < readLen; ++p) {
-            readStr[p] = nst_nt4_table[static_cast<int>(readStr[p])];
-        }
-
-        collectHitsForRead(idx, a, auxHits,
-                            memOptions,
-                            salmonOpts,
-                            reinterpret_cast<const uint8_t*>(readStr.c_str()),
-                            readLen,
-                            rightHits);
-     } // end right
-
-    upperBoundHits += (leftHits.size() + rightHits.size() > 0) ? 1 : 0;
-    size_t readHits{0};
-    auto& alnList = hitList.alignments();
-    hitList.isUniquelyMapped() = true;
-    alnList.clear();
-
-    //std::cerr << "leftHits.size() = " << leftHits.size() << ", leftHitsOld.size() = " << leftHitsOld.size() <<
-    //             "rightHits.size() = " << rightHits.size() << ", rightHitsOld.size() = "<< rightHitsOld.size() << "\n";
-
-    double cutoffLeft{ coverageThresh };//* leftReadLength};
-    double cutoffRight{ coverageThresh };//* rightReadLength};
-
-    uint64_t leftHitCount{0};
-
-    // Fraction of the optimal coverage that a lightweight alignment
-    // must obtain in order to be retained.
-    float fOpt{0.9};
-
-    // First, see if there are transcripts where both ends of the
-    // fragments map
-    auto& minHitList = (leftHits.size() < rightHits.size()) ? leftHits : rightHits;
-    auto& maxHitList = (leftHits.size() < rightHits.size()) ? rightHits : leftHits;
-
-    std::vector<uint64_t> jointHits; // haha (variable name)!
-    jointHits.reserve(minHitList.size());
-
-    {
-        auto notFound = maxHitList.end();
-        for (auto& kv : minHitList) {
-            uint64_t refID = kv.first;
-            if (maxHitList.find(refID) != notFound) {
-                jointHits.emplace_back(refID);
-            }
-        }
-    }
-
-    // Check if the fragment generated orphaned
-    // lightweight alignments.
-    bool isOrphan = (jointHits.size() == 0);
-
-    uint32_t firstTranscriptID = std::numeric_limits<uint32_t>::max();
-    double bestScore = -std::numeric_limits<double>::max();
-    bool sortedByTranscript = true;
-    int32_t lastTranscriptId = std::numeric_limits<int32_t>::min();
-
-    if (BOOST_UNLIKELY(isOrphan)) {
-        return;
-        bool foundValidHit{false};
-        // search for a hit on the left
-        for (auto& tHitList : leftHits) {
-            auto transcriptID = tHitList.first;
-            Transcript& t = transcripts[transcriptID];
-            tHitList.second.computeBestChain(t, frag.first.seq);
-            double score = tHitList.second.bestHitScore;
-
-            if (score >= fOpt * bestScore and score >= cutoffLeft) {
-    	    	// make sure orphaned fragment is near the end of the transcript
-	    	    if (!nearEndOfTranscript(tHitList.second, t, 200)) { continue; }
-
-                foundValidHit = true;
-
-        		if (score > bestScore) { bestScore = score; }
-                bool isForward = tHitList.second.isForward();
-                int32_t hitPos = tHitList.second.bestHitPos;
-                auto fmt = salmon::utils::hitType(hitPos, isForward);
-
-                if (leftHitCount == 0) {
-                    firstTranscriptID = transcriptID;
-                } else if (hitList.isUniquelyMapped() and transcriptID != firstTranscriptID) {
-                    hitList.isUniquelyMapped() = false;
-                }
-
-                if (transcriptID  < lastTranscriptId) {
-                    sortedByTranscript = false;
-                }
-
-                alnList.emplace_back(transcriptID, fmt, score, hitPos);
-                readHits += score;
-                ++hitListCount;
-                ++leftHitCount;
-            }
-        }
-
-        //if (!foundValidHit) {
-            // search for a hit on the right
-            for (auto& tHitList : rightHits) {
-                auto transcriptID = tHitList.first;
-                Transcript& t = transcripts[transcriptID];
-                tHitList.second.computeBestChain(t, frag.second.seq);
-                double score = tHitList.second.bestHitScore;
-
-                if (score >= fOpt * bestScore and score >= cutoffRight) {
-        		    // make sure orphaned fragment is near the end of the transcript
-	        	    if (!nearEndOfTranscript(tHitList.second, t, 200)) { continue; }
-
-                    if (score > bestScore) { bestScore = score; }
-                    foundValidHit = true;
-                    bool isForward = tHitList.second.isForward();
-                    int32_t hitPos = tHitList.second.bestHitPos;
-                    auto fmt = salmon::utils::hitType(hitPos, isForward);
-                    if (leftHitCount == 0) {
-                        firstTranscriptID = tHitList.first;
-                    } else if (hitList.isUniquelyMapped() and transcriptID != firstTranscriptID) {
-                        hitList.isUniquelyMapped() = false;
-                    }
-
-                    alnList.emplace_back(transcriptID, fmt, score, hitPos);
-                    readHits += score;
-                    ++hitListCount;
-                    ++leftHitCount;
-                }
-            }
-        //}
-
-        if (alnList.size() > 0) {
-            auto newEnd = std::stable_partition(alnList.begin(), alnList.end(),
-                           [bestScore, fOpt](SMEMAlignment& aln) -> bool {
-                                return aln.score() >= fOpt * bestScore;
-                           });
-            alnList.resize(std::distance(alnList.begin(), newEnd));
-            if (!sortedByTranscript) {
-
-                std::sort(alnList.begin(), alnList.end(),
-                          [](const SMEMAlignment& x, const SMEMAlignment& y) -> bool {
-                           return x.transcriptID() < y.transcriptID();
-                          });
-            }
-        }
-
-    } else {
-        for (auto transcriptID : jointHits) {
-            Transcript& t = transcripts[transcriptID];
-            auto& leftHitList = leftHits[transcriptID];
-            leftHitList.computeBestChain(t, frag.first.seq);
-            if (leftHitList.bestHitScore >= cutoffLeft) {
-                auto& rightHitList = rightHits[transcriptID];
-                rightHitList.computeBestChain(t, frag.second.seq);
-                if (rightHitList.bestHitScore < cutoffRight) { continue; }
-
-                auto end1Start = leftHitList.bestHitPos;
-                auto end2Start = rightHitList.bestHitPos;
-
-                double score = (leftHitList.bestHitScore + rightHitList.bestHitScore) * 0.5;
-                if (score < fOpt * bestScore) { continue; }
-
-                if (score > bestScore) {
-                    bestScore = score;
-                }
-
-                uint32_t fragLength = std::abs(static_cast<int32_t>(end1Start) -
-                                               static_cast<int32_t>(end2Start)) + rightReadLength;
-
-                bool end1IsForward = leftHitList.isForward();
-                bool end2IsForward = rightHitList.isForward();
-
-                uint32_t end1Pos = (end1IsForward) ? leftHitList.bestHitPos : leftHitList.bestHitPos + leftReadLength;
-                uint32_t end2Pos = (end2IsForward) ? rightHitList.bestHitPos : rightHitList.bestHitPos + rightReadLength;
-        		bool canDovetail = false;
-                auto fmt = salmon::utils::hitType(end1Pos, end1IsForward, leftReadLength, end2Pos, end2IsForward, rightReadLength, canDovetail);
-
-                if (readHits == 0) {
-                    firstTranscriptID = transcriptID;
-                } else if (hitList.isUniquelyMapped() and transcriptID != firstTranscriptID) {
-                     hitList.isUniquelyMapped() = false;
-                }
-
-                int32_t minHitPos = std::min(end1Pos, end2Pos);
-                if (transcriptID  < lastTranscriptId) {
-                    sortedByTranscript = false;
-                }
-                alnList.emplace_back(transcriptID, fmt, score, minHitPos, fragLength);
-                ++readHits;
-                ++hitListCount;
-            }
-        } // end for jointHits
-        if (alnList.size() > 0) {
-            auto newEnd = std::stable_partition(alnList.begin(), alnList.end(),
-                           [bestScore, fOpt](SMEMAlignment& aln) -> bool {
-                                return aln.score() >= fOpt * bestScore;
-                           });
-            alnList.resize(std::distance(alnList.begin(), newEnd));
-            if (!sortedByTranscript) {
-                std::sort(alnList.begin(), alnList.end(),
-                          [](const SMEMAlignment& x, const SMEMAlignment& y) -> bool {
-                           return x.transcriptID() < y.transcriptID();
-                          });
-            }
-        }
-    } // end else
-
-    /*
-    for (auto& tHitList : leftHits) {
-        // Coverage score
-        Transcript& t = transcripts[tHitList.first];
-        tHitList.second.computeBestChain(t, frag.first.seq);
-        ++leftHitCount;
-    }
-
-    uint32_t firstTranscriptID = std::numeric_limits<uint32_t>::max();
-
-    for (auto& tHitList : rightHits) {
-
-        auto it = leftHits.find(tHitList.first);
-        // Coverage score
-        if (it != leftHits.end() and it->second.bestHitScore >= cutoffLeft) {
-            Transcript& t = transcripts[tHitList.first];
-            tHitList.second.computeBestChain(t, frag.second.seq);
-            if (tHitList.second.bestHitScore < cutoffRight) { continue; }
-
-            auto end1Start = it->second.bestHitPos;
-            auto end2Start = tHitList.second.bestHitPos;
-
-            double score = (it->second.bestHitScore + tHitList.second.bestHitScore) * 0.5;
-            uint32_t fragLength = std::abs(static_cast<int32_t>(end1Start) -
-                                           static_cast<int32_t>(end2Start)) + rightReadLength;
-
-            bool end1IsForward = it->second.isForward();
-            bool end2IsForward = tHitList.second.isForward();
-
-            uint32_t end1Pos = (end1IsForward) ? it->second.bestHitPos : it->second.bestHitPos + leftReadLength;
-            uint32_t end2Pos = (end2IsForward) ? tHitList.second.bestHitPos : tHitList.second.bestHitPos + rightReadLength;
-            auto fmt = salmon::utils::hitType(end1Pos, end1IsForward, end2Pos, end2IsForward);
-
-            if (readHits == 0) {
-                firstTranscriptID = tHitList.first;
-            } else if (hitList.isUniquelyMapped() and tHitList.first != firstTranscriptID) {
-                hitList.isUniquelyMapped() = false;
-            }
+// To use the parser in the following, we get "jobs" until none is
+// available. A job behaves like a pointer to the type
+// jellyfish::sequence_list (see whole_sequence_parser.hpp).
+template <typename RapMapIndexT>
+void processReadsQuasi(paired_parser* parser,
+               ReadExperiment& readExp,
+               ReadLibrary& rl,
+               AlnGroupVec<SMEMAlignment>& structureVec,
+               std::atomic<uint64_t>& numObservedFragments,
+               std::atomic<uint64_t>& numAssignedFragments,
+               std::atomic<uint64_t>& validHits,
+               std::atomic<uint64_t>& upperBoundHits,
+               RapMapIndexT* idx,
+               std::vector<Transcript>& transcripts,
+               ForgettingMassCalculator& fmCalc,
+               ClusterForest& clusterForest,
+               FragmentLengthDistribution& fragLengthDist,
+               mem_opt_t* memOptions,
+               SalmonOpts& salmonOpts,
+               double coverageThresh,
+	           std::mutex& iomutex,
+               bool initialRound,
+               std::atomic<bool>& burnedIn,
+               volatile bool& writeToCache) {
 
-            alnList.emplace_back(tHitList.first, fmt, score, fragLength);
-            ++readHits;
-            ++hitListCount;
-        }
-    }
-    */
+    // ERROR
+    salmonOpts.jointLog->error("MEM-mapping cannot be used with the Quasi index --- please report this bug on GitHub");
+    std::exit(1);
 }
 
-/**
-  *   Get hits for single-end fragment
-  *
-  *
-  */
-template <typename CoverageCalculator>
-void getHitsForFragment(jellyfish::header_sequence_qual& frag,
-                        bwaidx_t *idx,
-                        smem_i *itr,
-                        const bwtintv_v *a,
-                        smem_aux_t* auxHits,
-                        mem_opt_t* memOptions,
-                        const SalmonOpts& salmonOpts,
-                        double coverageThresh,
-                        uint64_t& upperBoundHits,
-                        AlignmentGroup<SMEMAlignment>& hitList,
-                        uint64_t& hitListCount,
-                        std::vector<Transcript>& transcripts) {
-
-    uint64_t leftHitCount{0};
-
-    //std::unordered_map<uint64_t, TranscriptHitList> hits;
-    std::unordered_map<uint64_t, CoverageCalculator> hits;
-
-    uint32_t readLength{0};
-
-    //---------- get hits ----------------------//
-    {
-        std::string readStr   = frag.seq;
-        uint32_t readLen      = frag.seq.size();
-
-        readLength = readLen;
-
-        for (int p = 0; p < readLen; ++p) {
-            readStr[p] = nst_nt4_table[static_cast<int>(readStr[p])];
-        }
-
-        char* readPtr = const_cast<char*>(readStr.c_str());
-
-        collectHitsForRead(idx, a, auxHits,
-                            memOptions,
-                            salmonOpts,
-                            reinterpret_cast<const uint8_t*>(readStr.c_str()),
-                            readLen,
-                            hits);
-
-    }
-
-    upperBoundHits += (hits.size() > 0) ? 1 : 0;
-
-    int32_t lastTranscriptId = std::numeric_limits<int32_t>::min();
-    bool sortedByTranscript{true};
-    double fOpt{0.9};
-    double bestScore = -std::numeric_limits<double>::max();
-
-    size_t readHits{0};
-    auto& alnList = hitList.alignments();
-    hitList.isUniquelyMapped() = true;
-    alnList.clear();
-
-    uint32_t firstTranscriptID = std::numeric_limits<uint32_t>::max();
-    double cutoff{ coverageThresh };//* readLength};
-    for (auto& tHitList : hits) {
-        // Coverage score
-        Transcript& t = transcripts[tHitList.first];
-        tHitList.second.computeBestChain(t, frag.seq);
-        double score = tHitList.second.bestHitScore;
-        // DEBUG -- process ALL HITS
-        //if (true) {
-        if (score >= fOpt * bestScore and tHitList.second.bestHitScore >= cutoff) {
-
-            bool isForward = tHitList.second.isForward();
-            if (score < fOpt * bestScore) { continue; }
-
-        	if (score > bestScore) { bestScore = score; }
-
-            auto hitPos = tHitList.second.bestHitPos;
-            auto fmt = salmon::utils::hitType(hitPos, isForward);
-
-            if (leftHitCount == 0) {
-                firstTranscriptID = tHitList.first;
-            } else if (hitList.isUniquelyMapped() and tHitList.first != firstTranscriptID) {
-                hitList.isUniquelyMapped() = false;
-            }
-
-            auto transcriptID = tHitList.first;
-
-            if (transcriptID  < lastTranscriptId) {
-                sortedByTranscript = false;
-            }
-
-            alnList.emplace_back(transcriptID, fmt, score, hitPos);
-            readHits += score;
-            ++hitListCount;
-            ++leftHitCount;
-        }
-    }
-    if (alnList.size() > 0) {
-        auto newEnd = std::stable_partition(alnList.begin(), alnList.end(),
-                [bestScore, fOpt](SMEMAlignment& aln) -> bool {
-                return aln.score() >= fOpt * bestScore;
-                });
-        alnList.resize(std::distance(alnList.begin(), newEnd));
-        if (!sortedByTranscript) {
-            std::sort(alnList.begin(), alnList.end(),
-                    [](const SMEMAlignment& x, const SMEMAlignment& y) -> bool {
-                     return x.transcriptID() < y.transcriptID();
-                    });
-        }
-    }
-
-
-
+template <typename RapMapIndexT>
+void processReadsQuasi(single_parser* parser,
+               ReadExperiment& readExp,
+               ReadLibrary& rl,
+               AlnGroupVec<SMEMAlignment>& structureVec,
+               std::atomic<uint64_t>& numObservedFragments,
+               std::atomic<uint64_t>& numAssignedFragments,
+               std::atomic<uint64_t>& validHits,
+               std::atomic<uint64_t>& upperBoundHits,
+               RapMapIndexT* sidx,
+               std::vector<Transcript>& transcripts,
+               ForgettingMassCalculator& fmCalc,
+               ClusterForest& clusterForest,
+               FragmentLengthDistribution& fragLengthDist,
+               mem_opt_t* memOptions,
+               SalmonOpts& salmonOpts,
+               double coverageThresh,
+	           std::mutex& iomutex,
+               bool initialRound,
+               std::atomic<bool>& burnedIn,
+               volatile bool& writeToCache) {
+    // ERROR
+    salmonOpts.jointLog->error("MEM-mapping cannot be used with the Quasi index --- please report this bug on GitHub");
+    std::exit(1);
 }
 
-// To use the parser in the following, we get "jobs" until none is
-// available. A job behaves like a pointer to the type
-// jellyfish::sequence_list (see whole_sequence_parser.hpp).
-template <typename ParserT, typename CoverageCalculator>
-void processReadsMEM(ParserT* parser,
+template <typename RapMapIndexT>
+void processReadsQuasi(paired_parser* parser,
                ReadExperiment& readExp,
                ReadLibrary& rl,
-               AlnGroupQueue& structureCache,
-               AlnGroupQueue& outputGroups,
+               AlnGroupVec<QuasiAlignment>& structureVec,
                std::atomic<uint64_t>& numObservedFragments,
                std::atomic<uint64_t>& numAssignedFragments,
                std::atomic<uint64_t>& validHits,
                std::atomic<uint64_t>& upperBoundHits,
-               bwaidx_t *idx,
+               RapMapIndexT* qidx,
                std::vector<Transcript>& transcripts,
                ForgettingMassCalculator& fmCalc,
                ClusterForest& clusterForest,
                FragmentLengthDistribution& fragLengthDist,
                mem_opt_t* memOptions,
-               const SalmonOpts& salmonOpts,
+               SalmonOpts& salmonOpts,
                double coverageThresh,
 	           std::mutex& iomutex,
                bool initialRound,
-               bool& burnedIn,
+               std::atomic<bool>& burnedIn,
                volatile bool& writeToCache) {
   uint64_t count_fwd = 0, count_bwd = 0;
   // Seed with a real random value, if available
@@ -1817,18 +568,11 @@ void processReadsMEM(ParserT* parser,
   // Create a random uniform distribution
   std::default_random_engine eng(rd());
 
-  std::vector<AlignmentGroup<SMEMAlignment>*> hitLists;
-  //std::vector<std::vector<Alignment>> hitLists;
   uint64_t prevObservedFrags{1};
-  hitLists.resize(miniBatchSize);
-
   uint64_t leftHitCount{0};
   uint64_t hitListCount{0};
 
-  // Super-MEM iterator
-  smem_i *itr = smem_itr_init(idx->bwt);
-  const bwtintv_v *a = nullptr;
-  smem_aux_t* auxHits = smem_aux_init();
+  auto& readBias = readExp.readBias();
 
   auto expectedLibType = rl.format();
 
@@ -1836,44 +580,149 @@ void processReadsMEM(ParserT* parser,
 
   size_t locRead{0};
   uint64_t localUpperBoundHits{0};
+  size_t rangeSize{0};
+  uint64_t  localNumAssignedFragments{0};
+
+  bool tooManyHits{false};
+  size_t maxNumHits{salmonOpts.maxReadOccs};
+  size_t readLen{0};
+  SACollector<RapMapIndexT> hitCollector(qidx);
+  SASearcher<RapMapIndexT> saSearcher(qidx);
+  std::vector<QuasiAlignment> leftHits;
+  std::vector<QuasiAlignment> rightHits;
+  rapmap::utils::HitCounters hctr;
+
   while(true) {
-    typename ParserT::job j(*parser); // Get a job from the parser: a bunch of read (at most max_read_group)
+    typename paired_parser::job j(*parser); // Get a job from the parser: a bunch of reads (at most max_read_group)
     if(j.is_empty()) break;           // If got nothing, quit
 
-    hitLists.resize(j->nb_filled);
-    //structureCache.try_dequeue_bulk(hitLists.begin() , j->nb_filled);
+    rangeSize = j->nb_filled;
+    if (rangeSize > structureVec.size()) {
+        salmonOpts.jointLog->error("rangeSize = {}, but structureVec.size() = {} --- this shouldn't happen.\n"
+                                   "Please report this bug on GitHub", rangeSize, structureVec.size());
+        std::exit(1);
+    }
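+    // structureVec appears to be a per-worker, pre-allocated pool of alignment
+    // groups sized to the mini-batch; hit storage is reused across batches
+    // rather than re-allocated for every read.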
 
     for(size_t i = 0; i < j->nb_filled; ++i) { // For all the reads in this batch
+        readLen = j->data[i].first.seq.length();
+        tooManyHits = false;
         localUpperBoundHits = 0;
-        //hitLists[i].setRead(&j->data[i]);
+        auto& jointHitGroup = structureVec[i];
+        jointHitGroup.clearAlignments();
+        auto& jointHits = jointHitGroup.alignments();
+        leftHits.clear();
+        rightHits.clear();
+
+        bool lh = hitCollector(j->data[i].first.seq,
+                               leftHits, saSearcher,
+                               MateStatus::PAIRED_END_LEFT,
+                               true);
+        bool rh = hitCollector(j->data[i].second.seq,
+                               rightHits, saSearcher,
+                               MateStatus::PAIRED_END_RIGHT,
+                               true);
+
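+        // mergeLeftRightHits (from RapMap) is expected to combine the two
+        // per-end hit lists into a single transcript-ordered set of joint
+        // hits, pairing ends that map to the same transcript and keeping
+        // unpaired ends as orphans; tooManyHits is set if maxNumHits is exceeded.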
+        rapmap::utils::mergeLeftRightHits(
+                               leftHits, rightHits, jointHits,
+                               readLen, maxNumHits, tooManyHits, hctr);
 
-#if defined(__MOODYCAMEL__)
-        // Moody camel
-        while (!structureCache.try_dequeue(hitLists[i])) {}
-#else
-        // TBB
-        while (!structureCache.try_pop(hitLists[i])) {}
-#endif
-        auto& hitList = *(hitLists[i]);
-
-        getHitsForFragment<CoverageCalculator>(j->data[i], idx, itr, a,
-                                               auxHits,
-                                               memOptions,
-                                               salmonOpts,
-                                               coverageThresh,
-                                               localUpperBoundHits,
-                                               hitList, hitListCount,
-                                               transcripts);
         if (initialRound) {
-            upperBoundHits += localUpperBoundHits;
+            upperBoundHits += (jointHits.size() > 0);
         }
 
         // If the read mapped to > maxReadOccs places, discard it
-        if (hitList.size() > salmonOpts.maxReadOccs ) { hitList.alignments().clear(); }
-        validHits += hitList.size();
+        if (jointHits.size() > salmonOpts.maxReadOccs ) { jointHitGroup.clearAlignments(); }
+
+
+	// If we have mappings, then process them.
+	if (jointHits.size() > 0) {
+	  bool isPaired = jointHits.front().mateStatus == rapmap::utils::MateStatus::PAIRED_END_PAIRED;
+	  // If we are ignoring orphans
+	  if (!salmonOpts.allowOrphans) {
+	    // If the mappings for the current read are not properly-paired (i.e. are orphans)
+	    // then just clear the group.
+	    if (!isPaired) { jointHitGroup.clearAlignments(); }
+	  } else {
+	    // If these aren't paired-end reads --- so that
+	    // we have orphans --- make sure we sort the
+	    // mappings so that they are in transcript order
+	    if (!isPaired) {
+	      // Find the end of the hits for the left read
+	      auto leftHitEndIt = std::partition_point(
+		  jointHits.begin(), jointHits.end(),
+		  [](const QuasiAlignment& q) -> bool {
+		  return q.mateStatus == rapmap::utils::MateStatus::PAIRED_END_LEFT;
+		  });
+	      // Merge the hits so that the entire list is in order
+	      // by transcript ID.
+	      std::inplace_merge(jointHits.begin(), leftHitEndIt, jointHits.end(),
+		  [](const QuasiAlignment& a, const QuasiAlignment& b) -> bool {
+		  return a.transcriptID() < b.transcriptID();
+		  });
+	    }
+	  }
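+	  // Keeping orphan hits sorted by transcript ID matters because the
+	  // downstream mini-batch processing (cluster merging and equivalence
+	  // class construction) presumably assumes each alignment group is in
+	  // transcript order.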
+
+	  bool needBiasSample = salmonOpts.biasCorrect;
+
+	  for (auto& h : jointHits) {
+
+	    // ---- Collect bias samples ------ //
+	    int32_t pos = static_cast<int32_t>(h.pos);
+	    auto dir = salmon::utils::boolToDirection(h.fwd);
+
+	    // If bias correction is turned on, and we haven't sampled a mapping
+	    // for this read yet, and we haven't collected the required number of
+	    // samples overall.
+	    if(needBiasSample and salmonOpts.numBiasSamples > 0){
+	      // the "start" position is the leftmost position if
+	      // we hit the forward strand, and the leftmost
+	      // position + the read length if we hit the reverse complement
+	      int32_t startPos = h.fwd ? pos : pos + h.readLen;
+
+	      auto& t = transcripts[h.tid];
+	      if (startPos > 0 and startPos < t.RefLength) {
+		const char* txpStart = t.Sequence;
+		const char* readStart = txpStart + startPos;
+		const char* txpEnd = txpStart + t.RefLength;
+		bool success = readBias.update(txpStart, readStart, txpEnd, dir);
+		if (success) {
+		  salmonOpts.numBiasSamples -= 1;
+		  needBiasSample = false;
+		}
+	      }
+	    }
+	    // ---- Collect bias samples ------ //
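+	    // The sampled sequence context at the fragment's 5' start presumably
+	    // feeds the read-start k-mer distribution (readBias), from which a
+	    // sequence-specific bias correction can later be estimated.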
+
+
+	    switch (h.mateStatus) {
+	      case MateStatus::PAIRED_END_LEFT:
+		{
+		  h.format = salmon::utils::hitType(h.pos, h.fwd);
+		}
+		break;
+	      case MateStatus::PAIRED_END_RIGHT:
+		{
+		  h.format = salmon::utils::hitType(h.pos, h.fwd);
+		}
+		break;
+	      case MateStatus::PAIRED_END_PAIRED:
+		{
+		  uint32_t end1Pos = (h.fwd) ? h.pos : h.pos + h.readLen;
+		  uint32_t end2Pos = (h.mateIsFwd) ? h.matePos : h.matePos + h.mateLen;
+		  bool canDovetail = false;
+		  h.format = salmon::utils::hitType(end1Pos, h.fwd, h.readLen,
+		      end2Pos, h.mateIsFwd, h.mateLen, canDovetail);
+		}
+		break;
+	    }
+	  }
+	} // If we have no mappings --- then there's nothing to do
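
[Editor's note: the orphan-ordering step above can be illustrated in isolation. The sketch below is not part of the commit; Hit and its isLeft flag are hypothetical stand-ins for QuasiAlignment and MateStatus. It shows how std::partition_point finds the end of the left-mate run and std::inplace_merge restores a single transcript-ID order.]

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    struct Hit { uint32_t tid; bool isLeft; };   // stand-in for QuasiAlignment

    int main() {
        // Left-mate hits come first (sorted by tid), then right-mate hits (sorted by tid).
        std::vector<Hit> hits{{2, true}, {7, true}, {1, false}, {5, false}};
        // Locate the end of the left-mate run...
        auto leftEnd = std::partition_point(hits.begin(), hits.end(),
                                            [](const Hit& h) { return h.isLeft; });
        // ...and merge the two sorted runs so the whole list is ordered by transcript ID.
        std::inplace_merge(hits.begin(), leftEnd, hits.end(),
                           [](const Hit& a, const Hit& b) { return a.tid < b.tid; });
        for (const auto& h : hits) { std::cout << h.tid << ' '; }   // prints: 1 2 5 7
        std::cout << '\n';
    }
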
+
+        validHits += jointHits.size();
+        localNumAssignedFragments += (jointHits.size() > 0);
         locRead++;
         ++numObservedFragments;
-        if (numObservedFragments % 50000 == 0) {
+        if (numObservedFragments % 500000 == 0) {
     	    iomutex.lock();
             const char RESET_COLOR[] = "\x1b[0m";
             char green[] = "\x1b[30m";
@@ -1882,9 +731,9 @@ void processReadsMEM(ParserT* parser,
             red[3] = '0' + static_cast<char>(fmt::RED);
             if (initialRound) {
                 fmt::print(stderr, "\033[A\r\r{}processed{} {} {}fragments{}\n", green, red, numObservedFragments, green, RESET_COLOR);
-                fmt::print(stderr, "hits per frag:  {}; hit upper bound: {}",
-                           validHits / static_cast<float>(prevObservedFrags),
-                           upperBoundHits.load());
+                fmt::print(stderr, "hits: {}, hits per frag:  {}",
+                        validHits,
+                        validHits / static_cast<float>(prevObservedFrags));
             } else {
                 fmt::print(stderr, "\r\r{}processed{} {} {}fragments{}", green, red, numObservedFragments, green, RESET_COLOR);
             }
@@ -1894,150 +743,148 @@ void processReadsMEM(ParserT* parser,
 
     } // end for i < j->nb_filled
 
+    prevObservedFrags = numObservedFragments;
+    AlnGroupVecRange<QuasiAlignment> hitLists = boost::make_iterator_range(structureVec.begin(), structureVec.begin() + rangeSize);
+    processMiniBatch<QuasiAlignment>(readExp, fmCalc,firstTimestepOfRound, rl, salmonOpts, hitLists, transcripts, clusterForest,
+                     fragLengthDist, numAssignedFragments, eng, initialRound, burnedIn);
+  }
+}
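
[Editor's note: the bounds check that guards readBias.update() in the loop above reduces to the small helper sketched here. sampleWindow and its parameters are illustrative names, not identifiers from the source: on the forward strand the sample is anchored at the leftmost mapped base, on the reverse complement at the leftmost base plus the read length, and positions outside the transcript are skipped.]

    #include <cstdint>
    #include <string>

    // Illustrative only: mirrors the start-position logic used before readBias.update().
    bool sampleWindow(const std::string& txpSeq, int32_t pos, int32_t readLen, bool fwd) {
        int32_t startPos = fwd ? pos : pos + readLen;               // anchor of the bias window
        int32_t refLen   = static_cast<int32_t>(txpSeq.size());
        if (startPos <= 0 || startPos >= refLen) { return false; }  // off the transcript
        const char* txpStart  = txpSeq.data();
        const char* readStart = txpStart + startPos;
        const char* txpEnd    = txpStart + refLen;
        // a real sampler would hash the sequence context between these pointers
        return readStart < txpEnd;
    }
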
 
-    // NOT DOUBLY-COLLAPSED
-    // double logForgettingMass = fmCalc();
+// SINGLE END
 
-    // BEGIN: DOUBLY-COLLAPSED TESTING
-   // double logForgettingMass{0.0};
-   // uint64_t currentMinibatchTimestep{0};
-   // fmCalc.getLogMassAndTimestep(logForgettingMass, currentMinibatchTimestep);
-    // END: DOUBLE-COLLAPSED TESTING
+// To use the parser in the following, we get "jobs" until none is
+// available. A job behaves like a pointer to the type
+// jellyfish::sequence_list (see whole_sequence_parser.hpp).
+template <typename RapMapIndexT>
+void processReadsQuasi(single_parser* parser,
+               ReadExperiment& readExp,
+               ReadLibrary& rl,
+               AlnGroupVec<QuasiAlignment>& structureVec,
+               std::atomic<uint64_t>& numObservedFragments,
+               std::atomic<uint64_t>& numAssignedFragments,
+               std::atomic<uint64_t>& validHits,
+               std::atomic<uint64_t>& upperBoundHits,
+               RapMapIndexT* qidx,
+               std::vector<Transcript>& transcripts,
+               ForgettingMassCalculator& fmCalc,
+               ClusterForest& clusterForest,
+               FragmentLengthDistribution& fragLengthDist,
+               mem_opt_t* memOptions,
+               SalmonOpts& salmonOpts,
+               double coverageThresh,
+               std::mutex& iomutex,
+               bool initialRound,
+               std::atomic<bool>& burnedIn,
+               volatile bool& writeToCache) {
+  uint64_t count_fwd = 0, count_bwd = 0;
+  // Seed with a real random value, if available
+  std::random_device rd;
 
-    prevObservedFrags = numObservedFragments;
+  // Create a pseudo-random engine, seeded from the random device
+  std::default_random_engine eng(rd());
 
-   processMiniBatch(readExp, fmCalc,firstTimestepOfRound, rl, salmonOpts, hitLists, transcripts, clusterForest,
-                     fragLengthDist, numAssignedFragments, eng, initialRound, burnedIn);
-    if (writeToCache) {
+  uint64_t prevObservedFrags{1};
+  uint64_t leftHitCount{0};
+  uint64_t hitListCount{0};
 
-#if defined(__MOODYCAMEL__)
-        // Moody camel
-        if (!outputGroups.enqueue_bulk(hitLists.begin(), hitLists.size())) {
-            salmonOpts.jointLog->critical("Could not enqueue items in "
-                                          "outputGroups queue\n");
-            std::exit(1);
-        }
-#else
-        // TBB
-        for (auto hl : hitLists) { outputGroups.push(hl); }
-#endif
-    } else {
+  auto& readBias = readExp.readBias();
+  const char* txomeStr = qidx->seq.c_str();
 
-#if defined(__MOODYCAMEL__)
-        // Moody camel
-        if (!structureCache.enqueue_bulk(hitLists.begin(), hitLists.size())) {
-            salmonOpts.jointLog->critical("Could not enqueue items in "
-                                          "structureCache queue\n");
-            std::exit(1);
-        }
-#else
-        // TBB
-        for (auto hl : hitLists) { structureCache.push(hl); }
-#endif
-    }
-    // At this point, the parser can re-claim the strings
-  }
-  smem_aux_destroy(auxHits);
-  smem_itr_destroy(itr);
-}
+  auto expectedLibType = rl.format();
 
-// To use the parser in the following, we get "jobs" until none is
-// available. A job behaves like a pointer to the type
-// jellyfish::sequence_list (see whole_sequence_parser.hpp).
-void processCachedAlignmentsHelper(
-        ReadExperiment& readExp,
-        ReadLibrary& rl,
-        AlnGroupQueue& structureCache,
-        AlnGroupQueue& alignmentCache,
-        std::atomic<uint64_t>& numObservedFragments,
-        std::atomic<uint64_t>& numAssignedFragments,
-        std::atomic<uint64_t>& validHits,
-        std::vector<Transcript>& transcripts,
-        ForgettingMassCalculator& fmCalc,
-        ClusterForest& clusterForest,
-        FragmentLengthDistribution& fragLengthDist,
-        const SalmonOpts& salmonOpts,
-        std::mutex& iomutex,
-        bool initialRound,
-        volatile bool& cacheExhausted,
-        bool& burnedIn) {
 
-    // Seed with a real random value, if available
-    std::random_device rd;
+  uint64_t firstTimestepOfRound = fmCalc.getCurrentTimestep();
 
-    // Create a random uniform distribution
-    std::default_random_engine eng(rd());
+  size_t locRead{0};
+  uint64_t localUpperBoundHits{0};
+  size_t rangeSize{0};
 
-    std::vector<AlignmentGroup<SMEMAlignment>*> hitLists;
+  bool tooManyHits{false};
+  size_t readLen{0};
+  size_t maxNumHits{salmonOpts.maxReadOccs};
+  SACollector<RapMapIndexT> hitCollector(qidx);
+  SASearcher<RapMapIndexT> saSearcher(qidx);
+  rapmap::utils::HitCounters hctr;
 
-    uint64_t prevObservedFrags{1};
-    auto expectedLibType = rl.format();
+  while(true) {
+    typename single_parser::job j(*parser); // Get a job from the parser: a bunch of reads (at most max_read_group)
+    if(j.is_empty()) break;           // If we got nothing, quit
 
-    uint32_t batchCount{miniBatchSize};
-    uint64_t locRead{0};
-    uint64_t locValidHits{0};
-    uint32_t numConsumed{0};
-#if defined(__MOODYCAMEL__)
-    uint32_t obtained{0};
-#else
-    bool obtained{false};
-#endif
+    rangeSize = j->nb_filled;
+    if (rangeSize > structureVec.size()) {
+        salmonOpts.jointLog->error("rangeSize = {}, but structureVec.size() = {} --- this shouldn't happen.\n"
+                                   "Please report this bug on GitHub", rangeSize, structureVec.size());
+        std::exit(1);
+    }
 
-    uint64_t firstTimestepOfRound = fmCalc.getCurrentTimestep();
-    hitLists.resize(batchCount);
-    auto it = hitLists.begin();
+    for(size_t i = 0; i < j->nb_filled; ++i) { // For all the reads in this batch
+        readLen = j->data[i].seq.length();
+        tooManyHits = false;
+        localUpperBoundHits = 0;
+        auto& jointHitGroup = structureVec[i];
+        auto& jointHits = jointHitGroup.alignments();
+        jointHitGroup.clearAlignments();
 
-    while(!cacheExhausted or
-#if defined(__MOODYCAMEL__)
-    // Moody camel
-          (obtained = alignmentCache.try_dequeue_bulk(it, batchCount - numConsumed)) > 0) {
-      numConsumed += obtained;
-#else
-    // TBB
-            (obtained = alignmentCache.try_pop(*it))) {
-        numConsumed += obtained ? 1 : 0;
-#endif
+        bool lh = hitCollector(j->data[i].seq,
+                               jointHits, saSearcher,
+                               MateStatus::SINGLE_END,
+                               true);
 
-        /** Get alignment groups from the queue while they still exist
-         * once the cacheExhausted variable is true, there will be
-         * no more alignment groups written in this round.  If cacheExhausted
-         * is true and the alignment cache is empty, then there are no
-         * more alignments to process (am I certain about this in concurrent
-         * crazy multi-threaded land?).
-         */
-        while (numConsumed < batchCount) {
-#if defined(__MOODYCAMEL__)
-            // Moody camel
-            it += obtained;
-            obtained = alignmentCache.try_dequeue_bulk(it, batchCount - numConsumed);
-            numConsumed += obtained;
-            if (cacheExhausted and obtained == 0) {
-#else
-            // TBB
-            it += obtained ? 1 : 0;
-            obtained = alignmentCache.try_pop(*it);
-            numConsumed += obtained ? 1 : 0;
-            if (cacheExhausted and !obtained) {
-#endif
-                break;
-            }
+        if (initialRound) {
+            upperBoundHits += (jointHits.size() > 0);
         }
-        // At this point, we either have the requested # of alignemnts, or
-        // have exhausted the alignment queue.
 
-        hitLists.resize(numConsumed);
-        for (auto hitList : hitLists) {
-            locValidHits += hitList->size();
+        // If the read mapped to > maxReadOccs places, discard it
+        if (jointHits.size() > salmonOpts.maxReadOccs ) { jointHitGroup.clearAlignments(); }
+
+	bool needBiasSample = salmonOpts.biasCorrect;
+
+        for (auto& h : jointHits) {
+
+	    // ---- Collect bias samples ------ //
+	    int32_t pos = static_cast<int32_t>(h.pos);
+	    auto dir = salmon::utils::boolToDirection(h.fwd);
+
+	    // If bias correction is turned on, we haven't yet sampled a mapping
+	    // for this read, and we still need more samples overall, then try to
+	    // take a bias sample from this mapping.
+	    if(needBiasSample and salmonOpts.numBiasSamples > 0){
+	      // the "start" position is the leftmost position if
+	      // we hit the forward strand, and the leftmost
+	      // position + the read length if we hit the reverse complement
+	      int32_t startPos = h.fwd ? pos : pos + h.readLen;
+
+
+	      auto& t = transcripts[h.tid];
+	      if (startPos > 0 and startPos < t.RefLength) {
+		const char* txpStart = t.Sequence;
+		const char* readStart = txpStart + startPos;
+		const char* txpEnd = txpStart + t.RefLength;
+		bool success = readBias.update(txpStart, readStart, txpEnd, dir);
+		if (success) {
+		  salmonOpts.numBiasSamples -= 1;
+		  needBiasSample = false;
+		}
+	      }
+	    }
+	    // ---- Collect bias samples ------ //
+
+
+
+            switch (h.mateStatus) {
+                case MateStatus::SINGLE_END:
+                    {
+                        h.format = salmon::utils::hitType(h.pos, h.fwd);
+                    }
+                    break;
+            }
         }
-        validHits += locValidHits;
-        locRead += numConsumed;
-
-        uint32_t updateRate = 500000;
-        uint64_t prevMod = numObservedFragments % updateRate;
-        numObservedFragments += numConsumed;
-        uint64_t newMod = numObservedFragments % updateRate;
-        if (newMod < prevMod) {
-            iomutex.lock();
+
+        validHits += jointHits.size();
+        locRead++;
+        ++numObservedFragments;
+        if (numObservedFragments % 500000 == 0) {
+    	    iomutex.lock();
             const char RESET_COLOR[] = "\x1b[0m";
             char green[] = "\x1b[30m";
             green[3] = '0' + static_cast<char>(fmt::GREEN);
@@ -2045,115 +892,49 @@ void processCachedAlignmentsHelper(
             red[3] = '0' + static_cast<char>(fmt::RED);
             if (initialRound) {
                 fmt::print(stderr, "\033[A\r\r{}processed{} {} {}fragments{}\n", green, red, numObservedFragments, green, RESET_COLOR);
-                fmt::print(stderr, "hits per frag:  {}", validHits / static_cast<float>(prevObservedFrags));
+                fmt::print(stderr, "hits: {}; hits per frag:  {}",
+                        validHits,
+                        validHits / static_cast<float>(prevObservedFrags));
             } else {
                 fmt::print(stderr, "\r\r{}processed{} {} {}fragments{}", green, red, numObservedFragments, green, RESET_COLOR);
             }
-            salmonOpts.fileLog->info("processed {} fragments\n", numObservedFragments);
-            iomutex.unlock();
+    	    iomutex.unlock();
         }
 
-        // NOT DOUBLY-COLLAPSED
-        // double logForgettingMass = fmCalc();
 
-        processMiniBatch(readExp, fmCalc,firstTimestepOfRound, rl, salmonOpts, hitLists, transcripts, clusterForest,
-                fragLengthDist, numAssignedFragments, eng, initialRound, burnedIn);
-#if defined(__MOODYCAMEL__)
-        if (!structureCache.enqueue_bulk(std::make_move_iterator(hitLists.begin()), hitLists.size())) {
-            salmonOpts.jointLog->error("Could not enqueue structures in "
-                                       "structureCache; exiting\n\n");
-            std::exit(1);
-        }
-#else
-        // TBB
-        for (auto& hl : hitLists) { structureCache.push(hl); }
-#endif
-        numConsumed = 0;
-        obtained = 0;
-        hitLists.clear();
-        hitLists.resize(batchCount);
-        it = hitLists.begin();
-        // At this point, the parser can re-claim the strings
-    }
+    } // end for i < j->nb_filled
 
+    prevObservedFrags = numObservedFragments;
+    AlnGroupVecRange<QuasiAlignment> hitLists = boost::make_iterator_range(structureVec.begin(), structureVec.begin() + rangeSize);
+    processMiniBatch<QuasiAlignment>(readExp, fmCalc,firstTimestepOfRound, rl, salmonOpts, hitLists, transcripts, clusterForest,
+                     fragLengthDist, numAssignedFragments, eng, initialRound, burnedIn);
+  }
 }
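
[Editor's note: the "get jobs until none is available" pattern described in the comment preceding processReadsQuasi can be sketched with a toy parser. ToyJob and ToyParser below are illustrative stand-ins, not the jellyfish types.]

    #include <cstddef>
    #include <string>
    #include <vector>

    // Toy stand-ins for the job/parser interface used by processReadsQuasi.
    struct ToyJob {
        std::vector<std::string> data;              // the reads in this batch
        bool is_empty() const { return data.empty(); }
    };

    struct ToyParser {
        std::vector<ToyJob> jobs;
        std::size_t next{0};
        ToyJob getJob() { return (next < jobs.size()) ? jobs[next++] : ToyJob{}; }
    };

    // Each worker pulls whole batches ("jobs") until the parser is exhausted.
    void consumeReads(ToyParser& parser) {
        while (true) {
            ToyJob j = parser.getJob();
            if (j.is_empty()) break;                // nothing left: this worker is done
            for (const auto& seq : j.data) {
                (void)seq;                          // map / process each read here
            }
        }
    }
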
 
+/// DONE QUASI
 
 
-
-int performBiasCorrection(boost::filesystem::path featPath,
-                          boost::filesystem::path expPath,
-                          double estimatedReadLength,
-                          double kmersPerRead,
-                          uint64_t mappedKmers,
-                          uint32_t merLen,
-                          boost::filesystem::path outPath,
-                          size_t numThreads);
-
-void processCachedAlignments(
-        ReadExperiment& readExp,
-        ReadLibrary& rl,
-        AlnGroupQueue& structureCache,
-        AlnGroupQueue& alignmentCache,
-        std::atomic<uint64_t>& numObservedFragments,
-        std::atomic<uint64_t>& numAssignedFragments,
-        std::vector<Transcript>& transcripts,
-        ForgettingMassCalculator& fmCalc,
-        ClusterForest& clusterForest,
-        FragmentLengthDistribution& fragLengthDist,
-        const SalmonOpts& salmonOpts,
-        std::mutex& ioMutex,
-        bool initialRound,
-        volatile bool& cacheExhausted,
-        bool& burnedIn,
-        size_t numQuantThreads) {
-
-        std::atomic<uint64_t> numValidHits{0};
-        std::vector<std::thread> quantThreads;
-        for (size_t i = 0; i < numQuantThreads; ++i) {
-                quantThreads.emplace_back(processCachedAlignmentsHelper,
-                        std::ref(readExp),
-                        std::ref(rl),
-                        std::ref(structureCache),
-                        std::ref(alignmentCache),
-                        std::ref(numObservedFragments),
-                        std::ref(numAssignedFragments),
-                        std::ref(numValidHits),
-                        std::ref(transcripts),
-                        std::ref(fmCalc),
-                        std::ref(clusterForest),
-                        std::ref(fragLengthDist),
-                        std::ref(salmonOpts),
-                        std::ref(ioMutex),
-                        initialRound,
-                        std::ref(cacheExhausted),
-                        std::ref(burnedIn));
-
-        }
-        for (auto& t : quantThreads) { t.join(); }
-}
-
+template <typename AlnT>
 void processReadLibrary(
         ReadExperiment& readExp,
         ReadLibrary& rl,
-        bwaidx_t* idx,
+        SalmonIndex* sidx,
         std::vector<Transcript>& transcripts,
         ClusterForest& clusterForest,
         std::atomic<uint64_t>& numObservedFragments, // total number of reads we've looked at
         std::atomic<uint64_t>& numAssignedFragments, // total number of assigned reads
         std::atomic<uint64_t>& upperBoundHits, // upper bound on # of mapped frags
         bool initialRound,
-        bool& burnedIn,
+        std::atomic<bool>& burnedIn,
         ForgettingMassCalculator& fmCalc,
         FragmentLengthDistribution& fragLengthDist,
         mem_opt_t* memOptions,
-        const SalmonOpts& salmonOpts,
+        SalmonOpts& salmonOpts,
         double coverageThresh,
         bool greedyChain,
         std::mutex& iomutex,
         size_t numThreads,
-        AlnGroupQueue& structureCache,
-        AlnGroupQueue& outputGroups,
+        std::vector<AlnGroupVec<AlnT>>& structureVec,
         volatile bool& writeToCache){
 
             std::vector<std::thread> threads;
@@ -2161,78 +942,131 @@ void processReadLibrary(
             std::atomic<uint64_t> numValidHits{0};
             rl.checkValid();
 
+            auto indexType = sidx->indexType();
+
             std::unique_ptr<paired_parser> pairedParserPtr{nullptr};
             std::unique_ptr<single_parser> singleParserPtr{nullptr};
             // If the read library is paired-end
             // ------ Paired-end --------
             if (rl.format().type == ReadType::PAIRED_END) {
 
-                char* readFiles[] = { const_cast<char*>(rl.mates1().front().c_str()),
-                    const_cast<char*>(rl.mates2().front().c_str()) };
-
-                size_t maxReadGroup{miniBatchSize}; // Number of reads in each "job"
-                size_t concurrentFile{2}; // Number of files to read simultaneously
-                pairedParserPtr.reset(new
-                        paired_parser(4 * numThreads, maxReadGroup,
-                                      concurrentFile, readFiles, readFiles + 2));
-
-
-                for(int i = 0; i < numThreads; ++i)  {
-                    if (greedyChain) {
-                        auto threadFun = [&]() -> void {
-                                    processReadsMEM<paired_parser, TranscriptHitList>(
-                                    pairedParserPtr.get(),
-                                    readExp,
-                                    rl,
-                                    structureCache,
-                                    outputGroups,
-                                    numObservedFragments,
-                                    numAssignedFragments,
-                                    numValidHits,
-                                    upperBoundHits,
-                                    idx,
-                                    transcripts,
-                                    fmCalc,
-                                    clusterForest,
-                                    fragLengthDist,
-                                    memOptions,
-                                    salmonOpts,
-                                    coverageThresh,
-                                    iomutex,
-                                    initialRound,
-                                    burnedIn,
-                                    writeToCache);
-                        };
-                        threads.emplace_back(threadFun);
-                    } else {
-                        /*
-                        auto threadFun = [&]() -> void {
-                                    processReadsMEM<paired_parser, FragmentList>(
-                                    &parser,
-                                    rl,
-                                    numObservedFragments,
-                                    numAssignedFragments,
-                                    idx,
-                                    transcripts,
-                                    batchNum,
-                                    logForgettingMass,
-                                    ffMutex,
-                                    clusterForest,
-                                    fragLengthDist,
-                                    memOptions,
-                                    salmonOpts,
-                                    coverageThresh,
-                                    iomutex,
-                                    initialRound,
-                                    burnedIn);
-                        };
-                        threads.emplace_back(threadFun);
-                        */
-                    }
-                }
 
-                for(int i = 0; i < numThreads; ++i)
-                    threads[i].join();
+		    if (rl.mates1().size() != rl.mates2().size()) {
+			    salmonOpts.jointLog->error("The number of provided files for "
+					    "-1 and -2 must be the same!");
+			    std::exit(1);
+		    }
+
+		    size_t numFiles = rl.mates1().size() + rl.mates2().size();
+		    char** pairFileList = new char*[numFiles];
+		    for (size_t i = 0; i < rl.mates1().size(); ++i) {
+			    pairFileList[2*i] = const_cast<char*>(rl.mates1()[i].c_str());
+			    pairFileList[2*i+1] = const_cast<char*>(rl.mates2()[i].c_str());
+		    }
+
+		    size_t maxReadGroup{miniBatchSize}; // Number of reads in each "job"
+		    size_t concurrentFile{2}; // Number of files to read simultaneously
+		    pairedParserPtr.reset(new
+				    paired_parser(4 * numThreads, maxReadGroup,
+					    concurrentFile, pairFileList, pairFileList+numFiles));
+
+		    switch (indexType) {
+			case SalmonIndexType::FMD:
+			    {
+				for(int i = 0; i < numThreads; ++i)  {
+				    // NOTE: we *must* capture i by value here, because it can (and sometimes
+				    // does) change value before the lambda below is evaluated.
+				    auto threadFun = [&,i]() -> void {
+					processReadsMEM<paired_parser, TranscriptHitList>(
+						pairedParserPtr.get(),
+						readExp,
+						rl,
+						structureVec[i],
+						numObservedFragments,
+						numAssignedFragments,
+						numValidHits,
+						upperBoundHits,
+						sidx,
+						transcripts,
+						fmCalc,
+						clusterForest,
+						fragLengthDist,
+						memOptions,
+						salmonOpts,
+						coverageThresh,
+						iomutex,
+						initialRound,
+						burnedIn,
+						writeToCache);
+				    };
+				    threads.emplace_back(threadFun);
+				}
+				break;
+				case SalmonIndexType::QUASI:
+				{
+            // True if we have a 64-bit SA index, false otherwise
+            bool largeIndex = sidx->is64BitQuasi();
+				    for(int i = 0; i < numThreads; ++i)  {
+					// NOTE: we *must* capture i by value here, because it can (and sometimes
+					// does) change value before the lambda below is evaluated.
+          if (largeIndex) {
+            auto threadFun = [&,i]() -> void {
+              processReadsQuasi<RapMapSAIndex<int64_t>>(
+                pairedParserPtr.get(),
+                readExp,
+                rl,
+                structureVec[i],
+                numObservedFragments,
+                numAssignedFragments,
+                numValidHits,
+                upperBoundHits,
+                sidx->quasiIndex64(),
+                transcripts,
+                fmCalc,
+                clusterForest,
+                fragLengthDist,
+                memOptions,
+                salmonOpts,
+                coverageThresh,
+                iomutex,
+                initialRound,
+                burnedIn,
+                writeToCache);
+              };
+              threads.emplace_back(threadFun);
+            } else {
+              auto threadFun = [&,i]() -> void {
+              processReadsQuasi<RapMapSAIndex<int32_t>>(
+                pairedParserPtr.get(),
+                readExp,
+                rl,
+                structureVec[i],
+                numObservedFragments,
+                numAssignedFragments,
+                numValidHits,
+                upperBoundHits,
+                sidx->quasiIndex32(),
+                transcripts,
+                fmCalc,
+                clusterForest,
+                fragLengthDist,
+                memOptions,
+                salmonOpts,
+                coverageThresh,
+                iomutex,
+                initialRound,
+                burnedIn,
+                writeToCache);
+              };
+              threads.emplace_back(threadFun);
+            }
+
+				    }
+				}
+				break;
+			    }
+		    } // end switch
+		    for(int i = 0; i < numThreads; ++i) { threads[i].join(); }
 
             } // ------ Single-end --------
             else if (rl.format().type == ReadType::SINGLE_END) {
@@ -2248,323 +1082,107 @@ void processReadLibrary(
                                       concurrentFile,
                                       streams));
 
-                for(int i = 0; i < numThreads; ++i)  {
-                    if (greedyChain) {
-                        auto threadFun = [&]() -> void {
+                switch (indexType) {
+                    case SalmonIndexType::FMD:
+                        {
+                            for(int i = 0; i < numThreads; ++i)  {
+                                // NOTE: we *must* capture i by value here, because it can (and sometimes
+                                // does) change value before the lambda below is evaluated.
+                                auto threadFun = [&,i]() -> void {
                                     processReadsMEM<single_parser, TranscriptHitList>(
-                                    singleParserPtr.get(),
-                                    readExp,
-                                    rl,
-                                    structureCache,
-                                    outputGroups,
-                                    numObservedFragments,
-                                    numAssignedFragments,
-                                    numValidHits,
-                                    upperBoundHits,
-                                    idx,
-                                    transcripts,
-                                    fmCalc,
-                                    clusterForest,
-                                    fragLengthDist,
-                                    memOptions,
-                                    salmonOpts,
-                                    coverageThresh,
-                                    iomutex,
-                                    initialRound,
-                                    burnedIn,
-                                    writeToCache);
-                        };
-                        threads.emplace_back(threadFun);
-                    } else {
-                        /*
-                        auto threadFun = [&]() -> void {
-                                    processReadsMEM<single_parser, FragmentList>( &parser,
-                                    rl,
-                                    numObservedFragments,
-                                    numAssignedFragments,
-                                    idx,
-                                    transcripts,
-                                    batchNum,
-                                    logForgettingMass,
-                                    ffMutex,
-                                    clusterForest,
-                                    fragLengthDist,
-                                    coverageThresh,
-                                    iomutex,
-                                    initialRound,
-                                    burnedIn);
-                        };
-                        threads.emplace_back(threadFun);
-                        */
-                    }
-                }
-                for(int i = 0; i < numThreads; ++i)
-                    threads[i].join();
-            } // ------ END Single-end --------
-}
-
-bool writeAlignmentCacheToFile(
-        AlnGroupQueue& outputGroups,
-        AlnGroupQueue& structureCache,
-        uint64_t& numWritten,
-        std::atomic<uint64_t>& numObservedFragments,
-        uint64_t numRequiredFragments,
-        SalmonOpts& salmonOpts,
-        volatile bool& writeToCache,
-        cereal::BinaryOutputArchive& outputStream ) {
-
-        size_t blockSize{miniBatchSize};
-        size_t numDequed{0};
-        bool cacheUniqueReads = !salmonOpts.useMassBanking;
-#if defined(__MOODYCAMEL__)
-        // Moody camel
-        AlignmentGroup<SMEMAlignment>* alnGroups[blockSize];
-#else
-        // TBB
-        AlignmentGroup<SMEMAlignment>* alnGroups[1];
-#endif
-
-        while (writeToCache) {
-#if defined(__MOODYCAMEL__)
-            // MOODY CAMEL QUEUE
-            while ( (numDequed = outputGroups.try_dequeue_bulk(alnGroups, blockSize)) > 0) {
-                for (size_t i = 0; i < numDequed; ++i) {
-                    // only write ambigously mapped fragments to the cache
-                    // for processing in subsequent rounds
-                    if (cacheUniqueReads or !alnGroups[i]->isUniquelyMapped()) {
-                        outputStream((*alnGroups[i]));
-                        ++numWritten;
-                    }
-                }
-
-                structureCache.enqueue_bulk(alnGroups, numDequed);
-
-                // If, at any point, we've seen the required number of
-                // fragments, then we don't need the cache any longer.
-                if (numObservedFragments > numRequiredFragments) {
-                    writeToCache = false;
-                }
-            }
-#else
-            // TBB QUEUE
-            while (outputGroups.try_pop(alnGroups[0])) {
-                if (cacheUniqueReads or !alnGroups[0]->isUniquelyMapped()) {
-                    outputStream(*alnGroups[0]);
-                    ++numWritten;
-                }
-
-                structureCache.push(alnGroups[0]);
+                                            singleParserPtr.get(),
+                                            readExp,
+                                            rl,
+                                            structureVec[i],
+                                            numObservedFragments,
+                                            numAssignedFragments,
+                                            numValidHits,
+                                            upperBoundHits,
+                                            sidx,
+                                            transcripts,
+                                            fmCalc,
+                                            clusterForest,
+                                            fragLengthDist,
+                                            memOptions,
+                                            salmonOpts,
+                                            coverageThresh,
+                                            iomutex,
+                                            initialRound,
+                                            burnedIn,
+                                            writeToCache);
+                                };
+                                threads.emplace_back(threadFun);
+                            }
+                        }
+                        break;
+
+                    case SalmonIndexType::QUASI:
+                    {
+                      // True if we have a 64-bit SA index, false otherwise
+                      bool largeIndex = sidx->is64BitQuasi();
+                      for(int i = 0; i < numThreads; ++i)  {
+                        // NOTE: we *must* capture i by value here, because it can (and sometimes
+                        // does) change value before the lambda below is evaluated.
+                        if (largeIndex) {
+                          auto threadFun = [&,i]() -> void {
+                            processReadsQuasi<RapMapSAIndex<int64_t>>(
+                              singleParserPtr.get(),
+                              readExp,
+                              rl,
+                              structureVec[i],
+                              numObservedFragments,
+                              numAssignedFragments,
+                              numValidHits,
+                              upperBoundHits,
+                              sidx->quasiIndex64(),
+                              transcripts,
+                              fmCalc,
+                              clusterForest,
+                              fragLengthDist,
+                              memOptions,
+                              salmonOpts,
+                              coverageThresh,
+                              iomutex,
+                              initialRound,
+                              burnedIn,
+                              writeToCache);
+                            };
+                            threads.emplace_back(threadFun);
+                          } else {
+                            auto threadFun = [&,i]() -> void {
+                              processReadsQuasi<RapMapSAIndex<int32_t>>(
+                                singleParserPtr.get(),
+                                readExp,
+                                rl,
+                                structureVec[i],
+                                numObservedFragments,
+                                numAssignedFragments,
+                                numValidHits,
+                                upperBoundHits,
+                                sidx->quasiIndex32(),
+                                transcripts,
+                                fmCalc,
+                                clusterForest,
+                                fragLengthDist,
+                                memOptions,
+                                salmonOpts,
+                                coverageThresh,
+                                iomutex,
+                                initialRound,
+                                burnedIn,
+                                writeToCache);
+                              };
+                              threads.emplace_back(threadFun);
+                            }
 
-                // If, at any point, we've seen the required number of
-                // fragments, then we don't need the cache any longer.
-                if (numObservedFragments > numRequiredFragments) {
-                    writeToCache = false;
+                        }
+                      }
+                      break;
                 }
-            }
-#endif
-        }
-
-#if defined(__MOODYCAMEL__)
-        // Moody camel
-        while (outputGroups.try_dequeue(alnGroups[0])) {
-            if (cacheUniqueReads or !alnGroups[0]->isUniquelyMapped()) {
-                outputStream((*alnGroups[0]));
-                ++numWritten;
-            }
-            structureCache.enqueue(alnGroups[0]);
-        }
-#else
-        // TBB
-        while (outputGroups.try_pop(alnGroups[0])) {
-            outputStream((*alnGroups[0]));
-            ++numWritten;
-            structureCache.push(alnGroups[0]);
-        }
-#endif
-        return true;
-}
-
-bool readAlignmentCache(
-        AlnGroupQueue& alnGroupQueue,
-        AlnGroupQueue& structureCache,
-        uint64_t numWritten,
-        volatile bool& finishedParsing,
-        boost::filesystem::path& cacheFilePath) {
-
-        std::ifstream alnCacheFile(cacheFilePath.c_str(), std::ios::binary);
-        cereal::BinaryInputArchive alnCacheArchive(alnCacheFile);
-
-        uint64_t numRead{0};
-        AlignmentGroup<SMEMAlignment>* alnGroup;
-        while (numRead < numWritten) {
-#if defined(__MOODYCAMEL__)
-            // Moody camel
-            while (!structureCache.try_dequeue(alnGroup)) {}
-#else
-            // TBB
-            while (!structureCache.try_pop(alnGroup)) {}
-#endif
-
-            alnCacheArchive((*alnGroup));
-#if defined(__MOODYCAMEL__)
-            // Moody camel
-            alnGroupQueue.enqueue(alnGroup);
-#else
-            // TBB
-            alnGroupQueue.push(alnGroup);
-#endif
-            ++numRead;
-        }
-        finishedParsing = true;
-
-        alnCacheFile.close();
-        return true;
+                for(int i = 0; i < numThreads; ++i) { threads[i].join(); }
+            } // ------ END Single-end --------
 }
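
[Editor's note: the capture-by-value note that recurs in processReadLibrary, [&,i] rather than [&], is easy to demonstrate in isolation. A minimal sketch, independent of the salmon code:]

    #include <iostream>
    #include <thread>
    #include <vector>

    int main() {
        std::vector<std::thread> workers;
        for (int i = 0; i < 4; ++i) {
            // Capturing i by reference would race with the loop that keeps mutating it
            // (and i is gone once the loop ends); capturing by value snapshots the index.
            workers.emplace_back([i]() { std::cout << "worker " << i << '\n'; });
        }
        for (auto& t : workers) { t.join(); }
        return 0;
    }
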
 
-struct CacheFile {
-    CacheFile(boost::filesystem::path& pathIn, uint64_t numWrittenIn) :
-        filePath(pathIn), numWritten(numWrittenIn), inMemory(false){}
-
-    bool populateCache(volatile bool& finishedParsing,
-                       uint32_t buffQueueSize) {
-
-        if (inMemory) {
-            // If the queue already exists, then just
-            // swap the processed and unprocessed structs
-            // and return
-             if (toProcess) {
-                std::swap(toProcess, processed);
-                finishedParsing = true;
-                return true;
-             } else {
-                // otherwise, create the queues and fill them as we
-                // normally would (i.e. if we weren't holding every thing
-                // in memory).
-#if defined(__MOODYCAMEL__)
-                 // Moody camel
-                toProcess.reset(new AlnGroupQueue(numWritten));
-                initCache.reset(new AlnGroupQueue(numWritten));
-#else
-                 // TBB
-                toProcess.reset(new AlnGroupQueue);
-                initCache.reset(new AlnGroupQueue);
-#endif
-                for (size_t i = 0; i < numWritten; ++i) {
-#if defined(__MOODYCAMEL__)
-                    // Moody camel
-                    initCache->enqueue( new AlignmentGroup<SMEMAlignment>() );
-#else
-                    // TBB
-                    initCache->push( new AlignmentGroup<SMEMAlignment>() );
-#endif
-                }
-                processed.reset(new AlnGroupQueue);
-             }
-        } else {
-            // If we won't be keeping everything in memory
-            // determine whether or not we need to create "working space"
-            // queues (this is only necessary the first time).
-            if (!toProcess) {
-#if defined(__MOODYCAMEL__)
-                // Moody camel
-                toProcess.reset(new AlnGroupQueue(buffQueueSize));
-                processed.reset(new AlnGroupQueue(buffQueueSize));
-#else
-                // TBB
-                toProcess.reset(new AlnGroupQueue);
-                processed.reset(new AlnGroupQueue);
-#endif
-                for (size_t i = 0; i < buffQueueSize; ++i) {
-#if defined(__MOODYCAMEL__)
-                    // Moody camel
-                    processed->enqueue( new AlignmentGroup<SMEMAlignment>() );
-#else
-                    // TBB
-                    processed->push( new AlignmentGroup<SMEMAlignment>() );
-#endif
-                }
-            }
-        }
-
-        // At this point, the queues exist, and we're either reading the
-        // information from file and using the queue as a buffer, or we're
-        // making our first "in memory" pass and we have to fill the buffers
-        // anyway
-        if (inMemory) {
-            cacheReaderThread_.reset(new std::thread (readAlignmentCache,
-                        std::ref(*toProcess),
-                        std::ref(*initCache),
-                        numWritten,
-                        std::ref(finishedParsing),
-                        std::ref(filePath)));
-        } else {
-            cacheReaderThread_.reset(new std::thread (readAlignmentCache,
-                        std::ref(*toProcess),
-                        std::ref(*processed),
-                        numWritten,
-                        std::ref(finishedParsing),
-                        std::ref(filePath)));
-        }
-        return true;
-    }
-
-    bool flushCache() {
-        if (cacheReaderThread_) {
-            cacheReaderThread_->join();
-            cacheReaderThread_.reset(nullptr);
-        }
-        return true;
-    }
-
-    void clearQueues() {
-        AlignmentGroup<SMEMAlignment>* ag;
-
-        if (toProcess) {
-#if defined(__MOODYCAMEL__)
-            // Moody camel
-            while (toProcess->try_dequeue(ag)) { delete ag; }
-#else
-            // TBB
-            while (toProcess->try_pop(ag)) { delete ag; }
-#endif
-        }
-        if (processed) {
-#if defined(__MOODYCAMEL__)
-            // Moody camel
-            while (processed->try_dequeue(ag)) { delete ag; }
-#else
-            // TBB
-            while (processed->try_pop(ag)) { delete ag; }
-#endif
-        }
-        if (initCache) {
-#if defined(__MOODYCAMEL__)
-            // Moody camel
-            while (initCache->try_dequeue(ag)) { delete ag; }
-#else
-            // TBB
-            while (initCache->try_pop(ag)) { delete ag; }
-#endif
-        }
-    }
-
-
-    boost::filesystem::path filePath;
-    uint64_t numWritten{0};
-    bool inMemory;
-
-    std::unique_ptr<AlnGroupQueue> toProcess{nullptr};
-    std::unique_ptr<AlnGroupQueue> processed{nullptr};
-    std::unique_ptr<AlnGroupQueue> initCache{nullptr};
-
-    // If the file is small enough, we'll make the mapping cache reside "in memory"
-    // that's what this guy is for.
-    // std::vector<char> inMemoryMappingCache;
-
-    private:
-        // The thread that will read the mapping cache
-        std::unique_ptr<std::thread> cacheReaderThread_{nullptr};
-};
 
 /**
   *  Quantify the targets given in the file `transcriptFile` using the
@@ -2573,6 +1191,7 @@ struct CacheFile {
   *  specified by `libFmt`.
   *
   */
+template <typename AlnT>
 void quantifyLibrary(
         ReadExperiment& experiment,
         bool greedyChain,
@@ -2607,7 +1226,6 @@ void quantifyLibrary(
     std::mutex ioMutex;
 
     size_t numPrevObservedFragments = 0;
-    std::vector<CacheFile> cacheFiles;
 
     size_t maxReadGroup{miniBatchSize};
     uint32_t structCacheSize = numQuantThreads * maxReadGroup * 10;
@@ -2641,148 +1259,52 @@ void quantifyLibrary(
                 break;
             }
 
-            if (numObservedFragments - numPrevObservedFragments <= salmonOpts.mappingCacheMemoryLimit
-                and roundNum < 2) {
-                for (auto& cf : cacheFiles) { cf.inMemory = true; }
-            }
             numPrevObservedFragments = numObservedFragments;
         }
 
-        if (initialRound or salmonOpts.disableMappingCache) {
-#if defined(__MOODYCAMEL__)
-            // Moody camel
-            AlnGroupQueue outputGroups(structCacheSize);
-            AlnGroupQueue groupCache(structCacheSize);
-#else
-            // TBB
-            AlnGroupQueue outputGroups;
-            AlnGroupQueue groupCache;
-#endif
-
-            for (size_t i = 0; i < structCacheSize; ++i) {
- #if defined(__MOODYCAMEL__)
-                // Moody camel
-                groupCache.enqueue( new AlignmentGroup<SMEMAlignment>() );
-#else
-                // TBB
-                groupCache.push( new AlignmentGroup<SMEMAlignment>() );
-#endif
-            }
+        // This structure is a vector of vectors of alignment
+        // groups.  Each thread will get its own vector, so we
+        // allocate these up front to save time and allow
+        // reuse.
+        std::vector<AlnGroupVec<AlnT>> groupVec;
+        for (size_t i = 0; i < numQuantThreads; ++i) {
+            groupVec.emplace_back(maxReadGroup);
+        }
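
[Editor's note: the per-thread allocation above can be pictured with a simpler stand-in, where Buffer replaces AlnGroupVec<AlnT>: each worker gets its own preallocated buffer, handed over by reference, so batches are reused without locking or reallocation. A sketch only, under those assumptions.]

    #include <cstddef>
    #include <thread>
    #include <vector>

    using Buffer = std::vector<int>;                // stand-in for AlnGroupVec<AlnT>

    int main() {
        const std::size_t numThreads   = 4;
        const std::size_t maxReadGroup = 1000;      // batch capacity, allocated once
        std::vector<Buffer> perThread;
        for (std::size_t i = 0; i < numThreads; ++i) { perThread.emplace_back(maxReadGroup); }

        std::vector<std::thread> workers;
        for (std::size_t i = 0; i < numThreads; ++i) {
            workers.emplace_back([&, i]() {         // i by value, perThread by reference
                for (auto& slot : perThread[i]) { slot += 1; }   // reuse the same storage
            });
        }
        for (auto& t : workers) { t.join(); }
        return 0;
    }
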
 
-            volatile bool writeToCache = !salmonOpts.disableMappingCache;
-            auto processReadLibraryCallback =  [&](
-                    ReadLibrary& rl, bwaidx_t* idx,
-                    std::vector<Transcript>& transcripts, ClusterForest& clusterForest,
-                    FragmentLengthDistribution& fragLengthDist,
-                    std::atomic<uint64_t>& numAssignedFragments,
-                    size_t numQuantThreads, bool& burnedIn) -> void  {
-
-                // The file where the alignment cache was / will be written
-                fmt::MemoryWriter fname;
-                fname << "alnCache_" << cacheFiles.size() << ".bin";
-                boost::filesystem::path alnCacheFilename = salmonOpts.outputDirectory / fname.str();
-                cacheFiles.emplace_back(alnCacheFilename, uint64_t(0));
-
-                std::unique_ptr<std::ofstream> alnCacheFile{nullptr};
-                std::unique_ptr<std::thread> cacheWriterThread{nullptr};
-                if (writeToCache) {
-                    alnCacheFile.reset(new std::ofstream(alnCacheFilename.c_str(), std::ios::binary));
-                    cereal::BinaryOutputArchive alnCacheArchive(*alnCacheFile);
-                    cacheWriterThread.reset(new std::thread(writeAlignmentCacheToFile,
-                        std::ref(outputGroups),
-                        std::ref(groupCache),
-                        std::ref(cacheFiles.back().numWritten),
-                        std::ref(numObservedFragments),
-                        numRequiredFragments,
-                        std::ref(salmonOpts),
-                        std::ref(writeToCache),
-                        std::ref(alnCacheArchive)));
-                }
 
-                processReadLibrary(experiment, rl, idx, transcripts, clusterForest,
-                        numObservedFragments, totalAssignedFragments, upperBoundHits,
-                        initialRound, burnedIn, fmCalc, fragLengthDist,
-                        memOptions, salmonOpts, coverageThresh, greedyChain,
-                        ioMutex, numQuantThreads,
-                        groupCache, outputGroups, writeToCache);
+        bool writeToCache = !salmonOpts.disableMappingCache;
+        auto processReadLibraryCallback =  [&](
+                ReadLibrary& rl, SalmonIndex* sidx,
+                std::vector<Transcript>& transcripts, ClusterForest& clusterForest,
+                FragmentLengthDistribution& fragLengthDist,
+                std::atomic<uint64_t>& numAssignedFragments,
+                size_t numQuantThreads, std::atomic<bool>& burnedIn) -> void  {
 
-                numAssignedFragments = totalAssignedFragments - prevNumAssignedFragments;
-                prevNumAssignedFragments = totalAssignedFragments;
+            processReadLibrary<AlnT>(experiment, rl, sidx, transcripts, clusterForest,
+                    numObservedFragments, totalAssignedFragments, upperBoundHits,
+                    initialRound, burnedIn, fmCalc, fragLengthDist,
+                    memOptions, salmonOpts, coverageThresh, greedyChain,
+                    ioMutex, numQuantThreads,
+                    groupVec, writeToCache);
 
-                // join the thread the writes the file
-                writeToCache = false;
-                if (cacheWriterThread) { cacheWriterThread->join(); }
-                if (alnCacheFile) { alnCacheFile->close(); }
-            };
+            numAssignedFragments = totalAssignedFragments - prevNumAssignedFragments;
+            prevNumAssignedFragments = totalAssignedFragments;
+        };
 
-            // Process all of the reads
-            experiment.processReads(numQuantThreads, processReadLibraryCallback);
-            experiment.setNumObservedFragments(numObservedFragments);
+        // Process all of the reads
+        fmt::print(stderr, "\n\n\n\n");
+        experiment.processReads(numQuantThreads, salmonOpts, processReadLibraryCallback);
+        experiment.setNumObservedFragments(numObservedFragments);
 
-            // Empty the structure cache here
-            AlignmentGroup<SMEMAlignment>* ag;
-#if defined(__MOODYCAMEL__)
-            // Moody camel
-            while (groupCache.try_dequeue(ag)) { delete ag; }
-#else
-            // TBB
-            while (groupCache.try_pop(ag)) { delete ag; }
-#endif
-            //EQCLASS
-            bool done = experiment.equivalenceClassBuilder().finish();
-            // skip the extra online rounds
-            terminate = true;
-            // END EQCLASS
-        } else {
-            uint32_t libNum{0};
-            auto processReadLibraryCallback =  [&](
-                    ReadLibrary& rl, bwaidx_t* idx,
-                    std::vector<Transcript>& transcripts, ClusterForest& clusterForest,
-                    FragmentLengthDistribution& fragLengthDist,
-                    std::atomic<uint64_t>& numAssignedFragments,
-                    size_t numQuantThreads, bool& burnedIn) -> void  {
-
-                volatile bool finishedParsing{false};
-
-                // The file where the alignment cache was / will be written
-                auto& cf = cacheFiles[libNum];
-                ++libNum;
-
-                cf.populateCache(finishedParsing, structCacheSize);
-
-                uint64_t priorTotalAssignedFragments = totalAssignedFragments;
-                uint64_t priorTotalObservedFragments = numObservedFragments;
-                processCachedAlignments(
-                        experiment,
-                        rl,
-                        //groupCache, alnGroupQueue,
-                        *(cf.processed.get()),
-                        *(cf.toProcess.get()),
-                        numObservedFragments, totalAssignedFragments,
-                        transcripts, fmCalc, clusterForest, fragLengthDist,
-                        salmonOpts, ioMutex, initialRound, finishedParsing,
-                        burnedIn, numQuantThreads);
-
-                cf.flushCache();
-
-               if (salmonOpts.useMassBanking) {
-                   // If we're using mass banking
-                   // Regardless of what we count, we see the same total number
-                   // of fragments we did in the first round
-                   totalAssignedFragments = priorTotalAssignedFragments + experiment.numAssignedFragsInFirstPass();
-                   numObservedFragments = priorTotalObservedFragments + experiment.numAssignedFragsInFirstPass();
-               }
-                // Before mass banking
-                numAssignedFragments = totalAssignedFragments - prevNumAssignedFragments;
-                prevNumAssignedFragments = totalAssignedFragments;
-            };
-
-            // Process all of the reads
-            experiment.processReads(numQuantThreads, processReadLibraryCallback);
-        }
+        //EQCLASS
+        bool done = experiment.equivalenceClassBuilder().finish();
+        // skip the extra online rounds
+        terminate = true;
 
         initialRound = false;
         ++roundNum;
+        fmt::print(stderr, "\n\n\n\n");
+        /*
         fmt::print(stderr, "\n# observed = {} / # required = {}\n",
                    numObservedFragments, numRequiredFragments);
         fmt::print(stderr, "hard # assigned = {} / # observed (this round) = {} : "
@@ -2790,6 +1312,7 @@ void quantifyLibrary(
                    experiment.numAssignedFragments(),
                    numObservedFragments - numPrevObservedFragments,
                    upperBoundHits);
+        */
         salmonOpts.fileLog->info("\nAt end of round {}\n"
                                    "==================\n"
                                    "Observed {} total fragments ({} in most recent round)\n",
@@ -2799,14 +1322,25 @@ void quantifyLibrary(
     }
     fmt::print(stderr, "\n\n\n\n");
 
-    // delete any temporary alignment cache files
-    for (auto& cf : cacheFiles) {
-        if (boost::filesystem::exists(cf.filePath)) {
-            boost::filesystem::remove(cf.filePath);
-        }
-        // TODO: clear any queues allocated by
-        // the cache files.
-        cf.clearQueues();
+    // If we didn't achieve burnin, then at least compute effective
+    // lengths and mention this to the user.
+    if (totalAssignedFragments < salmonOpts.numBurninFrags) {
+        std::atomic<bool> dummyBool{false};
+        experiment.updateTranscriptLengthsAtomic(dummyBool);
+
+        jointLog->warn("Only {} fragments were mapped, but the number of burn-in fragments was set to {}.\n"
+                "The effective lengths have been computed using the observed mappings.\n",
+                totalAssignedFragments, salmonOpts.numBurninFrags);
+
+	// If we didn't have a sufficient number of samples for burnin,
+	// then also ignore modeling of the fragment start position
+	// distribution.
+	if (salmonOpts.useFSPD) {
+	  salmonOpts.useFSPD = false;
+	  jointLog->warn("Since only {} (< {}) fragments were observed, modeling of the fragment start position "
+			 "distribution has been disabled", totalAssignedFragments, salmonOpts.numBurninFrags);
+
+	}
     }
 
     if (numObservedFragments <= prevNumObservedFragments) {
@@ -2819,22 +1353,20 @@ void quantifyLibrary(
             static_cast<double>(numObservedFragments.load());
         experiment.setNumObservedFragments(numObservedFragments - prevNumObservedFragments);
         experiment.setUpperBoundHits(upperBoundHits.load());
-        experiment.setEffetiveMappingRate(upperBoundMappingRate);
+        if (salmonOpts.allowOrphans) {
+           double mappingRate = totalAssignedFragments.load() /
+               static_cast<double>(numObservedFragments.load());
+           experiment.setEffectiveMappingRate(mappingRate);
+        } else {
+            experiment.setEffectiveMappingRate(upperBoundMappingRate);
+        }
     }
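
[Editor's note: to make the two rates concrete, here is the arithmetic with made-up counts (the numbers are purely illustrative): with orphans allowed the reported rate is assigned over observed, otherwise it falls back to the upper-bound rate computed a few lines above.]

    #include <cstdint>
    #include <iostream>

    int main() {
        // Hypothetical counts, only to illustrate the ratios being reported.
        uint64_t numObserved = 10000000;   // fragments seen by the mappers
        uint64_t numAssigned =  8200000;   // fragments with an accepted mapping
        uint64_t upperBound  =  8750000;   // fragments with any hit at all (upper bound)
        double mappingRate           = numAssigned / static_cast<double>(numObserved);
        double upperBoundMappingRate = upperBound  / static_cast<double>(numObserved);
        std::cout << mappingRate * 100.0 << "% vs " << upperBoundMappingRate * 100.0 << "%\n";
        // prints: 82% vs 87.5%
    }
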
 
-        jointLog->info("Overall mapping rate = {}\%; "
-                   "Effective mapping rate = {}\%\n",
-                   experiment.mappingRate() * 100.0,
+        jointLog->info("Mapping rate = {}\%\n",
                    experiment.effectiveMappingRate() * 100.0);
     jointLog->info("finished quantifyLibrary()");
 }
 
-int performBiasCorrectionSalmon(
-        boost::filesystem::path featureFile,
-        boost::filesystem::path expressionFile,
-        boost::filesystem::path outputFile,
-        size_t numThreads);
-
 int salmonQuantify(int argc, char *argv[]) {
     using std::cerr;
     using std::vector;
@@ -2842,9 +1374,9 @@ int salmonQuantify(int argc, char *argv[]) {
     namespace bfs = boost::filesystem;
     namespace po = boost::program_options;
 
-    bool biasCorrect{false};
     bool optChain{false};
     size_t requiredObservations;
+    int32_t numBiasSamples{0};
 
     SalmonOpts sopt;
     mem_opt_t* memOptions = mem_opt_init();
@@ -2870,6 +1402,11 @@ int salmonQuantify(int argc, char *argv[]) {
         "File containing the #1 mates")
     ("mates2,2", po::value<vector<string>>(&mate2ReadFiles)->multitoken(),
         "File containing the #2 mates")
+    ("allowOrphans", po::bool_switch(&(sopt.allowOrphans))->default_value(false), "Consider orphaned reads as valid hits when "
+                        "performing lightweight-alignment.  This option will increase sensitivity (allow more reads to map and "
+                        "more transcripts to be detected), but may decrease specificity as orphaned alignments are more likely "
+                        "to be spurious -- this option is *always* set to true when using quasi-mapping.")
+    ("biasCorrect", po::value(&(sopt.biasCorrect))->zero_tokens(), "Perform sequence-specific bias correction.")
     ("threads,p", po::value<uint32_t>(&(sopt.numThreads))->default_value(sopt.numThreads), "The number of threads to use concurrently.")
     ("incompatPrior", po::value<double>(&(sopt.incompatPrior))->default_value(1e-20), "This option "
                         "sets the prior probability that an alignment that disagrees with the specified "
@@ -2877,19 +1414,17 @@ int salmonQuantify(int argc, char *argv[]) {
                         "specifies that alignments that disagree with the library type should be \"impossible\", "
                         "while setting it to 1 says that alignments that disagree with the library type are no "
                         "less likely than those that do")
-    ("numRequiredObs,n", po::value(&requiredObservations)->default_value(50000000),
-                                        "[Deprecated]: The minimum number of observations (mapped reads) that must be observed before "
-                                        "the inference procedure will terminate.  If fewer mapped reads exist in the "
-                                        "input file, then it will be read through multiple times.")
     ("minLen,k", po::value<int>(&(memOptions->min_seed_len))->default_value(19), "(S)MEMs smaller than this size won't be considered.")
+    ("sensitive", po::bool_switch(&(sopt.sensitive))->default_value(false), "Setting this option enables the splitting of SMEMs that are larger "
+                                        "than 1.5 times the minimum seed length (minLen/k above).  This may reveal high scoring chains of MEMs "
+                                        "that are masked by long SMEMs.  However, this option makes lightweight-alignment a bit slower and is "
+                                        "usually not necessary if the reference is of reasonable quality.")
     ("extraSensitive", po::bool_switch(&(sopt.extraSeedPass))->default_value(false), "Setting this option enables an extra pass of \"seed\" search. "
                                         "Enabling this option may improve sensitivity (the number of reads having sufficient coverage), but will "
                                         "typically slow down quantification by ~40%.  Consider enabling this option if you find the mapping rate to "
                                         "be significantly lower than expected.")
     ("coverage,c", po::value<double>(&coverageThresh)->default_value(0.70), "required coverage of read by union of SMEMs to consider it a \"hit\".")
     ("output,o", po::value<std::string>()->required(), "Output quantification file.")
-    ("biasCorrect", po::value(&biasCorrect)->zero_tokens(), "[Experimental: Output both bias-corrected and non-bias-corrected "
-                                                               "qunatification estimates.")
     ("geneMap,g", po::value<string>(), "File containing a mapping of transcripts to genes.  If this file is provided "
                                         "Salmon will output both quant.sf and quant.genes.sf files, where the latter "
                                         "contains aggregated gene-level abundance estimates.  The transcript to gene mapping "
@@ -2900,8 +1435,6 @@ int salmonQuantify(int argc, char *argv[]) {
                                         "format; files with any other extension are assumed to be in the simple format.");
     //("optChain", po::bool_switch(&optChain)->default_value(false), "Chain MEMs optimally rather than greedily")
 
-    // no sequence bias for now
-    sopt.noSeqBiasModel = true;
     sopt.noRichEqClasses = false;
     // mapping cache has been deprecated
     sopt.disableMappingCache = true;
@@ -2924,27 +1457,22 @@ int salmonQuantify(int argc, char *argv[]) {
                         "in the online learning schedule.  A smaller value results in quicker learning, but higher variance "
                         "and may be unstable.  A larger value results in slower learning but may be more stable.  Value should "
                         "be in the interval (0.5, 1.0].")
-    ("mappingCacheMemoryLimit", po::value<uint32_t>(&(sopt.mappingCacheMemoryLimit))->default_value(5000000), "If the file contained fewer than this "
-                                        "many reads, then just keep the data in memory for subsequent rounds of inference. Obviously, this value should "
-                                        "not be too large if you wish to keep a low memory usage, but setting it large enough can substantially speed up "
-                                        "inference on \"small\" files that contain only a few million reads.")
     ("maxOcc,m", po::value<int>(&(memOptions->max_occ))->default_value(200), "(S)MEMs occuring more than this many times won't be considered.")
     ("maxReadOcc,w", po::value<uint32_t>(&(sopt.maxReadOccs))->default_value(100), "Reads \"mapping\" to more than this many places won't be considered.")
     ("noEffectiveLengthCorrection", po::bool_switch(&(sopt.noEffectiveLengthCorrection))->default_value(false), "Disables "
                         "effective length correction when computing the probability that a fragment was generated "
                         "from a transcript.  If this flag is passed in, the fragment length distribution is not taken "
                         "into account when computing this probability.")
-    ("noFragLengthDist", po::bool_switch(&(sopt.noFragLengthDist))->default_value(false), "[Currently Experimental] : "
+    ("noFragLengthDist", po::bool_switch(&(sopt.noFragLengthDist))->default_value(false), "[experimental] : "
                         "Don't consider concordance with the learned fragment length distribution when trying to determine "
                         "the probability that a fragment has originated from a specified location.  Normally, Fragments with "
                          "unlikely lengths will be assigned a smaller relative probability than those with more likely "
                         "lengths.  When this flag is passed in, the observed fragment length has no effect on that fragment's "
                         "a priori probability.")
-    ("noFragStartPosDist", po::bool_switch(&(sopt.noFragStartPosDist))->default_value(false), "[Currently Experimental] : "
-                        "Don't consider / model non-uniformity in the fragment start positions "
-                        "across the transcript.")
-    //("noSeqBiasModel", po::bool_switch(&(sopt.noSeqBiasModel))->default_value(false),
-    //                    "Don't learn and apply a model of sequence-specific bias")
+    ("useFSPD", po::bool_switch(&(sopt.useFSPD))->default_value(false), "[experimental] : "
+                        "Consider / model non-uniformity in the fragment start positions across the transcript.")
+    ("numBiasSamples", po::value<int32_t>(&numBiasSamples)->default_value(1000000),
+            "Number of fragment mappings to use when learning the sequence-specific bias model.")
     ("numAuxModelSamples", po::value<uint32_t>(&(sopt.numBurninFrags))->default_value(5000000), "The first <numAuxModelSamples> are used to train the "
      			"auxiliary model parameters (e.g. fragment length distribution, bias, etc.).  After ther first <numAuxModelSamples> observations "
 			"the auxiliary model parameters will be assumed to have converged and will be fixed.")
@@ -2952,23 +1480,21 @@ int salmonQuantify(int argc, char *argv[]) {
      			"assignment likelihoods and contributions to the transcript abundances computed without applying any auxiliary models.  The purpose "
 			"of ignoring the auxiliary models for the first <numPreAuxModelSamples> observations is to avoid applying these models before thier "
 			"parameters have been learned sufficiently well.")
+    ("numRequiredObs,n", po::value(&requiredObservations)->default_value(50000000),
+                                        "[Deprecated]: The minimum number of observations (mapped reads) that must be observed before "
+                                        "the inference procedure will terminate.  If fewer mapped reads exist in the "
+                                        "input file, then it will be read through multiple times.")
     ("splitWidth,s", po::value<int>(&(memOptions->split_width))->default_value(0), "If (S)MEM occurs fewer than this many times, search for smaller, contained MEMs. "
                                         "The default value will not split (S)MEMs, a higher value will result in more MEMs being explore and, thus, will "
                                         "result in increased running time.")
     ("splitSpanningSeeds,b", po::bool_switch(&(sopt.splitSpanningSeeds))->default_value(false), "Attempt to split seeds that happen to fall on the "
                                         "boundary between two transcripts.  This can improve the  fragment hit-rate, but is usually not necessary.")
-    ("useMassBanking", po::bool_switch(&(sopt.useMassBanking))->default_value(false), "[Deprecated] : "
-                        "Use mass \"banking\" in subsequent epoch of inference.  Rather than re-observing uniquely "
-                        "mapped reads, simply remember the ratio of uniquely to ambiguously mapped reads for each "
-                        "transcript and distribute the unique mass uniformly throughout the epoch.")
-    ("useVBOpt,v", po::bool_switch(&(sopt.useVBOpt))->default_value(false), "Use the Variational Bayesian EM rather than the "
+    ("useVBOpt", po::bool_switch(&(sopt.useVBOpt))->default_value(false), "Use the Variational Bayesian EM rather than the "
      			"traditional EM algorithm for optimization in the batch passes.")
-    ("useGSOpt", po::bool_switch(&(sopt.useGSOpt))->default_value(false), "[*super*-experimental]: After the initial optimization has finished, "
-                "use collapsed Gibbs sampling to refine estimates even further (and obtain variance)")
-    ("numGibbsSamples", po::value<uint32_t>(&(sopt.numGibbsSamples))->default_value(500), "[*super*-experimental]: Number of Gibbs sampling rounds to "
-     		"perform.");
-
-
+    ("numGibbsSamples", po::value<uint32_t>(&(sopt.numGibbsSamples))->default_value(0), "Number of Gibbs sampling rounds to "
+     "perform.")
+    ("numBootstraps", po::value<uint32_t>(&(sopt.numBootstraps))->default_value(0), "Number of bootstrap samples to generate. Note: "
+      "This is mutually exclusive with Gibbs sampling.");
 
     po::options_description testing("\n"
             "testing options");
@@ -3000,7 +1526,7 @@ int salmonQuantify(int argc, char *argv[]) {
             auto hstring = R"(
 Quant
 ==========
-Perform streaming SMEM-based estimation of
+Perform streaming mapping-based estimation of
 transcript abundance from RNA-seq reads
 )";
             std::cout << hstring << std::endl;
@@ -3010,8 +1536,10 @@ transcript abundance from RNA-seq reads
 
         po::notify(vm);
 
+
+
         std::stringstream commentStream;
-        commentStream << "# salmon (smem-based) v" << salmon::version << "\n";
+        commentStream << "# salmon (mapping-based) v" << salmon::version << "\n";
         commentStream << "# [ program ] => salmon \n";
         commentStream << "# [ command ] => quant \n";
         for (auto& opt : orderedOptions.options) {
@@ -3024,13 +1552,24 @@ transcript abundance from RNA-seq reads
         std::string commentString = commentStream.str();
         fmt::print(stderr, "{}", commentString);
 
+        // TODO: Fix fragment start pos dist
+        // sopt.useFSPD = false;
+
+        // Set the atomic variable numBiasSamples from the local version
+        sopt.numBiasSamples.store(numBiasSamples);
+
+        // Get the time at the start of the run
+        std::time_t result = std::time(NULL);
+        std::string runStartTime(std::asctime(std::localtime(&result)));
+        runStartTime.pop_back(); // remove the newline
+
         // Verify the geneMap before we start doing any real work.
         bfs::path geneMapPath;
         if (vm.count("geneMap")) {
             // Make sure the provided file exists
             geneMapPath = vm["geneMap"].as<std::string>();
             if (!bfs::exists(geneMapPath)) {
-                std::cerr << "Could not fine transcript <=> gene map file " << geneMapPath << "\n";
+                std::cerr << "Could not find transcript <=> gene map file " << geneMapPath << "\n";
                 std::cerr << "Exiting now: please either omit the \'geneMap\' option or provide a valid file\n";
                 std::exit(1);
             }
@@ -3038,7 +1577,7 @@ transcript abundance from RNA-seq reads
 
         bool greedyChain = !optChain;
         bfs::path outputDirectory(vm["output"].as<std::string>());
-        bfs::create_directory(outputDirectory);
+        bfs::create_directories(outputDirectory);
         if (!(bfs::exists(outputDirectory) and bfs::is_directory(outputDirectory))) {
             std::cerr << "Couldn't create output directory " << outputDirectory << "\n";
             std::cerr << "exiting\n";
@@ -3052,7 +1591,7 @@ transcript abundance from RNA-seq reads
         sopt.outputDirectory = outputDirectory;
 
         // Create the logger and the logging directory
-        bfs::create_directory(logDirectory);
+        bfs::create_directories(logDirectory);
         if (!(bfs::exists(logDirectory) and bfs::is_directory(logDirectory))) {
             std::cerr << "Couldn't create log directory " << logDirectory << "\n";
             std::cerr << "exiting\n";
@@ -3061,13 +1600,13 @@ transcript abundance from RNA-seq reads
         std::cerr << "Logs will be written to " << logDirectory.string() << "\n";
 
         bfs::path logPath = logDirectory / "salmon_quant.log";
-	// must be a power-of-two
+        // must be a power-of-two
         size_t max_q_size = 2097152;
         spdlog::set_async_mode(max_q_size);
 
         auto fileSink = std::make_shared<spdlog::sinks::simple_file_sink_mt>(logPath.string(), true);
         auto consoleSink = std::make_shared<spdlog::sinks::stderr_sink_mt>();
-        auto consoleLog = spdlog::create("consoleLog", {consoleSink});
+        auto consoleLog = spdlog::create("stderrLog", {consoleSink});
         auto fileLog = spdlog::create("fileLog", {fileSink});
         auto jointLog = spdlog::create("jointLog", {fileSink, consoleSink});
 
@@ -3075,10 +1614,18 @@ transcript abundance from RNA-seq reads
         sopt.fileLog = fileLog;
 
         // Verify that no inconsistent options were provided
+        if (sopt.numGibbsSamples > 0 and sopt.numBootstraps > 0) {
+            jointLog->error("You cannot perform both Gibbs sampling and bootstrapping. "
+                            "Please choose one.");
+            jointLog->flush();
+            std::exit(1);
+        }
+
         {
             if (sopt.noFragLengthDist and !sopt.noEffectiveLengthCorrection) {
                 jointLog->info() << "Error: You cannot enable --noFragLengthDist without "
                                  << "also enabling --noEffectiveLengthCorrection; exiting!\n";
+                jointLog->flush();
                 std::exit(1);
             }
         }
@@ -3092,36 +1639,100 @@ transcript abundance from RNA-seq reads
         }
         // END: option checking
 
+        // Write out information about the command / run
+        {
+            bfs::path cmdInfoPath = outputDirectory / "cmd_info.json";
+            std::ofstream os(cmdInfoPath.string());
+            cereal::JSONOutputArchive oa(os);
+            oa(cereal::make_nvp("salmon_version", std::string(salmon::version)));
+            for (auto& opt : orderedOptions.options) {
+                if (opt.value.size() == 1) {
+                    oa(cereal::make_nvp(opt.string_key, opt.value.front()));
+                } else {
+                    oa(cereal::make_nvp(opt.string_key, opt.value));
+                }
+            }
+        }
+
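The cmd_info.json block above relies on cereal's JSONOutputArchive to serialize each parsed option as a name-value pair. A self-contained sketch of the same pattern follows; the file name, keys, and values here are made up for illustration.

    #include <cereal/archives/json.hpp>
    #include <cereal/types/string.hpp>
    #include <cereal/types/vector.hpp>
    #include <fstream>
    #include <string>
    #include <vector>

    int main() {
        std::ofstream os("cmd_info_example.json");
        {
            cereal::JSONOutputArchive oa(os);
            oa(cereal::make_nvp("salmon_version", std::string("x.y.z"))); // placeholder value
            // Options with a single value are written as scalars ...
            oa(cereal::make_nvp("libType", std::string("IU")));
            // ... and options with several values are written as JSON arrays.
            std::vector<std::string> mates1{"lib_1_1.fq", "lib_2_1.fq"};
            oa(cereal::make_nvp("mates1", mates1));
        } // the archive finalizes the JSON document when it is destroyed
        return 0;
    }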
         jointLog->info() << "parsing read library format";
 
         vector<ReadLibrary> readLibraries = salmon::utils::extractReadLibraries(orderedOptions);
+
+        SalmonIndexVersionInfo versionInfo;
+        boost::filesystem::path versionPath = indexDirectory / "versionInfo.json";
+        versionInfo.load(versionPath);
+        versionInfo.indexType();
+
         ReadExperiment experiment(readLibraries, indexDirectory, sopt);
 
+        // Parameter validation
+        // If we're allowing orphans, make sure that the read libraries are paired-end.
+        // Otherwise, this option makes no sense.
+        /*
+        if (sopt.allowOrphans) {
+            for (auto& rl : readLibraries) {
+                if (!rl.isPairedEnd()) {
+                    jointLog->error("You cannot specify the --allowOrphans argument "
+                                    "for single-end libraries; exiting!");
+                    std::exit(1);
+                }
+            }
+        }
+        */
+        // end parameter validation
+
+
         // This will be the class in charge of maintaining our
-	// rich equivalence classes
+        // rich equivalence classes
         experiment.equivalenceClassBuilder().start();
 
-        quantifyLibrary(experiment, greedyChain, memOptions, sopt, coverageThresh,
-                        requiredObservations, sopt.numThreads);
+        auto indexType = experiment.getIndex()->indexType();
+
+        switch (indexType) {
+            case SalmonIndexType::FMD:
+                {
+                    /** Currently no seq-specific bias correction with
+                     *  FMD index.
+                     */
+                    if (sopt.biasCorrect) {
+                        sopt.biasCorrect = false;
+                        jointLog->warn("Sequence-specific bias correction requires "
+                                "use of the quasi-index. Disabling bias correction");
+                    }
+                    quantifyLibrary<SMEMAlignment>(experiment, greedyChain, memOptions, sopt, coverageThresh,
+                            requiredObservations, sopt.numThreads);
+                }
+                break;
+            case SalmonIndexType::QUASI:
+                {
+                    sopt.allowOrphans = true;
+                    sopt.useQuasi = true;
+                     quantifyLibrary<QuasiAlignment>(experiment, greedyChain, memOptions, sopt, coverageThresh,
+                                                     requiredObservations, sopt.numThreads);
+                }
+                break;
+        }
 
         // Now that the streaming pass is complete, we have
-	// our initial estimates, and our rich equivalence
-	// classes.  Perform further optimization until
-	// convergence.
+        // our initial estimates, and our rich equivalence
+        // classes.  Perform further optimization until
+        // convergence.
+        // NOTE: A side-effect of calling the optimizer is that
+        // the `EffectiveLength` field of each transcript is
+        // set to its final value.
         CollapsedEMOptimizer optimizer;
         jointLog->info("Starting optimizer");
     	salmon::utils::normalizeAlphas(sopt, experiment);
-        optimizer.optimize(experiment, sopt, 0.01, 10000);
+        bool optSuccess = optimizer.optimize(experiment, sopt, 0.01, 10000);
+
+        if (!optSuccess) {
+            jointLog->error("The optimization algorithm failed. This is likely the result of "
+                            "bad input (or a bug). If you cannot track down the cause, please "
+                            "report this issue on GitHub.");
+            return 1;
+        }
         jointLog->info("Finished optimizer");
 
-        if (sopt.useGSOpt) {
-            jointLog->info("Starting Gibbs Sampler");
-            CollapsedGibbsSampler sampler;
-            sampler.sample(experiment, sopt, sopt.numGibbsSamples);
-            jointLog->info("Finished Gibbs Sampler");
-        }
-
-
         free(memOptions);
         size_t tnum{0};
 
@@ -3129,16 +1740,60 @@ transcript abundance from RNA-seq reads
 
         bfs::path estFilePath = outputDirectory / "quant.sf";
 
-        commentStream << "# [ mapping rate ] => { " << experiment.mappingRate() * 100.0 << "\% }\n";
+        commentStream << "# [ mapping rate ] => { " << experiment.effectiveMappingRate() * 100.0 << "\% }\n";
         commentString = commentStream.str();
 
-        salmon::utils::writeAbundancesFromCollapsed(
-                sopt, experiment, estFilePath, commentString);
+        GZipWriter gzw(outputDirectory, jointLog);
+        // Write the main results
+        gzw.writeAbundances(sopt, experiment);
+        // Write meta-information about the run
+        gzw.writeMeta(sopt, experiment, runStartTime);
+
+        if (sopt.numGibbsSamples > 0) {
+
+            jointLog->info("Starting Gibbs Sampler");
+            CollapsedGibbsSampler sampler;
+            // The function we'll use as a callback to write samples
+            std::function<bool(const std::vector<int>&)> bsWriter =
+                [&gzw](const std::vector<int>& alphas) -> bool {
+                    return gzw.writeBootstrap(alphas);
+                };
+
+            bool sampleSuccess = sampler.sample(experiment, sopt,
+                    bsWriter,
+                    sopt.numGibbsSamples);
+            if (!sampleSuccess) {
+                jointLog->error("Encountered error during Gibb sampling .\n"
+                        "This should not happen.\n"
+                        "Please file a bug report on GitHub.\n");
+                return 1;
+            }
+            jointLog->info("Finished Gibbs Sampler");
+        } else if (sopt.numBootstraps > 0) {
+            // The function we'll use as a callback to write samples
+            std::function<bool(const std::vector<double>&)> bsWriter =
+                [&gzw](const std::vector<double>& alphas) -> bool {
+                    return gzw.writeBootstrap(alphas);
+                };
+
+            jointLog->info("Staring Bootstrapping");
+            bool bootstrapSuccess = optimizer.gatherBootstraps(
+                    experiment, sopt,
+                    bsWriter, 0.01, 10000);
+            jointLog->info("Finished Bootstrapping");
+            if (!bootstrapSuccess) {
+                jointLog->error("Encountered error during bootstrapping.\n"
+                        "This should not happen.\n"
+                        "Please file a bug report on GitHub.\n");
+                return 1;
+            }
+        }
+
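Both branches above hand the writer to the sampler as a std::function callback rather than exposing the GZipWriter type to the sampler itself. A stripped-down sketch of that pattern with stand-in types (nothing below is salmon's actual API):

    #include <cstdio>
    #include <functional>
    #include <random>
    #include <vector>

    // Stand-in for a per-sample writer: here we just print the sample.
    static bool writeSample(const std::vector<double>& alphas) {
        for (double a : alphas) { std::printf("%.2f ", a); }
        std::printf("\n");
        return true;
    }

    // Stand-in for the sampler: draws numSamples perturbed vectors and hands
    // each one to the caller-supplied callback; aborts if the callback fails.
    static bool drawSamples(std::size_t numSamples,
                            const std::function<bool(const std::vector<double>&)>& writer) {
        std::mt19937 rng(42);
        std::normal_distribution<double> noise(0.0, 1.0);
        std::vector<double> base{10.0, 5.0, 1.0};
        for (std::size_t i = 0; i < numSamples; ++i) {
            std::vector<double> sample(base);
            for (double& v : sample) { v += noise(rng); }
            if (!writer(sample)) { return false; }
        }
        return true;
    }

    int main() {
        return drawSamples(3, writeSample) ? 0 : 1;
    }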
 
         // Now create a subdirectory for any parameters of interest
         bfs::path paramsDir = outputDirectory / "libParams";
         if (!boost::filesystem::exists(paramsDir)) {
-            if (!boost::filesystem::create_directory(paramsDir)) {
+            if (!boost::filesystem::create_directories(paramsDir)) {
                 fmt::print(stderr, "{}ERROR{}: Could not create "
                            "output directory for experimental parameter "
                            "estimates [{}]. exiting.", ioutils::SET_RED,
@@ -3158,31 +1813,12 @@ transcript abundance from RNA-seq reads
                 fmt::print(distOut.get(), "{}\n", experiment.fragmentLengthDistribution()->toString());
             }
         }
-        if (!sopt.noSeqBiasModel) {
-            bfs::path biasFileName = paramsDir / "seqBias.txt";
-            {
-                std::unique_ptr<std::FILE, int (*)(std::FILE *)> biasOut(std::fopen(biasFileName.c_str(), "w"), std::fclose);
-                fmt::print(biasOut.get(), "{}\n", experiment.sequenceBiasModel().toString());
-            }
-        }
-
-        if (biasCorrect) {
-            auto origExpressionFile = estFilePath;
-
-            auto outputDirectory = estFilePath;
-            outputDirectory.remove_filename();
-
-            auto biasFeatPath = indexDirectory / "bias_feats.txt";
-            auto biasCorrectedFile = outputDirectory / "quant_bias_corrected.sf";
-            performBiasCorrectionSalmon(biasFeatPath, estFilePath, biasCorrectedFile, sopt.numThreads);
-        }
 
         /** If the user requested gene-level abundances, then compute those now **/
         if (vm.count("geneMap")) {
             try {
                 salmon::utils::generateGeneLevelEstimates(geneMapPath,
-                                                            outputDirectory,
-                                                            biasCorrect);
+                                                          outputDirectory);
             } catch (std::invalid_argument& e) {
                 fmt::print(stderr, "Error: [{}] when trying to compute gene-level "\
                                    "estimates. The gene-level file(s) may not exist",
@@ -3206,4 +1842,3 @@ transcript abundance from RNA-seq reads
 
     return 0;
 }
-
diff --git a/src/SalmonQuantifyAlignments.cpp b/src/SalmonQuantifyAlignments.cpp
index 53cd26c..dc5a5f4 100644
--- a/src/SalmonQuantifyAlignments.cpp
+++ b/src/SalmonQuantifyAlignments.cpp
@@ -5,7 +5,7 @@ extern "C" {
 }
 
 // for cpp-format
-#include "format.h"
+#include "spdlog/details/format.h"
 
 // are these used?
 #include <boost/dynamic_bitset.hpp>
@@ -60,6 +60,9 @@ extern "C" {
 #include "spdlog/spdlog.h"
 #include "EquivalenceClassBuilder.hpp"
 #include "CollapsedEMOptimizer.hpp"
+#include "CollapsedGibbsSampler.hpp"
+#include "GZipWriter.hpp"
+#include "TextBootstrapWriter.hpp"
 
 namespace bfs = boost::filesystem;
 using salmon::math::LOG_0;
@@ -67,7 +70,7 @@ using salmon::math::LOG_1;
 using salmon::math::logAdd;
 using salmon::math::logSub;
 
-constexpr uint32_t miniBatchSize{250};
+constexpr uint32_t miniBatchSize{1000};
 
 template <typename FragT>
 using AlignmentBatch = std::vector<FragT>;
@@ -122,8 +125,8 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
                       std::mutex& cvmutex,
                       volatile bool& doneParsing,
                       std::atomic<size_t>& activeBatches,
-                      const SalmonOpts& salmonOpts,
-                      bool& burnedIn,
+                      SalmonOpts& salmonOpts,
+                      std::atomic<bool>& burnedIn,
                       bool initialRound,
                       std::atomic<size_t>& processedReads) {
 
@@ -141,6 +144,7 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
 
     //EQClass
     EquivalenceClassBuilder& eqBuilder = alnLib.equivalenceClassBuilder();
+    auto& readBias = alnLib.readBias();
 
     using salmon::math::LOG_0;
     using salmon::math::logAdd;
@@ -154,10 +158,10 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
     std::vector<FragmentStartPositionDistribution>& fragStartDists =
         alnLib.fragmentStartPositionDistributions();
 
-    auto& fragLengthDist = alnLib.fragmentLengthDistribution();
+    auto& fragLengthDist = *(alnLib.fragmentLengthDistribution());
     auto& alnMod = alnLib.alignmentModel();
 
-    bool useFSPD{!salmonOpts.noFragStartPosDist};
+    bool useFSPD{salmonOpts.useFSPD};
     bool useFragLengthDist{!salmonOpts.noFragLengthDist};
     bool noFragLenFactor{salmonOpts.noFragLenFactor};
 
@@ -178,7 +182,7 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
         // Try up to numTries times to get work from the queue before
         // giving up and waiting on the condition variable
     	constexpr uint32_t numTries = 100;
-        bool foundWork = tryToGetWork(workQueue, miniBatch, 100);
+        bool foundWork = tryToGetWork(workQueue, miniBatch, numTries);
 
         // If work wasn't immediately available, then wait for it using
     	// a condition variable to avoid burning CPU cycles for no reason.
@@ -197,6 +201,7 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
             // double logForgettingMass = fmCalc();
             double logForgettingMass{0.0};
             uint64_t currentMinibatchTimestep{0};
+            // logForgettingMass and currentMinibatchTimestep are OUT parameters!
             fmCalc.getLogMassAndTimestep(logForgettingMass, currentMinibatchTimestep);
             miniBatch->logForgettingMass = logForgettingMass;
 
@@ -206,72 +211,7 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
             using HitIDVector = std::vector<size_t>;
             using HitProbVector = std::vector<double>;
 
-            // BEGIN: DOUBLY-COLLAPSED TESTING
-            struct HitInfo {
-                uint32_t numHits = 0;
-                bool observed = false;
-                double newUniqueMass = LOG_0;
-            };
-
-            std::unordered_map<TranscriptID, HitInfo> hitInfo;
-            // We only need to fill this in if it's not the first round
-            if (useMassBanking) {
-                for (auto& alnGroup : alignmentGroups) {
-                    for (auto a : alnGroup->alignments()) {
-                        auto transcriptID = a->transcriptID();
-                        if (transcriptID < 0 or transcriptID >= refs.size()) {
-                            salmonOpts.jointLog->warn("Invalid Transcript ID [{}] encountered", transcriptID);
-                        }
-                        auto& info = hitInfo[transcriptID];
-                        auto& txp =refs[transcriptID];
-                        if(!info.observed) {
-                            info.observed = true;
-
-                            if (txp.uniqueCount() > 0) {
-                                /*
-                                double dormantInterval = static_cast<double>(currentMinibatchTimestep -
-                                        firstTimestepOfRound + 1);
-                                        */
-                                // The cumulative mass last time this was updated
-                                // double prevUpdateMass = startingCumulativeMass;
-
-                                double updateFraction = std::log(txp.uniqueUpdateFraction());
-                                auto lastUpdate = txp.lastTimestepUpdated();
-                                double newUniqueMass = 0.0;
-                                if (lastUpdate >= currentMinibatchTimestep) {
-                                    newUniqueMass = logForgettingMass + updateFraction;
-                                } else {
-                                    double dormantInterval = static_cast<double>(currentMinibatchTimestep) - lastUpdate;
-                                    double prevUpdateMass = fmCalc.cumulativeLogMassAt(lastUpdate - 1);
-                                    double currentUpdateMass = fmCalc.cumulativeLogMassAt(currentMinibatchTimestep);
-                                    newUniqueMass = salmon::math::logSub(currentUpdateMass, prevUpdateMass) +
-                                        updateFraction - std::log(dormantInterval);
-                                }
-                                info.newUniqueMass = newUniqueMass;
-
-                                // The new unique mass to be added to this transcript
-				/*
-                                double newUniqueMass =
-                                    salmon::math::logSub(currentUpdateMass, prevUpdateMass) +
-                                    updateFraction - std::log(dormantInterval);
-				*/
-                                /*
-				double newUniqueMass = logForgettingMass + updateFraction;
-                                info.newUniqueMass = newUniqueMass;
-                                */
-                            }
-                        }
-                        info.numHits++;
-                    } // end alignments in group
-                } // end batch hits
-            } // end initial round
-            // END: DOUBLY-COLLAPSED TESTING
-
-
-            {
-                // The cumulative forgetting mass up through and including the current timestep.
-                double currentCumulativeMass = fmCalc.cumulativeLogMassAt(currentMinibatchTimestep);
-
+            {
                 // Iterate over each group of alignments (a group consists of all alignments reported
                 // for a single read).  Distribute the read's mass proportionally dependent on the
                 // current
@@ -280,25 +220,30 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
                     // EQCLASS
                     std::vector<uint32_t> txpIDs;
                     std::vector<double> auxProbs;
-                    size_t txpIDsHash{0};
+                    std::vector<double> posProbs;
                     double auxDenom = salmon::math::LOG_0;
+
+                    // The alignments must be sorted by transcript id
                     alnGroup->sortHits();
 
                     double sumOfAlignProbs{LOG_0};
+
                     // update the cluster-level properties
                     bool transcriptUnique{true};
                     auto firstTranscriptID = alnGroup->alignments().front()->transcriptID();
                     std::unordered_set<size_t> observedTranscripts;
+
                     for (auto& aln : alnGroup->alignments()) {
                         auto transcriptID = aln->transcriptID();
                         auto& transcript = refs[transcriptID];
                         transcriptUnique = transcriptUnique and (transcriptID == firstTranscriptID);
 
                         double refLength = transcript.RefLength > 0 ? transcript.RefLength : 1.0;
-
                         double logFragProb = salmon::math::LOG_1;
 
                         if (!salmonOpts.noFragLengthDist and useAuxParams) {
+                            /** Ignore the fragment-length term for reads that are not paired **/
+                            /*
                             if(aln->fragLen() == 0) {
                                 if (aln->isLeft() and transcript.RefLength - aln->left() < fragLengthDist.maxVal()) {
                                     logFragProb = fragLengthDist.cmf(transcript.RefLength - aln->left());
@@ -306,6 +251,9 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
                                     logFragProb = fragLengthDist.cmf(aln->right());
                                 }
                             } else {
+                            }
+                            */
+                            if(aln->isPaired() and aln->fragLen() > 0) {
                                 logFragProb = fragLengthDist.pmf(static_cast<size_t>(aln->fragLen()));
                             }
                         }
@@ -318,15 +266,26 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
                         //fragProb = std::max(fragProb, 1e-3);
                         //fragProb /= cdf(fragLengthDist, refLength);
 
-                        // The alignment probability is the product of a transcript-level term (based on abundance and) an alignment-level
-                        // term below which is P(Q_1) * P(Q_2) * P(F | T)
-                        double logRefLength = std::log(refLength);
+                        // The alignment probability is the product of a
+                        // transcript-level term (based on abundance) and an
+                        // alignment-level term.
+                        double logRefLength{salmon::math::LOG_0};
+                        if (salmonOpts.noEffectiveLengthCorrection or !burnedIn) {
+                            logRefLength = std::log(transcript.RefLength);
+                        } else {
+                            logRefLength = transcript.getCachedLogEffectiveLength();
+                        }
 
                         // The probability that the fragments align to the given strands in the
                         // given orientations.
-                        double logAlignCompatProb = (useReadCompat) ?
-                                (salmon::utils::logAlignFormatProb(aln->libFormat(), expectedLibraryFormat, salmonOpts.incompatPrior)) :
-                                LOG_1;
+                        double logAlignCompatProb =
+                            (useReadCompat) ?
+                            (salmon::utils::logAlignFormatProb(
+                                  aln->libFormat(),
+                                  expectedLibraryFormat,
+                                  aln->pos(),
+                                  aln->fwd(), aln->mateStatus(), salmonOpts.incompatPrior)
+                            ) : LOG_1;
 
                         // Adjustment to the likelihood due to the
                         // error model
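Once the library is burned in, the code above swaps log(RefLength) for the transcript's cached log effective length. For reference, the usual effective-length definition weights each fragment length by its probability and counts the start positions it leaves available; the sketch below follows that standard formulation and is not necessarily the exact code behind getCachedLogEffectiveLength.

    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Effective length = sum over fragment lengths l of P(l) * (refLen - l + 1),
    // i.e. the expected number of positions a fragment could start from.
    double effectiveLength(uint32_t refLen, const std::vector<double>& fragLenPMF) {
        double effLen = 0.0;
        for (uint32_t l = 1; l <= refLen && l < fragLenPMF.size(); ++l) {
            effLen += fragLenPMF[l] * static_cast<double>(refLen - l + 1);
        }
        // Keep log(effLen) finite for very short transcripts.
        return (effLen < 1.0) ? 1.0 : effLen;
    }

    int main() {
        // Toy fragment-length distribution concentrated on lengths 200 and 250.
        std::vector<double> pmf(300, 0.0);
        pmf[200] = 0.6;
        pmf[250] = 0.4;
        std::printf("effective length of a 1000 bp transcript: %.1f\n",
                    effectiveLength(1000, pmf));
        return 0;
    }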
@@ -337,49 +296,44 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
                         }
 
 			// Allow for a non-uniform fragment start position distribution
-                        double startPosProb = -logRefLength;
+			double startPosProb{-logRefLength};
+			double fragStartLogNumerator{salmon::math::LOG_1};
+			double fragStartLogDenominator{salmon::math::LOG_1};
+
                         auto hitPos = aln->left();
 			if (useFSPD and burnedIn and hitPos < refLength) {
-			  auto& fragStartDist =
-				  fragStartDists[transcript.lengthClassIndex()];
-			  startPosProb = fragStartDist(hitPos, refLength, logRefLength);
+			  auto& fragStartDist = fragStartDists[transcript.lengthClassIndex()];
+			  // Get the log(numerator) and log(denominator) for the fragment start position
+			  // probability.
+			  bool nonZeroProb = fragStartDist.logNumDenomMass(hitPos, refLength, logRefLength,
+			      fragStartLogNumerator, fragStartLogDenominator);
+			  // Set the overall probability.
+			  startPosProb = (nonZeroProb) ?
+			    fragStartLogNumerator - fragStartLogDenominator :
+			    salmon::math::LOG_0;
 			}
 
-			// Pre FSPD
-			/*
-                        double auxProb = -logRefLength + logFragProb +
-                                          aln->logQualProb() +
-                                          errLike + logAlignCompatProb;
-		        */
-            double auxProb = startPosProb + logFragProb +
-                             aln->logQualProb() +
-                             errLike + logAlignCompatProb;
-
+                        // The total auxiliary probability is the product (sum in log space) of:
+                        //   - the fragment length probability
+                        //   - the mapping score (under the error model) probability
+                        //   - the fragment compatibility probability
 
+                        // The auxProb does *not* account for the start position
+                        // probability!
+                        double auxProb = logFragProb + errLike + logAlignCompatProb;
 
                         // The overall mass of this transcript, which is used to
                         // account for this transcript's relative abundance
                         double transcriptLogCount = transcript.mass(initialRound);
 
-                        // BEGIN: DOUBLY-COLLAPSED TESTING
-                        // If this is not the initial round, then add the
-                        // appropriate proportion of unique read mass for
-                        // every ambiguous alignment we encounter.  Here,
-                        // we're not assigning the extra mass yet, but just
-                        // adding it to the transcriptLogCount.
-                        if (useMassBanking and transcript.uniqueCount() > 0) {
-                            auto txpHitInfo = hitInfo[transcriptID];
-                            transcriptLogCount = salmon::math::logAdd(transcriptLogCount, txpHitInfo.newUniqueMass);
-                        }
-                        // END: DOUBLY-COLLAPSED TESTING
-
                         if ( transcriptLogCount != LOG_0 and
-                             auxProb != LOG_0) {
-                           aln->logProb = transcriptLogCount + auxProb;
+                              auxProb != LOG_0 and
+                              startPosProb != LOG_0 ) {
+                            aln->logProb = transcriptLogCount + auxProb + startPosProb;
 
                             sumOfAlignProbs = logAdd(sumOfAlignProbs, aln->logProb);
                             if (updateCounts and
-                                observedTranscripts.find(transcriptID) == observedTranscripts.end()) {
+                                    observedTranscripts.find(transcriptID) == observedTranscripts.end()) {
                                 refs[transcriptID].addTotalCount(1);
                                 observedTranscripts.insert(transcriptID);
                             }
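All of the per-alignment terms above are combined in log space: products become sums, and the normalizer sumOfAlignProbs is accumulated with logAdd. A minimal sketch of the log-sum-exp trick such a logAdd typically uses (illustrative only, not the salmon::math implementation):

    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <limits>

    static const double LOG_0 = -std::numeric_limits<double>::infinity();

    // log(exp(x) + exp(y)) computed stably (log-sum-exp with the max factored out).
    double logAdd(double x, double y) {
        if (x == LOG_0) { return y; }
        if (y == LOG_0) { return x; }
        double m = std::max(x, y);
        return m + std::log1p(std::exp(std::min(x, y) - m));
    }

    int main() {
        // Product of probabilities == sum of log-probabilities.
        double logFragProb = std::log(0.02);
        double logErrLike = std::log(0.5);
        double logCompat = std::log(1.0);
        double logAln = logFragProb + logErrLike + logCompat;

        // Normalizer accumulated across alignments with logAdd.
        double sumOfAlignProbs = LOG_0;
        sumOfAlignProbs = logAdd(sumOfAlignProbs, logAln);
        sumOfAlignProbs = logAdd(sumOfAlignProbs, std::log(0.005));
        std::printf("normalized first alignment prob = %.4f\n",
                    std::exp(logAln - sumOfAlignProbs));
        return 0;
    }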
@@ -387,7 +341,10 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
                             txpIDs.push_back(transcriptID);
                             auxProbs.push_back(auxProb);
                             auxDenom = salmon::math::logAdd(auxDenom, auxProb);
-                            boost::hash_combine(txpIDsHash, transcriptID);
+
+                            if (useFSPD) {
+                                posProbs.push_back(fragStartLogNumerator);
+                            }
 
                         } else {
                             aln->logProb = LOG_0;
@@ -403,19 +360,21 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
                     }
 
                     // EQCLASS
-                    TranscriptGroup tg(txpIDs, txpIDsHash);
                     double auxProbSum{0.0};
                     for (auto& p : auxProbs) {
                         p = std::exp(p - auxDenom);
                         auxProbSum += p;
                     }
-                    if (std::abs(auxProbSum - 1.0) > 0.01) {
-                        std::cerr << "weights had sum of " << auxProbSum
-                                  << " but it should be 1!!\n\n";
+
+                    if (txpIDs.size() > 0) {
+                        TranscriptGroup tg(txpIDs);
+                        eqBuilder.addGroup(std::move(tg), auxProbs, posProbs);
                     }
-                    eqBuilder.addGroup(std::move(tg), auxProbs);
 
 
+                    // Are we doing bias correction?
+                    bool needBiasSample = salmonOpts.biasCorrect;
+
                     // Normalize the scores
                     for (auto& aln : alnGroup->alignments()) {
                         if (aln->logProb == LOG_0) { continue; }
@@ -425,32 +384,59 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
                         auto& transcript = refs[transcriptID];
 
                         double newMass = logForgettingMass + aln->logProb;
-                        if (useMassBanking and transcript.uniqueCount() > 0) {
-                            newMass = salmon::math::logAdd(newMass, hitInfo[transcriptID].newUniqueMass);
-                        }
                         transcript.addMass(newMass);
                         transcript.setLastTimestepUpdated(currentMinibatchTimestep);
 
+                        /**
+                         * Update the auxiliary models.
+                         **/
                         double r = uni(eng);
                         if (!burnedIn and r < std::exp(aln->logProb)) {
-			    // Update the error model
+                            /**
+                             * Update the sequence-specific bias model
+                             **/
+                            if (needBiasSample and salmonOpts.numBiasSamples > 0) {
+                                // the "start" position is the leftmost position if
+                                // we hit the forward strand, and the leftmost
+                                // position + the read length if we hit the reverse complement
+                                bam_seq_t* r = aln->get5PrimeRead();
+                                if (r) {
+                                    bool fwd{bam_strand(r) == 0};
+                                    int32_t pos{bam_pos(r)};
+                                    int32_t startPos = fwd ? pos : pos + bam_seq_len(r);
+                                    auto dir = salmon::utils::boolToDirection(fwd);
+
+                                    if (startPos > 0 and startPos < transcript.RefLength) {
+                                        const char* txpStart = transcript.Sequence;
+                                        const char* readStart = txpStart + startPos;
+                                        const char* txpEnd = txpStart + transcript.RefLength;
+                                        bool success = readBias.update(txpStart, readStart, txpEnd, dir);
+                                        if (success) {
+                                            salmonOpts.numBiasSamples -= 1;
+                                            needBiasSample = false;
+                                        }
+                                    }
+                                }
+                            }
+
+                            // Update the error model
                             if (salmonOpts.useErrorModel) {
                                 alnMod.update(*aln, transcript, LOG_1, logForgettingMass);
                             }
-			    // Update the fragment length distribution
+                            // Update the fragment length distribution
                             if (aln->isPaired() and !salmonOpts.noFragLengthDist) {
                                 double fragLength = aln->fragLen();
                                 fragLengthDist.addVal(fragLength, logForgettingMass);
                             }
-			    // Update the fragment start position distribution
-			    if (useFSPD) {
-				    auto hitPos = aln->left();
-				    auto& fragStartDist =
-					    fragStartDists[transcript.lengthClassIndex()];
-				    fragStartDist.addVal(hitPos,
-						    transcript.RefLength,
-						    logForgettingMass);
-			    }
+                            // Update the fragment start position distribution
+                            if (useFSPD) {
+                                auto hitPos = aln->left();
+                                auto& fragStartDist =
+                                    fragStartDists[transcript.lengthClassIndex()];
+                                fragStartDist.addVal(hitPos,
+                                        transcript.RefLength,
+                                        logForgettingMass);
+                            }
                         }
                     }
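The bias-sampling branch above records the transcript context at a fragment's 5' start via readBias.update. The toy counter below sketches the underlying idea, tallying the k-mer that spans each observed start position; it is only a conceptual stand-in, not the real bias model.

    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <unordered_map>

    // Count the k-mer spanning each observed fragment 5' start position.
    // Comparing such "foreground" counts against background counts over the
    // whole transcript is the basic idea behind sequence-specific bias models.
    class ToyReadBias {
    public:
        explicit ToyReadBias(uint32_t k) : k_(k) {}

        // txp: transcript sequence; startPos: 5' start of the fragment on the transcript.
        bool update(const std::string& txp, int32_t startPos) {
            // Require k/2 bases of context on each side of the start position.
            int32_t left = startPos - static_cast<int32_t>(k_ / 2);
            if (left < 0 or static_cast<std::size_t>(left) + k_ > txp.size()) { return false; }
            ++counts_[txp.substr(static_cast<std::size_t>(left), k_)];
            return true;
        }

        void print() const {
            for (const auto& kv : counts_) {
                std::printf("%s\t%llu\n", kv.first.c_str(),
                            static_cast<unsigned long long>(kv.second));
            }
        }

    private:
        uint32_t k_;
        std::unordered_map<std::string, uint64_t> counts_;
    };

    int main() {
        ToyReadBias bias(6);
        std::string txp = "ACGTACGTTGCAAGGTTACG";
        bias.update(txp, 8);
        bias.update(txp, 8);
        bias.update(txp, 12);
        bias.print();
        return 0;
    }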
 
@@ -529,8 +515,17 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
             --activeBatches;
             processedReads += batchReads;
             if (processedReads >= numBurninFrags and !burnedIn) {
-                burnedIn = true;
+                if (useFSPD) {
+                    // update all of the fragment start position
+                    // distributions
+                    for (auto& fspd : fragStartDists) {
+                        fspd.update();
+                    }
+                }
                 fragLengthDist.cacheCMF();
+                // NOTE: only one thread should succeed here, and that
+                // thread will set burnedIn to true
+                alnLib.updateTranscriptLengthsAtomic(burnedIn);
             }
         }
         miniBatch = nullptr;
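Per the note above, only one thread should perform the burn-in transition, which is why burnedIn is now a std::atomic<bool> and the transition is delegated to updateTranscriptLengthsAtomic. A standalone sketch of how an atomic compare-and-exchange guarantees a single winner (independent of salmon's types):

    #include <atomic>
    #include <cstdio>
    #include <thread>
    #include <vector>

    int main() {
        std::atomic<bool> burnedIn{false};
        std::atomic<int> winners{0};

        auto worker = [&]() {
            bool expected = false;
            // Only the thread that flips false -> true does the one-time work
            // (e.g. caching CMFs, updating effective lengths).
            if (burnedIn.compare_exchange_strong(expected, true)) {
                ++winners;
            }
        };

        std::vector<std::thread> pool;
        for (int i = 0; i < 8; ++i) { pool.emplace_back(worker); }
        for (auto& t : pool) { t.join(); }

        std::printf("burnedIn = %d, winners = %d (always exactly 1)\n",
                    burnedIn.load() ? 1 : 0, winners.load());
        return 0;
    }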
@@ -548,9 +543,9 @@ template <typename FragT>
 bool quantifyLibrary(
         AlignmentLibrary<FragT>& alnLib,
         size_t numRequiredFragments,
-        const SalmonOpts& salmonOpts) {
+        SalmonOpts& salmonOpts) {
 
-    bool burnedIn{false};
+    std::atomic<bool> burnedIn{false};
 
     auto& refs = alnLib.transcripts();
     size_t numTranscripts = refs.size();
@@ -579,19 +574,17 @@ bool quantifyLibrary(
     size_t maxCacheSize{salmonOpts.mappingCacheMemoryLimit};
 
     NullFragmentFilter<FragT>* nff = nullptr;
+    bool terminate{false};
 
     // Give ourselves some space
     fmt::print(stderr, "\n\n\n\n");
 
-    // EQCLASS
-    bool terminate{false};
-
     while (numObservedFragments < numRequiredFragments and !terminate) {
         if (!initialRound) {
 
     	    size_t numToCache = (useMassBanking) ?
-				(alnLib.numMappedReads() - alnLib.numUniquelyMappedReads()) :
-				(alnLib.numMappedReads());
+				(alnLib.numMappedFragments() - alnLib.numUniquelyMappedFragments()) :
+				(alnLib.numMappedFragments());
 
             if (haveCache) {
                 std::swap(workQueuePtr, processedCachePtr);
@@ -729,7 +722,7 @@ bool quantifyLibrary(
         }
         fmt::print(stderr, "\n\n");
 
-        numObservedFragments += alnLib.numMappedReads();
+        numObservedFragments += alnLib.numMappedFragments();
 
         fmt::print(stderr, "# observed = {} / # required = {}\033[A\033[A\033[A\033[A\033[A",
                    numObservedFragments, numRequiredFragments);
@@ -739,10 +732,10 @@ bool quantifyLibrary(
                                       "Total # of mapped reads : {}\n"
                                       "# of uniquely mapped reads : {}\n"
                                       "# ambiguously mapped reads : {}\n\n\n",
-                                      alnLib.numMappedReads(),
-                                      alnLib.numUniquelyMappedReads(),
-                                      alnLib.numMappedReads() -
-                                      alnLib.numUniquelyMappedReads());
+                                      alnLib.numMappedFragments(),
+                                      alnLib.numUniquelyMappedFragments(),
+                                      alnLib.numMappedFragments() -
+                                      alnLib.numUniquelyMappedFragments());
         }
 
         initialRound = false;
@@ -761,6 +754,28 @@ bool quantifyLibrary(
 
     fmt::print(stderr, "\n\n\n\n");
 
+
+    // If we didn't achieve burnin, then at least compute effective
+    // lengths and mention this to the user.
+    if (alnLib.numMappedFragments() < salmonOpts.numBurninFrags) {
+        std::atomic<bool> dummyBool{false};
+        alnLib.updateTranscriptLengthsAtomic(dummyBool);
+        salmonOpts.jointLog->warn("Only {} fragments were mapped, but the number of burn-in fragments was set to {}.\n"
+                "The effective lengths have been computed using the observed mappings.\n",
+                alnLib.numMappedFragments(), salmonOpts.numBurninFrags);
+
+        // If we didn't have a sufficient number of samples for burn-in,
+        // then also ignore modeling of the fragment start position
+        // distribution.
+        if (salmonOpts.useFSPD) {
+            salmonOpts.useFSPD = false;
+            salmonOpts.jointLog->warn("Since only {} (< {}) fragments were observed, modeling of the fragment start position "
+                    "distribution has been disabled.", alnLib.numMappedFragments(), salmonOpts.numBurninFrags);
+        }
+    }
+
+
     // In this case, we have to give the structures held
     // in the cache back to the appropriate queues
     if (haveCache) {
@@ -776,20 +791,105 @@ bool quantifyLibrary(
         }
     }
 
-    return burnedIn;
+    return burnedIn.load();
 }
 
-int computeBiasFeatures(
-    std::vector<std::string>& transcriptFiles,
-    boost::filesystem::path outFilePath,
-    bool useStreamingParser,
-    size_t numThreads);
+template <typename ReadT>
+bool processSample(AlignmentLibrary<ReadT>& alnLib,
+                   const std::string& runStartTime,
+                   size_t requiredObservations,
+                   SalmonOpts& sopt,
+                   boost::filesystem::path outputDirectory) {
+
+    auto& jointLog = sopt.jointLog;
+    // EQCLASS
+    alnLib.equivalenceClassBuilder().start();
+
+    bool burnedIn = quantifyLibrary<ReadT>(alnLib, requiredObservations, sopt);
+
+    // EQCLASS
+    // NOTE: A side-effect of calling the optimizer is that
+    // the `EffectiveLength` field of each transcript is
+    // set to its final value.
+    CollapsedEMOptimizer optimizer;
+    jointLog->info("starting optimizer");
+    salmon::utils::normalizeAlphas(sopt, alnLib);
+    bool optSuccess = optimizer.optimize(alnLib, sopt, 0.01, 10000);
+    // If the optimizer didn't work, then bail out here.
+    if (!optSuccess) { return false; }
+    jointLog->info("finished optimizer");
+
+    // EQCLASS
+    fmt::print(stderr, "\n\nwriting output \n");
+    GZipWriter gzw(outputDirectory, jointLog);
+    // Write the main results
+    gzw.writeAbundances(sopt, alnLib);
+    // Write meta-information about the run
+    gzw.writeMeta(sopt, alnLib, runStartTime);
+
+    if (sopt.numGibbsSamples > 0) {
+
+        jointLog->info("Starting Gibbs Sampler");
+        CollapsedGibbsSampler sampler;
+        // The function we'll use as a callback to write samples
+        std::function<bool(const std::vector<int>&)> bsWriter =
+            [&gzw](const std::vector<int>& alphas) -> bool {
+                return gzw.writeBootstrap(alphas);
+            };
+
+        bool sampleSuccess = sampler.sample(alnLib, sopt,
+                bsWriter,
+                sopt.numGibbsSamples);
+        if (!sampleSuccess) {
+            jointLog->error("Encountered error during Gibb sampling .\n"
+                    "This should not happen.\n"
+                    "Please file a bug report on GitHub.\n");
+            return false;
+        }
+        jointLog->info("Finished Gibbs Sampler");
+    } else if (sopt.numBootstraps > 0) {
+        // The function we'll use as a callback to write samples
+        std::function<bool(const std::vector<double>&)> bsWriter =
+            [&gzw](const std::vector<double>& alphas) -> bool {
+                return gzw.writeBootstrap(alphas);
+            };
+
+        jointLog->info("Staring Bootstrapping");
+        bool bootstrapSuccess = optimizer.gatherBootstraps(
+                alnLib, sopt,
+                bsWriter, 0.01, 10000);
+        jointLog->info("Finished Bootstrapping");
+        if (!bootstrapSuccess) {
+            jointLog->error("Encountered error during bootstrapping.\n"
+                    "This should not happen.\n"
+                    "Please file a bug report on GitHub.\n");
+            return false;
+        }
+    }
+
+
+
+    if (sopt.sampleOutput) {
+        // In this case, we should "re-convert" transcript
+        // masses to be counts in log space
+        auto nr = alnLib.numMappedFragments();
+        for (auto& t : alnLib.transcripts()) {
+            double m = t.mass(false) * nr;
+            if (m > 0.0) {
+                t.setMass(std::log(m));
+            }
+        }
+
+        bfs::path sampleFilePath = outputDirectory / "postSample.bam";
+        bool didSample = salmon::sampler::sampleLibrary<ReadT>(alnLib, sopt, burnedIn, sampleFilePath, sopt.sampleUnaligned);
+        if (!didSample) {
+            jointLog->warn("There may have been a problem generating the sampled output file; please check the log\n");
+        }
+    }
+
+    return true;
+}
 
-int performBiasCorrectionSalmon(
-        boost::filesystem::path featureFile,
-        boost::filesystem::path expressionFile,
-        boost::filesystem::path outputFile,
-        size_t numThreads);
 
 int salmonAlignmentQuantify(int argc, char* argv[]) {
     using std::cerr;
@@ -800,9 +900,6 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
 
     SalmonOpts sopt;
 
-    bool sampleOutput{false};
-    bool sampleUnaligned{false};
-    bool biasCorrect{false};
     uint32_t numThreads{4};
     size_t requiredObservations{50000000};
 
@@ -818,18 +915,18 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
                                             "so until there is a faster multi-threaded SAM/BAM parser to feed the "
                                             "quantification threads, one should not expect much of a speed-up beyond "
                                             "~6 threads.")
+    ("biasCorrect", po::value(&(sopt.biasCorrect))->zero_tokens(), "Perform sequence-specific bias correction.")
     ("incompatPrior", po::value<double>(&(sopt.incompatPrior))->default_value(1e-20), "This option "
                         "sets the prior probability that an alignment that disagrees with the specified "
                         "library type (--libType) results from the true fragment origin.  Setting this to 0 "
                         "specifies that alignments that disagree with the library type should be \"impossible\", "
                         "while setting it to 1 says that alignments that disagree with the library type are no "
                         "less likely than those that do")
-    ("useErrorModel", po::bool_switch(&(sopt.useErrorModel))->default_value(false), "[Currently Experimental] : "
+    ("useErrorModel", po::bool_switch(&(sopt.useErrorModel))->default_value(false), "[experimental] : "
                         "Learn and apply an error model for the aligned reads.  This takes into account the "
                         "the observed frequency of different types of mismatches when computing the likelihood of "
                         "a given alignment.")
     ("output,o", po::value<std::string>()->required(), "Output quantification directory.")
-    ("biasCorrect", po::value(&biasCorrect)->zero_tokens(), "[Experimental]: Output both bias-corrected and non-bias-corrected ")
     ("numRequiredObs,n", po::value(&requiredObservations)->default_value(50000000),
                                         "[Deprecated]: The minimum number of observations (mapped reads) that must be observed before "
                                         "the inference procedure will terminate.  If fewer mapped reads exist in the "
@@ -844,6 +941,7 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
                                         "format; files with any other extension are assumed to be in the simple format.");
 
     // no sequence bias for now
+    sopt.useMassBanking = false;
     sopt.noSeqBiasModel = true;
     sopt.noRichEqClasses = false;
 
@@ -865,14 +963,14 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
                         "effective length correction when computing the probability that a fragment was generated "
                         "from a transcript.  If this flag is passed in, the fragment length distribution is not taken "
                         "into account when computing this probability.")
-    ("noFragLengthDist", po::bool_switch(&(sopt.noFragLengthDist))->default_value(false), "[Currently Experimental] : "
+    ("noFragLengthDist", po::bool_switch(&(sopt.noFragLengthDist))->default_value(false), "[experimental] : "
                         "Don't consider concordance with the learned fragment length distribution when trying to determine "
                         "the probability that a fragment has originated from a specified location.  Normally, Fragments with "
                          "unlikely lengths will be assigned a smaller relative probability than those with more likely "
                         "lengths.  When this flag is passed in, the observed fragment length has no effect on that fragment's "
                         "a priori probability.")
-    ("noFragStartPosDist", po::bool_switch(&(sopt.noFragStartPosDist))->default_value(false), "[Currently Experimental] : "
-                        "Don't consider / model non-uniformity in the fragment start positions "
+    ("useFSPD", po::bool_switch(&(sopt.useFSPD))->default_value(false), "[experimental] : "
+                        "Consider / model non-uniformity in the fragment start positions "
                         "across the transcript.")
     /*
     // Don't expose this yet
@@ -894,19 +992,16 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
     ("numAuxModelSamples", po::value<uint32_t>(&(sopt.numBurninFrags))->default_value(5000000), "The first <numAuxModelSamples> are used to train the "
      			"auxiliary model parameters (e.g. fragment length distribution, bias, etc.).  After ther first <numAuxModelSamples> observations "
 			"the auxiliary model parameters will be assumed to have converged and will be fixed.")
-    ("sampleOut,s", po::bool_switch(&sampleOutput)->default_value(false), "Write a \"postSample.bam\" file in the output directory "
+    ("sampleOut,s", po::bool_switch(&(sopt.sampleOutput))->default_value(false), "Write a \"postSample.bam\" file in the output directory "
                         "that will sample the input alignments according to the estimated transcript abundances. If you're "
                         "going to perform downstream analysis of the alignments with tools which don't, themselves, take "
                         "fragment assignment ambiguity into account, you should use this output.")
-    ("sampleUnaligned,u", po::bool_switch(&sampleUnaligned)->default_value(false), "In addition to sampling the aligned reads, also write "
+    ("sampleUnaligned,u", po::bool_switch(&(sopt.sampleUnaligned))->default_value(false), "In addition to sampling the aligned reads, also write "
                         "the un-aligned reads to \"posSample.bam\".")
-    ("useMassBanking", po::bool_switch(&(sopt.useMassBanking))->default_value(false), "[Deprecated] : "
-                        "Use mass \"banking\" in subsequent epoch of inference.  Rather than re-observing uniquely "
-                        "mapped reads, simply remember the ratio of uniquely to ambiguously mapped reads for each "
-                        "transcript and distribute the unique mass uniformly throughout the epoch.")
-    ("useVBOpt,v", po::bool_switch(&(sopt.useVBOpt))->default_value(false), "Use the Variational Bayesian EM rather than the "
-     			"traditional EM algorithm for optimization in the batch passes.");
-
+    ("numGibbsSamples", po::value<uint32_t>(&(sopt.numGibbsSamples))->default_value(0), "Number of Gibbs sampling rounds to "
+     "perform.")
+    ("numBootstraps", po::value<uint32_t>(&(sopt.numBootstraps))->default_value(0), "Number of bootstrap samples to generate. Note: "
+      "This is mutually exclusive with Gibbs sampling.");
 
     po::options_description testing("\n"
             "testing options");
@@ -942,6 +1037,8 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
         }
         po::notify(vm);
 
+        sopt.alnMode = true;
+
         if (numThreads < 2) {
             fmt::print(stderr, "salmon requires at least 2 threads --- "
                                "setting # of threads = 2\n");
@@ -970,6 +1067,14 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
         std::string commentString = commentStream.str();
         fmt::print(stderr, "{}", commentString);
 
+        // TODO: Fix fragment start pos dist
+        // sopt.useFSPD = false;
+
+        // Get the time at the start of the run
+        std::time_t result = std::time(NULL);
+        std::string runStartTime(std::asctime(std::localtime(&result)));
+        runStartTime.pop_back(); // remove the newline
+
         // Verify the geneMap before we start doing any real work.
         bfs::path geneMapPath;
         if (vm.count("geneMap")) {
@@ -986,7 +1091,7 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
         vector<string> alignmentFileNames = vm["alignments"].as<vector<string>>();
         vector<bfs::path> alignmentFiles;
         for (auto& alignmentFileName : alignmentFileNames) {
-            bfs::path alignmentFile(alignmentFileName);//vm["alignments"].as<std::string>());
+            bfs::path alignmentFile(alignmentFileName);
             if (!bfs::exists(alignmentFile)) {
                 std::stringstream ss;
                 ss << "The provided alignment file: " << alignmentFile <<
@@ -1031,7 +1136,7 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
         bfs::path logDirectory = outputDirectory / "logs";
 
         // Create the logger and the logging directory
-        bfs::create_directory(logDirectory);
+        bfs::create_directories(logDirectory);
         if (!(bfs::exists(logDirectory) and bfs::is_directory(logDirectory))) {
             std::cerr << "Couldn't create log directory " << logDirectory << "\n";
             std::cerr << "exiting\n";
@@ -1052,7 +1157,15 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
         sopt.jointLog = jointLog;
         sopt.fileLog = fileLog;
 
-        if (!sampleOutput and sampleUnaligned) {
+        // Verify that no inconsistent options were provided
+        if (sopt.numGibbsSamples > 0 and sopt.numBootstraps > 0) {
+            jointLog->error("You cannot perform both Gibbs sampling and bootstrapping. "
+                            "Please choose one.");
+            jointLog->flush();
+            std::exit(1);
+        }
+
+        if (!sopt.sampleOutput and sopt.sampleUnaligned) {
             fmt::MemoryWriter wstr;
             wstr << "WARNING: you passed in the (-u/--sampleUnaligned) flag, but did not request a sampled "
                  << "output file (-s/--sampleOut).  This flag will be ignored!\n";
@@ -1067,12 +1180,10 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
             sopt.incompatPrior = std::log(sopt.incompatPrior);
         }
 
-        // If we made it this far, the output directory exists
-        bfs::path outputFile = outputDirectory / "quant.sf";
         // Now create a subdirectory for any parameters of interest
         bfs::path paramsDir = outputDirectory / "libParams";
         if (!boost::filesystem::exists(paramsDir)) {
-            if (!boost::filesystem::create_directory(paramsDir)) {
+            if (!boost::filesystem::create_directories(paramsDir)) {
                 fmt::print(stderr, "{}ERROR{}: Could not create "
                            "output directory for experimental parameter "
                            "estimates [{}]. exiting.", ioutils::SET_RED,
@@ -1081,6 +1192,22 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
             }
         }
 
+
+        // Write out information about the command / run
+        {
+            bfs::path cmdInfoPath = outputDirectory / "cmd_info.json";
+            std::ofstream os(cmdInfoPath.string());
+            cereal::JSONOutputArchive oa(os);
+            oa(cereal::make_nvp("salmon_version", std::string(salmon::version)));
+            for (auto& opt : orderedOptions.options) {
+                if (opt.value.size() == 1) {
+                    oa(cereal::make_nvp(opt.string_key, opt.value.front()));
+                } else {
+                    oa(cereal::make_nvp(opt.string_key, opt.value));
+                }
+            }
+        }
+
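For reference, the block above just walks the parsed program_options values and serializes them with cereal's JSON archive. A minimal, self-contained sketch of the same mechanism (the file name and option values below are placeholders for illustration, not taken from this patch):

#include <fstream>
#include <string>
#include <vector>
#include <cereal/archives/json.hpp>
#include <cereal/types/string.hpp>
#include <cereal/types/vector.hpp>

int main() {
    std::ofstream os("cmd_info.json");                 // placeholder output path
    cereal::JSONOutputArchive oa(os);                  // archive flushes on destruction
    oa(cereal::make_nvp("salmon_version", std::string("0.6.0")));
    oa(cereal::make_nvp("libType", std::string("IU")));
    std::vector<std::string> alignments{"aln1.bam", "aln2.bam"};
    oa(cereal::make_nvp("alignments", alignments));    // multi-valued options become JSON arrays
    return 0;
}
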
         // The transcript file contains the target sequences
         bfs::path transcriptFile(vm["targets"].as<std::string>());
 
@@ -1097,6 +1224,8 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
         sopt.numParseThreads = numParseThreads;
         std::cerr << "numQuantThreads = " << numQuantThreads << "\n";
 
+        bool success{false};
+
         switch (libFmt.type) {
             case ReadType::SINGLE_END:
                 {
@@ -1104,44 +1233,10 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
                                                           transcriptFile,
                                                           libFmt,
                                                           sopt);
-                    // EQCLASS
-                    alnLib.equivalenceClassBuilder().start();
-
-                    bool burnedIn = quantifyLibrary<UnpairedRead>(alnLib, requiredObservations, sopt);
-
-                    // EQCLASS
-                    CollapsedEMOptimizer optimizer;
-                    jointLog->info("starting optimizer");
-                    salmon::utils::normalizeAlphas(sopt, alnLib);
-                    optimizer.optimize(alnLib, sopt, 0.01, 10000);
-                    jointLog->info("finished optimizer");
-
-                    // EQCLASS
-                    fmt::print(stderr, "\n\nwriting output \n");
-                    salmon::utils::writeAbundancesFromCollapsed(
-                        sopt, alnLib, outputFile, commentString);
-
-                    //fmt::print(stderr, "\n\nwriting output \n");
-                    //salmon::utils::writeAbundances(sopt, alnLib, outputFile, commentString);
-
-
-                    if (sampleOutput) {
-                        // In this case, we should "re-convert" transcript
-                        // masses to be counts in log space
-                        auto nr = alnLib.numMappedReads();
-                        for (auto& t : alnLib.transcripts()) {
-                            double m = t.mass(false) * nr;
-                            if (m > 0.0) {
-                                t.setMass(std::log(m));
-                            }
-                        }
 
-                        bfs::path sampleFilePath = outputDirectory / "postSample.bam";
-                        bool didSample = salmon::sampler::sampleLibrary<UnpairedRead>(alnLib, sopt, burnedIn, sampleFilePath, sampleUnaligned);
-                        if (!didSample) {
-                            jointLog->warn("There may have been a problem generating the sampled output file; please check the log\n");
-                        }
-                    }
+                    success = processSample<UnpairedRead>(alnLib, runStartTime,
+                                                          requiredObservations, sopt,
+                                                          outputDirectory);
                 }
                 break;
             case ReadType::PAIRED_END:
@@ -1150,55 +1245,10 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
                                                       transcriptFile,
                                                       libFmt,
                                                       sopt);
-                    // EQCLASS
-                    alnLib.equivalenceClassBuilder().start();
-
-                    bool burnedIn = quantifyLibrary<ReadPair>(alnLib, requiredObservations, sopt);
-
-                    // EQCLASS
-                    CollapsedEMOptimizer optimizer;
-                    jointLog->info("starting optimizer");
-                    salmon::utils::normalizeAlphas(sopt, alnLib);
-                    optimizer.optimize(alnLib, sopt, 0.01, 10000);
-                    jointLog->info("finished optimizer");
-
-                    fmt::print(stderr, "\n\nwriting output \n");
-                    // EQCLASS
-                    salmon::utils::writeAbundancesFromCollapsed(
-                        sopt, alnLib, outputFile, commentString);
-
-                    /*
-                    fmt::print(stderr, "\n\nwriting output \n");
-                    salmon::utils::writeAbundances(sopt, alnLib, outputFile, commentString);
-                    */
-
-                                        // Test writing out the fragment length distribution
-                    if (!sopt.noFragLengthDist) {
-                        bfs::path distFileName = paramsDir / "flenDist.txt";
-                        {
-                            std::unique_ptr<std::FILE, int (*)(std::FILE *)> distOut(std::fopen(distFileName.c_str(), "w"), std::fclose);
-                            fmt::print(distOut.get(), "{}\n", alnLib.fragmentLengthDistribution().toString());
-                        }
-                    }
-
-                    if (sampleOutput) {
-                        // In this case, we should "re-convert" transcript
-                        // masses to be counts in log space
-                        auto nr = alnLib.numMappedReads();
-                        for (auto& t : alnLib.transcripts()) {
-                            double m = t.mass(false) * nr;
-                            if (m > 0.0) {
-                                t.setMass(std::log(m));
-                            }
-                        }
-
-                        bfs::path sampleFilePath = outputDirectory / "postSample.bam";
-                        bool didSample = salmon::sampler::sampleLibrary<ReadPair>(alnLib, sopt, burnedIn, sampleFilePath, sampleUnaligned);
-                        if (!didSample) {
-                            jointLog->warn("There may have been a problem generating the sampled output file; please check the log\n");
-                        }
 
-                    }
+                    success = processSample<ReadPair>(alnLib, runStartTime,
+                                                      requiredObservations, sopt,
+                                                      outputDirectory);
                 }
                 break;
             default:
@@ -1207,37 +1257,21 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
                 std::exit(1);
         }
 
-        bfs::path estFilePath = outputDirectory / "quant.sf";
-
-        if (biasCorrect) {
-            // First, compute the transcript features in case the user
-            // ever wants to bias-correct his / her results
-            bfs::path transcriptBiasFile(outputDirectory); transcriptBiasFile /= "bias_feats.txt";
-
-            bool useStreamingParser{true};
-            std::vector<std::string> transcriptFiles{transcriptFile.string()};
-            std::cerr << "computeBiasFeatures( {";
-            for (auto& tf : transcriptFiles) {
-                std::cerr << "[" << tf << "] ";
-            }
-            std::cerr << ", " << transcriptBiasFile << ", " << useStreamingParser << ", " << numThreads << ")\n";
-            computeBiasFeatures(transcriptFiles, transcriptBiasFile, useStreamingParser, numThreads);
-
-            auto origExpressionFile = estFilePath;
-
-            auto outputDirectory = estFilePath;
-            outputDirectory.remove_filename();
-
-            auto biasCorrectedFile = outputDirectory / "quant_bias_corrected.sf";
-            performBiasCorrectionSalmon(transcriptBiasFile, estFilePath, biasCorrectedFile, numThreads);
+        // Make sure the quantification was successful.
+        if (!success) {
+            jointLog->error("Quantification was un-successful.  Please check the log "
+                            "for information about why quantification failed. If this "
+                            "problem persists, please report this issue on GitHub.");
+            return 1;
         }
 
+        bfs::path estFilePath = outputDirectory / "quant.sf";
+
         /** If the user requested gene-level abundances, then compute those now **/
         if (vm.count("geneMap")) {
             try {
                 salmon::utils::generateGeneLevelEstimates(geneMapPath,
-                                                            outputDirectory,
-                                                            biasCorrect);
+                                                            outputDirectory);
             } catch (std::exception& e) {
                 fmt::print(stderr, "Error: [{}] when trying to compute gene-level "\
                                    "estimates. The gene-level file(s) may not exist",
diff --git a/src/SalmonUtils.cpp b/src/SalmonUtils.cpp
index 8498131..2fe3f76 100644
--- a/src/SalmonUtils.cpp
+++ b/src/SalmonUtils.cpp
@@ -6,6 +6,7 @@
 #include <unordered_set>
 #include <unordered_map>
 #include <vector>
+#include <random>
 #include <boost/filesystem.hpp>
 #include <boost/range/join.hpp>
 
@@ -83,9 +84,20 @@ namespace utils {
         return os;
     }
 
-    double logAlignFormatProb(const LibraryFormat observed, const LibraryFormat expected, double incompatPrior) {
-        // Allow orphaned reads in a paired-end library, but
-        // decrease their a priori probability.
+    double logAlignFormatProb(const LibraryFormat observed,
+                              const LibraryFormat expected,
+                              int32_t start, bool isForward,
+                              rapmap::utils::MateStatus ms,
+                              double incompatPrior) {
+        // If we're dealing with a single end read.
+        bool compat {false};
+        if (ms != rapmap::utils::MateStatus::PAIRED_END_PAIRED) {
+            compat = compatibleHit(expected, start, isForward, ms);
+        } else {
+            compat = compatibleHit(expected, observed);
+        }
+        return (compat) ? salmon::math::LOG_1 : incompatPrior;
+        /** Old compat code
         if (expected.type == ReadType::PAIRED_END and
             observed.type == ReadType::SINGLE_END) {
             double logOrphanProb = salmon::math::LOG_ORPHAN_PROB;
@@ -106,12 +118,6 @@ namespace utils {
                 if (expected.strandedness == observed.strandedness) {
                     return salmon::math::LOG_1;
                 } else {
-                    /**
-                    * Let's not complain about this for now, but find
-                    * a different way to report it.
-                    * std::cerr << "expected = " << expected << "\n";
-                    * std::cerr << "observed = " << observed << "\n";
-                    */
                     return incompatPrior;
                 }
             }
@@ -119,6 +125,92 @@ namespace utils {
 
         fmt::print(stderr, "WARNING: logAlignFormatProb --- should not get here");
         return salmon::math::LOG_0;
+        */
+    }
+
+    // for single end reads or orphans
+    bool compatibleHit(const LibraryFormat expected,
+            int32_t start, bool isForward, MateStatus ms) {
+        auto expectedStrand = expected.strandedness;
+        switch (ms) {
+            case MateStatus::SINGLE_END:
+                if (isForward) { // U, SF
+                    return (expectedStrand == ReadStrandedness::U or
+                            expectedStrand == ReadStrandedness::S);
+                } else { // U, SR
+                    return (expectedStrand == ReadStrandedness::U or
+                            expectedStrand == ReadStrandedness::A);
+                }
+                break;
+            case MateStatus::PAIRED_END_LEFT:
+                // "M"atching or same orientation is a special case
+                if (expected.orientation == ReadOrientation::SAME) {
+                    return (expectedStrand == ReadStrandedness::U
+                            or
+                            (expectedStrand == ReadStrandedness::S and isForward)
+                            or
+                            (expectedStrand == ReadStrandedness::A and !isForward));
+                } else if (isForward) { // IU, ISF, OU, OSF, MU, MSF
+                    return (expectedStrand == ReadStrandedness::U or
+                            expectedStrand == ReadStrandedness::S);
+                } else { // IU, ISR, OU, OSR, MU, MSR
+                    return (expectedStrand == ReadStrandedness::U or
+                            expectedStrand == ReadStrandedness::A);
+                }
+                break;
+            case MateStatus::PAIRED_END_RIGHT:
+                // "M"atching or same orientation is a special case
+                if (expected.orientation == ReadOrientation::SAME) {
+                    return (expectedStrand == ReadStrandedness::U
+                            or
+                            (expectedStrand == ReadStrandedness::S and isForward)
+                            or
+                            (expectedStrand == ReadStrandedness::A and !isForward));
+                } else if (isForward) { // IU, ISR, OU, OSR, MU, MSR
+                    return (expectedStrand == ReadStrandedness::U or
+                            expectedStrand == ReadStrandedness::A);
+                } else { // IU, ISF, OU, OSF, MU, MSF
+                    return (expectedStrand == ReadStrandedness::U or
+                            expectedStrand == ReadStrandedness::S);
+                }
+                break;
+            default:
+                // SHOULD NOT GET HERE
+                fmt::print(stderr, "WARNING: Could not associate known library type with read!\n");
+                return false;
+                break;
+        }
+        // SHOULD NOT GET HERE
+        fmt::print(stderr, "WARNING: Could not associate known library type with read!\n");
+        return false;
+    }
+
+
+    // for paired-end reads
+    bool compatibleHit(const LibraryFormat expected, const LibraryFormat observed) {
+        if (observed.type != ReadType::PAIRED_END) {
+            // SHOULD NOT GET HERE
+            fmt::print(stderr, "WARNING: PE compatibility function called with SE read!\n");
+            return false;
+        }
+
+        auto es = expected.strandedness;
+        auto eo = expected.orientation;
+
+        auto os = observed.strandedness;
+        auto oo = observed.orientation;
+
+        // If the orientations are different, they are incompatible
+        if (eo != oo) {
+            return false;
+        } else { // In this branch, the orientations are always compatible
+            return (es == ReadStrandedness::U or
+                    es == os);
+        }
+        // SHOULD NOT GET HERE
+        fmt::print(stderr, "WARNING: Could not determine strand compatibility!");
+        fmt::print(stderr, "please report this.\n");
+        return false;
     }
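The paired-end rule above boils down to: the orientations must match, and the expected strandedness must either be unstranded (U) or equal the observed strandedness. A standalone sketch of that rule with stand-in enums (it deliberately does not reuse the salmon headers):

#include <cstdio>

enum class Orientation { SAME, TOWARD, AWAY, NONE };
enum class Strandedness { U, S, A };

// Mirrors the paired-end compatibleHit() logic above.
bool pairedCompatible(Orientation eo, Strandedness es,
                      Orientation oo, Strandedness os) {
    if (eo != oo) { return false; }                 // orientations must agree
    return (es == Strandedness::U) or (es == os);   // unstranded accepts anything
}

int main() {
    // An unstranded expectation accepts either observed strandedness.
    std::printf("%d\n", pairedCompatible(Orientation::TOWARD, Strandedness::U,
                                         Orientation::TOWARD, Strandedness::S)); // 1
    // Mismatched orientations are always incompatible.
    std::printf("%d\n", pairedCompatible(Orientation::TOWARD, Strandedness::S,
                                         Orientation::AWAY,   Strandedness::S)); // 0
    return 0;
}
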
 
     template <typename ExpLib>
@@ -130,24 +222,28 @@ namespace utils {
         using salmon::math::LOG_0;
         using salmon::math::LOG_1;
 
+        // If we're using lightweight-alignment (FMD)
+        // and not allowing orphans.
+        bool useScaledCounts = (!sopt.useQuasi and sopt.allowOrphans == false);
+
         std::unique_ptr<std::FILE, int (*)(std::FILE *)> output(std::fopen(fname.c_str(), "w"), std::fclose);
 
         fmt::print(output.get(), "{}", headerComments);
-        fmt::print(output.get(), "# Name\tLength\tTPM\tNumReads\n");
+	fmt::print(output.get(), "Name\tLength\tEffectiveLength\tTPM\tNumReads\n");
 
         double numMappedFrags = alnLib.upperBoundHits();
 
         std::vector<Transcript>& transcripts_ = alnLib.transcripts();
         for (auto& transcript : transcripts_) {
-            transcript.projectedCounts =
-                transcript.mass(false) * numMappedFrags;
+            transcript.projectedCounts = useScaledCounts ?
+                (transcript.mass(false) * numMappedFrags) : transcript.sharedCount();
         }
 
         double tfracDenom{0.0};
         for (auto& transcript : transcripts_) {
             double refLength = sopt.noEffectiveLengthCorrection ?
                                transcript.RefLength :
-                               std::exp(transcript.getCachedEffectiveLength());
+                               std::exp(transcript.getCachedLogEffectiveLength());
             tfracDenom += (transcript.projectedCounts / numMappedFrags) / refLength;
         }
 
@@ -156,15 +252,14 @@ namespace utils {
         for (auto& transcript : transcripts_) {
             double logLength = sopt.noEffectiveLengthCorrection ?
                                std::log(transcript.RefLength) :
-                               transcript.getCachedEffectiveLength();
+                               transcript.getCachedLogEffectiveLength();
             double count = transcript.projectedCounts;
             double npm = (transcript.projectedCounts / numMappedFrags);
-            double refLength = std::exp(logLength);
-            double tfrac = (npm / refLength) / tfracDenom;
+            double effLength = std::exp(logLength);
+            double tfrac = (npm / effLength) / tfracDenom;
             double tpm = tfrac * million;
-
-            fmt::print(output.get(), "{}\t{}\t{}\t{}\n",
-                    transcript.RefName, transcript.RefLength,
+            fmt::print(output.get(), "{}\t{}\t{}\t{}\t{}\n",
+                    transcript.RefName, transcript.RefLength, effLength,
                     tpm, count);
         }
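The arithmetic in the output loop above reduces to TPM_i = 1e6 * (count_i / effLen_i) / sum_j (count_j / effLen_j), since the numMappedFrags factor cancels. A toy, self-contained check of that identity (the numbers are made up):

#include <cstdio>
#include <vector>

int main() {
    // Toy projected counts and effective lengths for three transcripts.
    std::vector<double> counts{100.0, 50.0, 10.0};
    std::vector<double> effLens{1000.0, 500.0, 2000.0};

    double denom = 0.0;
    for (size_t i = 0; i < counts.size(); ++i) {
        denom += counts[i] / effLens[i];            // sum_j count_j / effLen_j
    }
    for (size_t i = 0; i < counts.size(); ++i) {
        double tpm = 1e6 * (counts[i] / effLens[i]) / denom;
        std::printf("t%zu\tTPM = %.2f\n", i, tpm);
    }
    return 0;
}
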
 
@@ -185,12 +280,12 @@ namespace utils {
 
 
         auto& refs = alnLib.transcripts();
-        auto numMappedReads = alnLib.numMappedReads();
+        auto numMappedFragments = alnLib.numMappedFragments();
         const double logBillion = std::log(1000000000.0);
         const double million = 1000000.0;
-        const double logNumFragments = std::log(static_cast<double>(numMappedReads));
+        const double logNumFragments = std::log(static_cast<double>(numMappedFragments));
         const double upperBoundFactor = static_cast<double>(alnLib.upperBoundHits()) /
-                                        numMappedReads;
+                                        numMappedFragments;
 
         auto clusters = alnLib.clusterForest().getClusters();
         size_t clusterID = 0;
@@ -248,16 +343,16 @@ namespace utils {
         for (auto& transcript : transcripts_) {
             double refLength = sopt.noEffectiveLengthCorrection ?
                                transcript.RefLength :
-                               std::exp(transcript.getCachedEffectiveLength());
+                               std::exp(transcript.getCachedLogEffectiveLength());
             //refLength = transcript.RefLength;
-            tfracDenom += (transcript.projectedCounts / numMappedReads) / refLength;
+            tfracDenom += (transcript.projectedCounts / numMappedFragments) / refLength;
         }
 
         // Now posterior has the transcript fraction
         for (auto& transcript : transcripts_) {
             double logLength = sopt.noEffectiveLengthCorrection ?
                                std::log(transcript.RefLength) :
-                               transcript.getCachedEffectiveLength();
+                               transcript.getCachedLogEffectiveLength();
             /*
             if (!sopt.noSeqBiasModel) {
                 double avgLogBias = transcript.getAverageSequenceBias(
@@ -271,7 +366,7 @@ namespace utils {
             //double countTotal = transcripts_[transcriptID].totalCounts;
             //double countUnique = transcripts_[transcriptID].uniqueCounts;
             double fpkm = count > 0 ? fpkmFactor * count : 0.0;
-            double npm = (transcript.projectedCounts / numMappedReads);
+            double npm = (transcript.projectedCounts / numMappedFragments);
             double refLength = std::exp(logLength);
             double tfrac = (npm / refLength) / tfracDenom;
             double tpm = tfrac * million;
@@ -293,8 +388,8 @@ namespace utils {
         using salmon::math::LOG_1;
 
         auto& refs = alnLib.transcripts();
-        auto numMappedReads = alnLib.numMappedReads();
-        const double logNumFragments = std::log(static_cast<double>(numMappedReads));
+        auto numMappedFragments = alnLib.numMappedFragments();
+        const double logNumFragments = std::log(static_cast<double>(numMappedFragments));
         auto clusters = alnLib.clusterForest().getClusters();
         size_t clusterID = 0;
         for(auto cptr : clusters) {
@@ -347,12 +442,12 @@ namespace utils {
         auto& transcripts_ = refs;
         double nFracDenom{0.0};
         for (auto& transcript : transcripts_) {
-            nFracDenom += (transcript.projectedCounts / numMappedReads);
+            nFracDenom += (transcript.projectedCounts / numMappedFragments);
         }
 
-	double invNFracTotal = 1.0 / nFracDenom;
+	    double invNFracTotal = 1.0 / nFracDenom;
         for (auto& transcript : transcripts_) {
-		double v = transcript.projectedCounts / numMappedReads;
+		double v = transcript.projectedCounts / numMappedFragments;
 		//transcript.setMass(v * invNFracTotal);
 		transcript.setMass(transcript.projectedCounts);
         }
@@ -943,37 +1038,42 @@ TranscriptGeneMap transcriptToGeneMapFromFasta( const std::string& transcriptsFi
     return TranscriptGeneMap(transcriptNames, geneNames, t2g);
 }
 
+
 class ExpressionRecord {
-    public:
-        ExpressionRecord(const std::string& targetIn, uint32_t lengthIn,
-                         std::vector<double>& expValsIn) :
-            target(targetIn), length(lengthIn), expVals(expValsIn) {}
-
-        ExpressionRecord( ExpressionRecord&& other ) {
-            std::swap(target, other.target);
-            length = other.length;
-            std::swap(expVals, other.expVals);
-        }
+  public:
+    ExpressionRecord(const std::string& targetIn, uint32_t lengthIn, double effLengthIn,
+	std::vector<double>& expValsIn) :
+      target(targetIn), length(lengthIn), effLength(effLengthIn), expVals(expValsIn) {}
+
+    ExpressionRecord( ExpressionRecord&& other ) {
+      std::swap(target, other.target);
+      length = other.length;
+      effLength = other.effLength;
+      std::swap(expVals, other.expVals);
+    }
 
-        ExpressionRecord(std::vector<std::string>& inputLine) {
-            if (inputLine.size() < 3) {
-                std::string err ("Any expression line must contain at least 3 tokens");
-                throw std::invalid_argument(err);
-            } else {
-                auto it = inputLine.begin();
-                target = *it; ++it;
-                length = std::stoi(*it); ++it;
-                for (; it != inputLine.end(); ++it) {
-                    expVals.push_back(std::stod(*it));
-                }
-            }
-        }
+    ExpressionRecord(std::vector<std::string>& inputLine) {
+      if (inputLine.size() < 3) {
+	std::string err ("Any expression line must contain at least 3 tokens");
+	throw std::invalid_argument(err);
+      } else {
+	auto it = inputLine.begin();
+	target = *it; ++it;
+	length = std::stoi(*it); ++it;
+	effLength = std::stod(*it); ++it;
+	for (; it != inputLine.end(); ++it) {
+	  expVals.push_back(std::stod(*it));
+	}
+      }
+    }
 
-        std::string target;
-        uint32_t length;
-        std::vector<double> expVals;
+    std::string target;
+    uint32_t length;
+    double effLength;
+    std::vector<double> expVals;
 };
 
+
 // From : http://stackoverflow.com/questions/9435385/split-a-string-using-c11
 std::vector<std::string> split(const std::string& str, int delimiter(int) = ::isspace){
     using namespace std;
@@ -990,85 +1090,307 @@ std::vector<std::string> split(const std::string& str, int delimiter(int) = ::is
     return result;
 }
 
-void aggregateEstimatesToGeneLevel(TranscriptGeneMap& tgm, boost::filesystem::path& inputPath) {
+std::vector<int32_t> samplesFromLogPMF(FragmentLengthDistribution* fld, int32_t numSamples) {
+    std::vector<double> logPMF;
+    size_t minVal;
+    size_t maxVal;
+    double logFLDMean = fld->mean();
+    fld->dumpPMF(logPMF, minVal, maxVal);
+    double sum = salmon::math::LOG_0;
+    for (auto v : logPMF) {
+        sum = salmon::math::logAdd(sum, v);
+    }
+    for (auto& v : logPMF) {
+        v -= sum;
+    }
 
-    using std::vector;
-    using std::string;
-    using std::ofstream;
-    using std::unordered_map;
-    using std::move;
-    using std::cerr;
-    using std::max;
+    // Create the non-logged pmf
+    std::vector<double> pmf(maxVal + 1, 0.0);
+    for (size_t i = minVal; i < maxVal; ++i) {
+        pmf[i] = std::exp(logPMF[i-minVal]);
+    }
 
-    std::ifstream expFile(inputPath.string());
+    // generate samples
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::discrete_distribution<int32_t> dist(pmf.begin(), pmf.end());
 
-    if (!expFile.is_open()) {
-        perror("Error reading file");
+    std::vector<int32_t> samples(pmf.size());
+    for (int32_t i = 0; i < numSamples; ++i) {
+        ++samples[dist(gen)];
     }
+    return samples;
+}
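samplesFromLogPMF() above follows a standard pattern: log-normalize, exponentiate, draw with std::discrete_distribution, and tally a histogram of draws. A stripped-down sketch of that pattern with toy weights:

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <random>
#include <vector>

int main() {
    // Unnormalized log-weights over fragment lengths 0..3 (toy values).
    std::vector<double> logPMF{-2.0, -1.0, -0.5, -3.0};

    // Exponentiate; discrete_distribution normalizes the weights itself.
    std::vector<double> pmf;
    for (double lv : logPMF) { pmf.push_back(std::exp(lv)); }

    std::random_device rd;
    std::mt19937 gen(rd());
    std::discrete_distribution<int32_t> dist(pmf.begin(), pmf.end());

    // Tally 10,000 draws into a histogram indexed by fragment length.
    std::vector<int32_t> samples(pmf.size(), 0);
    for (int32_t i = 0; i < 10000; ++i) { ++samples[dist(gen)]; }

    for (size_t i = 0; i < samples.size(); ++i) {
        std::printf("len %zu: %d draws\n", i, samples[i]);
    }
    return 0;
}
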
 
-    //====================== From GeneSum ====================
-    vector<string> comments;
-    unordered_map<string, vector<ExpressionRecord>> geneExps;
-    string l;
-    size_t ln{0};
 
-    while (getline(expFile, l)) {
-        if (++ln % 1000 == 0) {
-            cerr << "\r\rParsed " << ln << " expression lines";
-        }
-        auto it = find_if(l.begin(), l.end(),
-                    [](char c) -> bool {return !isspace(c);});
-        if (it != l.end()) {
-            if (*it == '#') {
-                comments.push_back(l);
-            } else {
-                vector<string> toks = split(l);
-                ExpressionRecord er(toks);
-                auto gn = tgm.geneName(er.target);
-                geneExps[gn].push_back(move(er));
-            }
-        }
+/**
+ * Computes (and returns) new effective lengths for the transcripts
+ * based on the current abundance estimates (alphas) and the current
+ * effective lengths (effLensIn).  This approach is based on the one
+ * taken in Kallisto, and seems to work well given its low computational
+ * requirements.
+ */
+template <typename AbundanceVecT, typename ReadExpT>
+Eigen::VectorXd updateEffectiveLengths(ReadExpT& readExp,
+    Eigen::VectorXd& effLensIn,
+    AbundanceVecT& alphas,
+    std::vector<double>& transcriptKmerDist) {
+  using std::vector;
+  double minAlpha = 1e-8;
+
+  // calculate read bias normalization factor -- total count in read
+  // distribution.
+  auto& readBias = readExp.readBias();
+  int32_t K = readBias.getK();
+  double readNormFactor = static_cast<double>(readBias.totalCount());
+
+  // Reset the transcript (normalized) counts
+  transcriptKmerDist.clear();
+  transcriptKmerDist.resize(constExprPow(4, K), 1.0);
+
+  // Make this const so there are no shenanigans
+  const auto& transcripts = readExp.transcripts();
+
+  // The effective lengths adjusted for bias
+  Eigen::VectorXd effLensOut(effLensIn.size());
+
+  for(size_t it=0; it < transcripts.size(); ++it) {
+
+    // First in the forward direction
+    int32_t refLen = static_cast<int32_t>(transcripts[it].RefLength);
+    int32_t elen = static_cast<int32_t>(transcripts[it].EffectiveLength);
+
+    // How much of this transcript (beginning and end) should
+    // not be considered
+    int32_t unprocessedLen = std::max(0, refLen - elen);
+
+    // Skip transcripts with trivial expression or that are too
+    // short.
+    if (alphas[it] < minAlpha or unprocessedLen <= 0) {
+      continue;
+    }
+
+    // Otherwise, proceed with the following weight.
+    double contribution = 0.5*(alphas[it]/effLensIn(it));
+
+    // From the start of the transcript up until the last valid
+    // kmer.
+    bool firstKmer{true};
+    uint32_t idx{0};
+
+    // This transcript's sequence
+    const char* tseq = transcripts[it].Sequence;
+    if (!tseq) {
+        std::cerr << "Transcript " << transcripts[it].RefName << " had no sequence available.\n";
+        std::cerr << "To enable sequence-specific bias correction, you must provide a "
+                  << "reference file with sequences for all transcripts.\n";
+        return effLensIn;
+    }
+
+    // From the start of the transcript through the effective length
+    for (int32_t i = 0; i < elen - K; ++i) {
+      if (firstKmer) {
+	idx = indexForKmer(tseq, K, Direction::FORWARD);
+	firstKmer = false;
+      } else {
+	idx = nextKmerIndex(idx, tseq[i-1+K], K, Direction::FORWARD);
+      }
+      transcriptKmerDist[idx] += contribution;
+    }
+
+    // Then in the reverse complement direction
+    firstKmer = true;
+    idx = 0;
+    // Start from the end and go until the fragment length
+    // distribution says we should stop
+    for (int32_t i = refLen - K - 1; i >= unprocessedLen; --i) {
+      if (firstKmer) {
+	idx = indexForKmer(tseq + i, K, Direction::REVERSE_COMPLEMENT);
+	firstKmer = false;
+      } else {
+	idx = nextKmerIndex(idx, tseq[i], K, Direction::REVERSE_COMPLEMENT);
+      }
+      transcriptKmerDist[idx] += contribution;
     }
-    cerr << "\ndone\n";
-    expFile.close();
+  }
+
+  // The total mass of the transcript distribution
+  double txomeNormFactor = 0.0;
+  for(auto m : transcriptKmerDist) { txomeNormFactor += m; }
+
+  // Now, compute the effective length of each transcript using
+  // the k-mer biases
+  for(size_t it = 0; it < transcripts.size(); ++it) {
+    // Starts out as 0
+    double effLength = 0.0;
+
+    // First in the forward direction, from the start of the
+    // transcript up until the last valid kmer.
+    int32_t refLen = static_cast<int32_t>(transcripts[it].RefLength);
+    int32_t elen = static_cast<int32_t>(transcripts[it].EffectiveLength);
+
+    // How much of this transcript (beginning and end) should
+    // not be considered
+    int32_t unprocessedLen = std::max(0, refLen - elen);
+
+    if (alphas[it] >= minAlpha and unprocessedLen > 0) {
+      bool firstKmer{true};
+      uint32_t idx{0};
+      // This transcript's sequence
+      const char* tseq = transcripts[it].Sequence;
+
+      for (int32_t i = 0; i < elen - K; ++i) {
+	if (firstKmer) {
+	  idx = indexForKmer(tseq, K, Direction::FORWARD);
+	  firstKmer = false;
+	} else {
+	  idx = nextKmerIndex(idx, tseq[i-1+K], K, Direction::FORWARD);
+	}
+	effLength += (readBias.counts[idx]/transcriptKmerDist[idx]);
+      }
+
+      // Then in the reverse complement direction
+      firstKmer = true;
+      idx = 0;
+      // Start from the end and go until the fragment length
+      // distribution says we should stop
+      for (int32_t i = refLen - K - 1; i >= unprocessedLen; --i) {
+	if (firstKmer) {
+	  idx = indexForKmer(tseq + i, K, Direction::REVERSE_COMPLEMENT);
+	  firstKmer = false;
+	} else {
+	  idx = nextKmerIndex(idx, tseq[i], K, Direction::REVERSE_COMPLEMENT);
+	}
+	effLength += (readBias.counts[idx]/transcriptKmerDist[idx]);
+      }
 
-    cerr << "Aggregating expressions to gene level . . .";
-    boost::filesystem::path outputFilePath(inputPath);
-    outputFilePath.replace_extension(".genes.sf");
-    ofstream outFile(outputFilePath.string());
+      effLength *= 0.5 * (txomeNormFactor / readNormFactor);
+    }
 
-    // preserve any comments in the output
-    for (auto& c : comments) {
-        outFile << c << '\n';
+    if(unprocessedLen > 0.0 and effLength > unprocessedLen) {
+      effLensOut(it) = effLength;
+    } else {
+      effLensOut(it) = effLensIn(it);
     }
+  }
 
-    for (auto& kv : geneExps) {
-        auto& gn = kv.first;
+  return effLensOut;
+}
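The accumulation inside updateEffectiveLengths() is a ratio estimator: each visited k-mer contributes (observed read-start k-mer count) / (expected k-mer mass from the transcriptome), and the sum is rescaled by 0.5 * (txomeNormFactor / readNormFactor). A compact illustration of just that accumulation with made-up counts (not the salmon data structures):

#include <cstdio>
#include <vector>

int main() {
    // Toy observed 5'-end k-mer counts from the reads (readBias.counts analogue)
    // and expected k-mer mass from the transcriptome (transcriptKmerDist analogue),
    // both indexed by k-mer id.
    std::vector<double> readKmerCounts{40.0, 10.0, 30.0, 20.0};
    std::vector<double> txpKmerMass  { 2.0,  1.0,  3.0,  2.0};

    double readNormFactor  = 0.0;
    double txomeNormFactor = 0.0;
    for (double c : readKmerCounts) { readNormFactor  += c; }
    for (double m : txpKmerMass)    { txomeNormFactor += m; }

    // Suppose a transcript's forward + reverse-complement walk visits k-mers 0, 2, 1, 3.
    std::vector<int> visited{0, 2, 1, 3};
    double effLength = 0.0;
    for (int idx : visited) {
        effLength += readKmerCounts[idx] / txpKmerMass[idx];   // observed / expected
    }
    effLength *= 0.5 * (txomeNormFactor / readNormFactor);     // same rescaling as above

    std::printf("bias-adjusted effective length contribution: %.3f\n", effLength);
    return 0;
}
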
 
-        uint32_t geneLength{kv.second.front().length};
-        vector<double> expVals(kv.second.front().expVals.size(), 0);
-        const size_t NE{expVals.size()};
 
+void aggregateEstimatesToGeneLevel(TranscriptGeneMap& tgm, boost::filesystem::path& inputPath) {
+  using std::vector;
+  using std::string;
+  using std::ofstream;
+  using std::unordered_map;
+  using std::move;
+  using std::cerr;
+  using std::max;
+
+  constexpr double minTPM = std::numeric_limits<double>::denorm_min();
+  std::ifstream expFile(inputPath.string());
+
+  if (!expFile.is_open()) {
+    perror("Error reading file");
+  }
+
+  //====================== From GeneSum ====================
+  vector<string> comments;
+  unordered_map<string, vector<ExpressionRecord>> geneExps;
+  string l;
+  size_t ln{0};
+
+
+  bool headerLine{true};
+  while (getline(expFile, l)) {
+      if (++ln % 1000 == 0) {
+          cerr << "\r\rParsed " << ln << " expression lines";
+      }
+      auto it = find_if(l.begin(), l.end(),
+              [](char c) -> bool {return !isspace(c);});
+      if (it != l.end()) {
+          if (*it == '#') {
+              comments.push_back(l);
+          } else {
+              // If this isn't the first non-comment line
+              if (!headerLine) {
+                  vector<string> toks = split(l);
+                  ExpressionRecord er(toks);
+                  auto gn = tgm.geneName(er.target);
+                  geneExps[gn].push_back(move(er));
+              } else { // treat the header line as a comment
+                  comments.push_back(l);
+                  headerLine = false;
+              }
+          }
+      }
+  }
+  cerr << "\ndone\n";
+  expFile.close();
+
+  cerr << "Aggregating expressions to gene level . . .";
+  boost::filesystem::path outputFilePath(inputPath);
+  outputFilePath.replace_extension(".genes.sf");
+  ofstream outFile(outputFilePath.string());
+
+  // preserve any comments in the output
+  for (auto& c : comments) {
+    outFile << c << '\n';
+  }
+
+  for (auto& kv : geneExps) {
+    auto& gn = kv.first;
+
+    double geneLength = kv.second.front().length;
+    double geneEffLength = kv.second.front().effLength;
+    vector<double> expVals(kv.second.front().expVals.size(), 0);
+    const size_t NE{expVals.size()};
+
+    size_t tpmIdx{0};
+    double totalTPM{0.0};
+    for (auto& tranExp : kv.second) {
+      // expVals[0] = TPM
+      // expVals[1] = count
+      for (size_t i = 0; i < NE; ++i) { expVals[i] += tranExp.expVals[i]; }
+      totalTPM += expVals[tpmIdx];
+    }
+
+    // If this gene was expressed
+    if (totalTPM > minTPM) {
+        geneLength = 0.0;
+        geneEffLength = 0.0;
         for (auto& tranExp : kv.second) {
-            geneLength = max(geneLength, tranExp.length);
-            for (size_t i = 0; i < NE; ++i) { expVals[i] += tranExp.expVals[i]; }
+            double frac = tranExp.expVals[tpmIdx] / totalTPM;
+            geneLength += tranExp.length * frac;
+            geneEffLength += tranExp.effLength * frac;
         }
-
-        outFile << gn << '\t' << geneLength;
-        for (size_t i = 0; i < NE; ++i) {
-            outFile << '\t' << expVals[i];
+    } else {
+        geneLength = 0.0;
+        geneEffLength = 0.0;
+        double frac = 1.0 / kv.second.size();
+        for (auto& tranExp : kv.second) {
+            geneLength += tranExp.length * frac;
+            geneEffLength += tranExp.effLength * frac;
         }
-        outFile << '\n';
     }
 
-    outFile.close();
-    cerr << " done\n";
-    //====================== From GeneSum =====================
+    // Otherwise, if the gene wasn't expressed, the length and
+    // effective length are reported as the simple (unweighted)
+    // average of its transcripts' lengths.
+
+    outFile << gn << '\t' << geneLength << '\t' << geneEffLength;
+    for (size_t i = 0; i < NE; ++i) {
+      outFile << '\t' << expVals[i];
+    }
+    outFile << '\n';
+  }
+
+  outFile.close();
+  cerr << " done\n";
+  //====================== From GeneSum =====================
 }
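For an expressed gene, the aggregation above reports a TPM-weighted average of its transcripts' lengths and effective lengths alongside the summed TPM and counts. A small worked sketch with two hypothetical transcripts:

#include <cstdio>
#include <vector>

struct TxpRec { double length, effLength, tpm, reads; };

int main() {
    // Two hypothetical transcripts belonging to one gene.
    std::vector<TxpRec> txps{{1500.0, 1300.0, 80.0, 900.0},
                             { 900.0,  700.0, 20.0, 150.0}};

    double totalTPM = 0.0, totalReads = 0.0;
    for (auto& t : txps) { totalTPM += t.tpm; totalReads += t.reads; }

    double geneLen = 0.0, geneEffLen = 0.0;
    for (auto& t : txps) {
        double frac = t.tpm / totalTPM;        // TPM-weighted contribution
        geneLen    += t.length    * frac;
        geneEffLen += t.effLength * frac;
    }
    // geneLen = 0.8*1500 + 0.2*900 = 1380; geneEffLen = 0.8*1300 + 0.2*700 = 1180
    std::printf("gene\t%.1f\t%.1f\t%.1f\t%.1f\n", geneLen, geneEffLen, totalTPM, totalReads);
    return 0;
}
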
 
 void generateGeneLevelEstimates(boost::filesystem::path& geneMapPath,
-                                boost::filesystem::path& estDir,
-                                bool haveBiasCorrectedFile) {
+                                boost::filesystem::path& estDir) {
     namespace bfs = boost::filesystem;
     std::cerr << "Computing gene-level abundance estimates\n";
     bfs::path gtfExtension(".gtf");
@@ -1099,6 +1421,7 @@ void generateGeneLevelEstimates(boost::filesystem::path& geneMapPath,
     }
 
     /** Create a gene-level summary of the bias-corrected estimates as well if these exist **/
+    /*
     if (haveBiasCorrectedFile) {
         bfs::path biasCorrectEstFilePath = estDir / "quant_bias_corrected.sf";
         if (!bfs::exists(biasCorrectEstFilePath)) {
@@ -1110,11 +1433,15 @@ void generateGeneLevelEstimates(boost::filesystem::path& geneMapPath,
             salmon::utils::aggregateEstimatesToGeneLevel(tranGeneMap, biasCorrectEstFilePath);
         }
     }
+    */
 }
 
 }
 }
 
+
+// === Explicit instantiations
+
 template
 void salmon::utils::writeAbundances<AlignmentLibrary<ReadPair>>(
                                               const SalmonOpts& opts,
@@ -1165,6 +1492,49 @@ template
 void salmon::utils::normalizeAlphas<AlignmentLibrary<ReadPair>>(const SalmonOpts& sopt,
                          	     AlignmentLibrary<ReadPair>& alnLib);
 
+
+template Eigen::VectorXd salmon::utils::updateEffectiveLengths<std::vector<tbb::atomic<double>>, ReadExperiment>(
+                ReadExperiment& readExp,
+                Eigen::VectorXd& effLensIn,
+                std::vector<tbb::atomic<double>>& alphas,
+                std::vector<double>& expectedBias
+                );
+
+template Eigen::VectorXd salmon::utils::updateEffectiveLengths<std::vector<double>, ReadExperiment>(
+                ReadExperiment& readExp,
+                Eigen::VectorXd& effLensIn,
+                std::vector<double>& alphas,
+                std::vector<double>& expectedBias
+                );
+
+template Eigen::VectorXd salmon::utils::updateEffectiveLengths<std::vector<tbb::atomic<double>>, AlignmentLibrary<ReadPair>>(
+                AlignmentLibrary<ReadPair>& readExp,
+                Eigen::VectorXd& effLensIn,
+                std::vector<tbb::atomic<double>>& alphas,
+                std::vector<double>& expectedBias
+                );
+
+template Eigen::VectorXd salmon::utils::updateEffectiveLengths<std::vector<double>, AlignmentLibrary<ReadPair>>(
+                AlignmentLibrary<ReadPair>& readExp,
+                Eigen::VectorXd& effLensIn,
+                std::vector<double>& alphas,
+                std::vector<double>& expectedBias
+                );
+
+template Eigen::VectorXd salmon::utils::updateEffectiveLengths<std::vector<tbb::atomic<double>>, AlignmentLibrary<UnpairedRead>>(
+                AlignmentLibrary<UnpairedRead>& readExp,
+                Eigen::VectorXd& effLensIn,
+                std::vector<tbb::atomic<double>>& alphas,
+                std::vector<double>& expectedBias
+                );
+
+template Eigen::VectorXd salmon::utils::updateEffectiveLengths<std::vector<double>, AlignmentLibrary<UnpairedRead>>(
+                AlignmentLibrary<UnpairedRead>& readExp,
+                Eigen::VectorXd& effLensIn,
+                std::vector<double>& alphas,
+                std::vector<double>& expectedBias
+                );
+
 // Old / unused code
 
 /*
diff --git a/src/SequenceBiasModel.cpp b/src/SequenceBiasModel.cpp
index ac33e06..89b9583 100644
--- a/src/SequenceBiasModel.cpp
+++ b/src/SequenceBiasModel.cpp
@@ -3,7 +3,7 @@
 
 #include <boost/config.hpp> // for BOOST_LIKELY/BOOST_UNLIKELY
 
-#include "format.h"
+#include "spdlog/details/format.h"
 #include "SequenceBiasModel.hpp"
 #include "LibraryFormat.hpp"
 #include "Transcript.hpp"
diff --git a/src/TranscriptGroup.cpp b/src/TranscriptGroup.cpp
index 0870a05..576c026 100644
--- a/src/TranscriptGroup.cpp
+++ b/src/TranscriptGroup.cpp
@@ -3,17 +3,14 @@
 
 #include "TranscriptGroup.hpp"
 #include "SalmonMath.hpp"
-
+#include "xxhash.h"
 
 TranscriptGroup::TranscriptGroup() : hash(0) {}
 
 TranscriptGroup::TranscriptGroup(std::vector<uint32_t> txpsIn) : txps(txpsIn),
     valid(true) {
         size_t seed{0};
-        for (auto e : txps) {
-            boost::hash_combine(seed, e);
-        }
-        hash = seed;
+        hash = XXH64(static_cast<void*>(txps.data()), txps.size() * sizeof(uint32_t), seed);
     }
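The replacement above hashes the whole transcript-id buffer in one XXH64 call instead of combining per-element hashes. A minimal usage sketch (assumes the bundled xxhash header and library are available; the ids below are arbitrary):

#include <cstdint>
#include <cstdio>
#include <vector>
#include "xxhash.h"

int main() {
    std::vector<uint32_t> txps{3, 17, 42};   // hypothetical transcript ids
    size_t seed{0};
    // Hash the contiguous uint32_t buffer in a single call.
    uint64_t h = XXH64(static_cast<const void*>(txps.data()),
                       txps.size() * sizeof(uint32_t), seed);
    std::printf("equivalence-class hash: %llu\n",
                static_cast<unsigned long long>(h));
    return 0;
}
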
 
 TranscriptGroup::TranscriptGroup(
diff --git a/src/cokus.cpp b/src/cokus.cpp
deleted file mode 100755
index 103e7d3..0000000
--- a/src/cokus.cpp
+++ /dev/null
@@ -1,196 +0,0 @@
-// This is the Mersenne Twister random number generator MT19937, which
-// generates pseudorandom integers uniformly distributed in 0..(2^32 - 1)
-// starting from any odd seed in 0..(2^32 - 1).  This version is a recode
-// by Shawn Cokus (Cokus at math.washington.edu) on March 8, 1998 of a version by
-// Takuji Nishimura (who had suggestions from Topher Cooper and Marc Rieffel in
-// July-August 1997).
-//
-// Effectiveness of the recoding (on Goedel2.math.washington.edu, a DEC Alpha
-// running OSF/1) using GCC -O3 as a compiler: before recoding: 51.6 sec. to
-// generate 300 million random numbers; after recoding: 24.0 sec. for the same
-// (i.e., 46.5% of original time), so speed is now about 12.5 million random
-// number generations per second on this machine.
-//
-// According to the URL <http://www.math.keio.ac.jp/~matumoto/emt.html>
-// (and paraphrasing a bit in places), the Mersenne Twister is ``designed
-// with consideration of the flaws of various existing generators,'' has
-// a period of 2^19937 - 1, gives a sequence that is 623-dimensionally
-// equidistributed, and ``has passed many stringent tests, including the
-// die-hard test of G. Marsaglia and the load test of P. Hellekalek and
-// S. Wegenkittl.''  It is efficient in memory usage (typically using 2506
-// to 5012 bytes of static data, depending on data type sizes, and the code
-// is quite short as well).  It generates random numbers in batches of 624
-// at a time, so the caching and pipelining of modern systems is exploited.
-// It is also divide- and mod-free.
-//
-// This library is free software; you can redistribute it and/or modify it
-// under the terms of the GNU Library General Public License as published by
-// the Free Software Foundation (either version 2 of the License or, at your
-// option, any later version).  This library is distributed in the hope that
-// it will be useful, but WITHOUT ANY WARRANTY, without even the implied
-// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
-// the GNU Library General Public License for more details.  You should have
-// received a copy of the GNU Library General Public License along with this
-// library; if not, write to the Free Software Foundation, Inc., 59 Temple
-// Place, Suite 330, Boston, MA 02111-1307, USA.
-//
-// The code as Shawn received it included the following notice:
-//
-//   Copyright (C) 1997 Makoto Matsumoto and Takuji Nishimura.  When
-//   you use this, send an e-mail to <matumoto at math.keio.ac.jp> with
-//   an appropriate reference to your work.
-//
-// It would be nice to CC: <Cokus at math.washington.edu> when you write.
-//
-
-//#include <stdio.h>
-//#include <stdlib.h>
-
-//
-// uint32 must be an unsigned integer type capable of holding at least 32
-// bits; exactly 32 should be fastest, but 64 is better on an Alpha with
-// GCC at -O3 optimization so try your options and see whats best for you
-//
-
-typedef unsigned long uint32;
-
-#define N              (624)                 // length of state vector
-#define M              (397)                 // a period parameter
-#define K              (0x9908B0DFU)         // a magic constant
-#define hiBit(u)       ((u) & 0x80000000U)   // mask all but highest   bit of u
-#define loBit(u)       ((u) & 0x00000001U)   // mask all but lowest    bit of u
-#define loBits(u)      ((u) & 0x7FFFFFFFU)   // mask     the highest   bit of u
-#define mixBits(u, v)  (hiBit(u)|loBits(v))  // move hi bit of u to hi bit of v
-
-static uint32   state[N+1];     // state vector + 1 extra to not violate ANSI C
-static uint32   *next;          // next random value is computed from here
-static int      left = -1;      // can *next++ this many times before reloading
-
-
-void seedMT(uint32 seed)
- {
-    //
-    // We initialize state[0..(N-1)] via the generator
-    //
-    //   x_new = (69069 * x_old) mod 2^32
-    //
-    // from Line 15 of Table 1, p. 106, Sec. 3.3.4 of Knuths
-    // _The Art of Computer Programming_, Volume 2, 3rd ed.
-    //
-    // Notes (SJC): I do not know what the initial state requirements
-    // of the Mersenne Twister are, but it seems this seeding generator
-    // could be better.  It achieves the maximum period for its modulus
-    // (2^30) iff x_initial is odd (p. 20-21, Sec. 3.2.1.2, Knuth); if
-    // x_initial can be even, you have sequences like 0, 0, 0, ...;
-    // 2^31, 2^31, 2^31, ...; 2^30, 2^30, 2^30, ...; 2^29, 2^29 + 2^31,
-    // 2^29, 2^29 + 2^31, ..., etc. so I force seed to be odd below.
-    //
-    // Even if x_initial is odd, if x_initial is 1 mod 4 then
-    //
-    //   the          lowest bit of x is always 1,
-    //   the  next-to-lowest bit of x is always 0,
-    //   the 2nd-from-lowest bit of x alternates      ... 0 1 0 1 0 1 0 1 ... ,
-    //   the 3rd-from-lowest bit of x 4-cycles        ... 0 1 1 0 0 1 1 0 ... ,
-    //   the 4th-from-lowest bit of x has the 8-cycle ... 0 0 0 1 1 1 1 0 ... ,
-    //    ...
-    //
-    // and if x_initial is 3 mod 4 then
-    //
-    //   the          lowest bit of x is always 1,
-    //   the  next-to-lowest bit of x is always 1,
-    //   the 2nd-from-lowest bit of x alternates      ... 0 1 0 1 0 1 0 1 ... ,
-    //   the 3rd-from-lowest bit of x 4-cycles        ... 0 0 1 1 0 0 1 1 ... ,
-    //   the 4th-from-lowest bit of x has the 8-cycle ... 0 0 1 1 1 1 0 0 ... ,
-    //    ...
-    //
-    // The generators potency (min. s>=0 with (69069-1)^s = 0 mod 2^32) is
-    // 16, which seems to be alright by p. 25, Sec. 3.2.1.3 of Knuth.  It
-    // also does well in the dimension 2..5 spectral tests, but it could be
-    // better in dimension 6 (Line 15, Table 1, p. 106, Sec. 3.3.4, Knuth).
-    //
-    // Note that the random number user does not see the values generated
-    // here directly since reloadMT() will always munge them first, so maybe
-    // none of all of this matters.  In fact, the seed values made here could
-    // even be extra-special desirable if the Mersenne Twister theory says
-    // so-- thats why the only change I made is to restrict to odd seeds.
-    //
-
-    register uint32 x = (seed | 1U) & 0xFFFFFFFFU, *s = state;
-    register int    j;
-
-    for(left=0, *s++=x, j=N; --j;
-        *s++ = (x*=69069U) & 0xFFFFFFFFU);
- }
-
-
-uint32 reloadMT(void)
- {
-    register uint32 *p0=state, *p2=state+2, *pM=state+M, s0, s1;
-    register int    j;
-
-    if(left < -1)
-        seedMT(4357U);
-
-    left=N-1, next=state+1;
-
-    for(s0=state[0], s1=state[1], j=N-M+1; --j; s0=s1, s1=*p2++)
-        *p0++ = *pM++ ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U);
-
-    for(pM=state, j=M; --j; s0=s1, s1=*p2++)
-        *p0++ = *pM++ ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U);
-
-    s1=state[0], *p0 = *pM ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U);
-    s1 ^= (s1 >> 11);
-    s1 ^= (s1 <<  7) & 0x9D2C5680U;
-    s1 ^= (s1 << 15) & 0xEFC60000U;
-    return(s1 ^ (s1 >> 18));
- }
-
-
-uint32 randomMT(void)
- {
-    uint32 y;
-
-    if(--left < 0)
-        return(reloadMT());
-
-    y  = *next++;
-    y ^= (y >> 11);
-    y ^= (y <<  7) & 0x9D2C5680U;
-    y ^= (y << 15) & 0xEFC60000U;
-    y ^= (y >> 18);
-    return(y);
- }
-
-/*
- #define uint32 unsigned long
-#define SMALL_INT char
-#define SMALL_INT_CLASS mxCHAR_CLASS
-void seedMT(uint32 seed);
-uint32 randomMT(void);
-
-#include "stdio.h"
-#include "math.h"
-
-int main(void)
- {
-    int j;
-
-    // you can seed with any uint32, but the best are odds in 0..(2^32 - 1)
-
-    seedMT(4357U);
-    uint32 MAX=pow(2,32)-1;
-    // print the first 2,002 random numbers seven to a line as an example
-
-    for(j=0; j<2002; j++)
-        printf(" %10lu%s", (unsigned long) randomMT(), (j%7)==6 ? "\n" : "");
-    
-    for(j=0; j<2002; j++)
-        printf(" %f%s", ((double)randomMT()/(double)MAX), (j%7)==6 ? "\n" : "");
-
-    
-    return(1);
- }
-*/
-
-
diff --git a/src/posix.cc b/src/posix.cc
deleted file mode 100644
index 0efb5af..0000000
--- a/src/posix.cc
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- A C++ interface to POSIX functions.
-
- Copyright (c) 2014 - 2015, Victor Zverovich
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice, this
-    list of conditions and the following disclaimer.
- 2. Redistributions in binary form must reproduce the above copyright notice,
-    this list of conditions and the following disclaimer in the documentation
-    and/or other materials provided with the distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// Disable bogus MSVC warnings.
-#ifndef _CRT_SECURE_NO_WARNINGS
-# define _CRT_SECURE_NO_WARNINGS
-#endif
-
-#include "posix.h"
-
-#include <limits.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#ifndef _WIN32
-# include <unistd.h>
-#else
-# include <windows.h>
-# include <io.h>
-
-# define O_CREAT _O_CREAT
-# define O_TRUNC _O_TRUNC
-
-# ifndef S_IRUSR
-#  define S_IRUSR _S_IREAD
-# endif
-
-# ifndef S_IWUSR
-#  define S_IWUSR _S_IWRITE
-# endif
-
-# ifdef __MINGW32__
-#  define _SH_DENYNO 0x40
-#  undef fileno
-# endif
-
-#endif  // _WIN32
-
-namespace {
-#ifdef _WIN32
-// Return type of read and write functions.
-typedef int RWResult;
-
-// On Windows the count argument to read and write is unsigned, so convert
-// it from size_t, preventing integer overflow.
-inline unsigned convert_rwcount(std::size_t count) {
-  return count <= UINT_MAX ? static_cast<unsigned>(count) : UINT_MAX;
-}
-#else
-// Return type of read and write functions.
-typedef ssize_t RWResult;
-
-inline std::size_t convert_rwcount(std::size_t count) { return count; }
-#endif
-}
-
-fmt::BufferedFile::~BufferedFile() FMT_NOEXCEPT {
-  if (file_ && FMT_SYSTEM(fclose(file_)) != 0)
-    fmt::report_system_error(errno, "cannot close file");
-}
-
-fmt::BufferedFile::BufferedFile(fmt::StringRef filename, fmt::StringRef mode) {
-  FMT_RETRY_VAL(file_, FMT_SYSTEM(fopen(filename.c_str(), mode.c_str())), 0);
-  if (!file_)
-    throw SystemError(errno, "cannot open file {}", filename);
-}
-
-void fmt::BufferedFile::close() {
-  if (!file_)
-    return;
-  int result = FMT_SYSTEM(fclose(file_));
-  file_ = 0;
-  if (result != 0)
-    throw SystemError(errno, "cannot close file");
-}
-
-// A macro used to prevent expansion of fileno on broken versions of MinGW.
-#define FMT_ARGS
-
-int fmt::BufferedFile::fileno() const {
-  int fd = FMT_POSIX_CALL(fileno FMT_ARGS(file_));
-  if (fd == -1)
-    throw SystemError(errno, "cannot get file descriptor");
-  return fd;
-}
-
-fmt::File::File(fmt::StringRef path, int oflag) {
-  int mode = S_IRUSR | S_IWUSR;
-#if defined(_WIN32) && !defined(__MINGW32__)
-  fd_ = -1;
-  FMT_POSIX_CALL(sopen_s(&fd_, path.c_str(), oflag, _SH_DENYNO, mode));
-#else
-  FMT_RETRY(fd_, FMT_POSIX_CALL(open(path.c_str(), oflag, mode)));
-#endif
-  if (fd_ == -1)
-    throw SystemError(errno, "cannot open file {}", path);
-}
-
-fmt::File::~File() FMT_NOEXCEPT {
-  // Don't retry close in case of EINTR!
-  // See http://linux.derkeiler.com/Mailing-Lists/Kernel/2005-09/3000.html
-  if (fd_ != -1 && FMT_POSIX_CALL(close(fd_)) != 0)
-    fmt::report_system_error(errno, "cannot close file");
-}
-
-void fmt::File::close() {
-  if (fd_ == -1)
-    return;
-  // Don't retry close in case of EINTR!
-  // See http://linux.derkeiler.com/Mailing-Lists/Kernel/2005-09/3000.html
-  int result = FMT_POSIX_CALL(close(fd_));
-  fd_ = -1;
-  if (result != 0)
-    throw SystemError(errno, "cannot close file");
-}
-
-fmt::LongLong fmt::File::size() const {
-#ifdef _WIN32
-  // Use GetFileSize instead of GetFileSizeEx for the case when _WIN32_WINNT
-  // is less than 0x0500 as is the case with some default MinGW builds.
-  // Both functions support large file sizes.
-  DWORD size_upper = 0;
-  HANDLE handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd_));
-  DWORD size_lower = FMT_SYSTEM(GetFileSize(handle, &size_upper));
-  if (size_lower == INVALID_FILE_SIZE) {
-    DWORD error = GetLastError();
-    if (error != NO_ERROR)
-      throw WindowsError(GetLastError(), "cannot get file size");
-  }
-  fmt::ULongLong size = size_upper;
-  return (size << sizeof(DWORD) * CHAR_BIT) | size_lower;
-#else
-  typedef struct stat Stat;
-  Stat file_stat = Stat();
-  if (FMT_POSIX_CALL(fstat(fd_, &file_stat)) == -1)
-    throw SystemError(errno, "cannot get file attributes");
-  FMT_STATIC_ASSERT(sizeof(fmt::LongLong) >= sizeof(file_stat.st_size),
-      "return type of File::size is not large enough");
-  return file_stat.st_size;
-#endif
-}
-
-std::size_t fmt::File::read(void *buffer, std::size_t count) {
-  RWResult result = 0;
-  FMT_RETRY(result, FMT_POSIX_CALL(read(fd_, buffer, convert_rwcount(count))));
-  if (result < 0)
-    throw SystemError(errno, "cannot read from file");
-  return result;
-}
-
-std::size_t fmt::File::write(const void *buffer, std::size_t count) {
-  RWResult result = 0;
-  FMT_RETRY(result, FMT_POSIX_CALL(write(fd_, buffer, convert_rwcount(count))));
-  if (result < 0)
-    throw SystemError(errno, "cannot write to file");
-  return result;
-}
-
-fmt::File fmt::File::dup(int fd) {
-  // Don't retry as dup doesn't return EINTR.
-  // http://pubs.opengroup.org/onlinepubs/009695399/functions/dup.html
-  int new_fd = FMT_POSIX_CALL(dup(fd));
-  if (new_fd == -1)
-    throw SystemError(errno, "cannot duplicate file descriptor {}", fd);
-  return File(new_fd);
-}
-
-void fmt::File::dup2(int fd) {
-  int result = 0;
-  FMT_RETRY(result, FMT_POSIX_CALL(dup2(fd_, fd)));
-  if (result == -1) {
-    throw SystemError(errno,
-      "cannot duplicate file descriptor {} to {}", fd_, fd);
-  }
-}
-
-void fmt::File::dup2(int fd, ErrorCode &ec) FMT_NOEXCEPT {
-  int result = 0;
-  FMT_RETRY(result, FMT_POSIX_CALL(dup2(fd_, fd)));
-  if (result == -1)
-    ec = ErrorCode(errno);
-}
-
-void fmt::File::pipe(File &read_end, File &write_end) {
-  // Close the descriptors first to make sure that assignments don't throw
-  // and there are no leaks.
-  read_end.close();
-  write_end.close();
-  int fds[2] = {};
-#ifdef _WIN32
-  // Make the default pipe capacity the same as on Linux 2.6.11+.
-  enum { DEFAULT_CAPACITY = 65536 };
-  int result = FMT_POSIX_CALL(pipe(fds, DEFAULT_CAPACITY, _O_BINARY));
-#else
-  // Don't retry as the pipe function doesn't return EINTR.
-  // http://pubs.opengroup.org/onlinepubs/009696799/functions/pipe.html
-  int result = FMT_POSIX_CALL(pipe(fds));
-#endif
-  if (result != 0)
-    throw SystemError(errno, "cannot create pipe");
-  // The following assignments don't throw because read_fd and write_fd
-  // are closed.
-  read_end = File(fds[0]);
-  write_end = File(fds[1]);
-}
-
-fmt::BufferedFile fmt::File::fdopen(const char *mode) {
-  // Don't retry as fdopen doesn't return EINTR.
-  FILE *f = FMT_POSIX_CALL(fdopen(fd_, mode));
-  if (!f)
-    throw SystemError(errno, "cannot associate stream with file descriptor");
-  BufferedFile file(f);
-  fd_ = -1;
-  return file;
-}
-
-long fmt::getpagesize() {
-#ifdef _WIN32
-  SYSTEM_INFO si;
-  GetSystemInfo(&si);
-  return si.dwPageSize;
-#else
-  long size = FMT_POSIX_CALL(sysconf(_SC_PAGESIZE));
-  if (size < 0)
-    throw SystemError(errno, "cannot get memory page size");
-  return size;
-#endif
-}
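For context on the interface removed above: fmt::BufferedFile is a thin RAII wrapper
around a C FILE*, so typical use is construct-with-path, query, and let close() or the
destructor release the handle. A minimal sketch (not part of the patch), using only the
members defined in the deleted file; the file name is a placeholder:

    #include "posix.h"

    int main() {
      // The constructor opens the stream and throws fmt::SystemError on failure.
      fmt::BufferedFile f("example.txt", "r");   // "example.txt" is hypothetical
      int fd = f.fileno();                       // underlying descriptor, as implemented above
      (void)fd;
      f.close();                                 // explicit close; the destructor would also do this
      return 0;
    }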
diff --git a/src/xxhash.c b/src/xxhash.c
new file mode 100644
index 0000000..e6fb8f1
--- /dev/null
+++ b/src/xxhash.c
@@ -0,0 +1,915 @@
+/*
+xxHash - Fast Hash algorithm
+Copyright (C) 2012-2015, Yann Collet
+
+BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+You can contact the author at :
+- xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+
+/**************************************
+*  Tuning parameters
+**************************************/
+/* Unaligned memory access is automatically enabled for "common" CPUs, such as x86.
+ * For other CPUs, the compiler will be more cautious and insert extra code to ensure aligned accesses are respected.
+ * If you know your target CPU supports unaligned memory access, you may want to force this option manually to improve performance.
+ * You can also enable this parameter if you know your input data will always be aligned (on boundaries of 4, for U32).
+ */
+#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+#  define XXH_USE_UNALIGNED_ACCESS 1
+#endif
+
+/* XXH_ACCEPT_NULL_INPUT_POINTER :
+ * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
+ * When this option is enabled, xxHash output for null input pointers will be the same as for a zero-length input.
+ * By default, this option is disabled. To enable it, uncomment the define below:
+ */
+/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
+
+/* XXH_FORCE_NATIVE_FORMAT :
+ * By default, the xxHash library provides endian-independent hash values, based on the little-endian convention.
+ * Results are therefore identical on little-endian and big-endian CPUs.
+ * This comes at a performance cost for big-endian CPUs, since some byte swapping is required to emulate the little-endian format.
+ * Should endian-independence be of no importance for your application, you may set the #define below to 1.
+ * It will improve speed for big-endian CPUs.
+ * This option has no impact on little-endian CPUs.
+ */
+#define XXH_FORCE_NATIVE_FORMAT 0
+
+
+/**************************************
+*  Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
+#  define FORCE_INLINE static __forceinline
+#else
+#  if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/**************************************
+*  Includes & Memory related functions
+***************************************/
+#include "xxhash.h"
+/* Modify the local functions below should you wish to use some other memory routines */
+/* for malloc(), free() */
+#include <stdlib.h>
+static void* XXH_malloc(size_t s) { return malloc(s); }
+static void  XXH_free  (void* p)  { free(p); }
+/* for memcpy() */
+#include <string.h>
+static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
+
+
+/**************************************
+*  Basic Types
+***************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+# include <stdint.h>
+  typedef uint8_t  BYTE;
+  typedef uint16_t U16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+#else
+  typedef unsigned char      BYTE;
+  typedef unsigned short     U16;
+  typedef unsigned int       U32;
+  typedef   signed int       S32;
+  typedef unsigned long long U64;
+#endif
+
+static U32 XXH_read32(const void* memPtr)
+{
+    U32 val32;
+    memcpy(&val32, memPtr, 4);
+    return val32;
+}
+
+static U64 XXH_read64(const void* memPtr)
+{
+    U64 val64;
+    memcpy(&val64, memPtr, 8);
+    return val64;
+}
+
+
+
+/******************************************
+*  Compiler-specific Functions and Macros
+******************************************/
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
+#if defined(_MSC_VER)
+#  define XXH_rotl32(x,r) _rotl(x,r)
+#  define XXH_rotl64(x,r) _rotl64(x,r)
+#else
+#  define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+#  define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
+#endif
+
+#if defined(_MSC_VER)     /* Visual Studio */
+#  define XXH_swap32 _byteswap_ulong
+#  define XXH_swap64 _byteswap_uint64
+#elif GCC_VERSION >= 403
+#  define XXH_swap32 __builtin_bswap32
+#  define XXH_swap64 __builtin_bswap64
+#else
+static U32 XXH_swap32 (U32 x)
+{
+    return  ((x << 24) & 0xff000000 ) |
+            ((x <<  8) & 0x00ff0000 ) |
+            ((x >>  8) & 0x0000ff00 ) |
+            ((x >> 24) & 0x000000ff );
+}
+static U64 XXH_swap64 (U64 x)
+{
+    return  ((x << 56) & 0xff00000000000000ULL) |
+            ((x << 40) & 0x00ff000000000000ULL) |
+            ((x << 24) & 0x0000ff0000000000ULL) |
+            ((x << 8)  & 0x000000ff00000000ULL) |
+            ((x >> 8)  & 0x00000000ff000000ULL) |
+            ((x >> 24) & 0x0000000000ff0000ULL) |
+            ((x >> 40) & 0x000000000000ff00ULL) |
+            ((x >> 56) & 0x00000000000000ffULL);
+}
+#endif
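As a quick sanity check of the byte-swap helpers above (illustrative only, not part of
the source): the 32-bit swap reverses byte order, mapping 0x01020304 to 0x04030201, and
the 64-bit swap performs the analogous 8-byte reversal:

    /* with the fallback implementations above: */
    /*   XXH_swap32(0x01020304U)            -> 0x04030201U           */
    /*   XXH_swap64(0x0102030405060708ULL)  -> 0x0807060504030201ULL */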
+
+
+/***************************************
+*  Architecture Macros
+***************************************/
+typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
+#ifndef XXH_CPU_LITTLE_ENDIAN   /* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example using a compiler switch */
+static const int one = 1;
+#   define XXH_CPU_LITTLE_ENDIAN   (*(const char*)(&one))
+#endif
+
+
+/*****************************
+*  Memory reads
+*****************************/
+typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
+
+FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+    if (align==XXH_unaligned)
+        return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
+    else
+        return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
+}
+
+FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
+{
+    return XXH_readLE32_align(ptr, endian, XXH_unaligned);
+}
+
+FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+    if (align==XXH_unaligned)
+        return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
+    else
+        return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
+}
+
+FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
+{
+    return XXH_readLE64_align(ptr, endian, XXH_unaligned);
+}
+
+
+/***************************************
+*  Macros
+***************************************/
+#define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(!!(c)) }; }    /* use only *after* variable declarations */
+
+
+/***************************************
+*  Constants
+***************************************/
+#define PRIME32_1   2654435761U
+#define PRIME32_2   2246822519U
+#define PRIME32_3   3266489917U
+#define PRIME32_4    668265263U
+#define PRIME32_5    374761393U
+
+#define PRIME64_1 11400714785074694791ULL
+#define PRIME64_2 14029467366897019727ULL
+#define PRIME64_3  1609587929392839161ULL
+#define PRIME64_4  9650029242287828579ULL
+#define PRIME64_5  2870177450012600261ULL
+
+
+/*****************************
+*  Simple Hash Functions
+*****************************/
+FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
+{
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* bEnd = p + len;
+    U32 h32;
+#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (p==NULL)
+    {
+        len=0;
+        bEnd=p=(const BYTE*)(size_t)16;
+    }
+#endif
+
+    if (len>=16)
+    {
+        const BYTE* const limit = bEnd - 16;
+        U32 v1 = seed + PRIME32_1 + PRIME32_2;
+        U32 v2 = seed + PRIME32_2;
+        U32 v3 = seed + 0;
+        U32 v4 = seed - PRIME32_1;
+
+        do
+        {
+            v1 += XXH_get32bits(p) * PRIME32_2;
+            v1 = XXH_rotl32(v1, 13);
+            v1 *= PRIME32_1;
+            p+=4;
+            v2 += XXH_get32bits(p) * PRIME32_2;
+            v2 = XXH_rotl32(v2, 13);
+            v2 *= PRIME32_1;
+            p+=4;
+            v3 += XXH_get32bits(p) * PRIME32_2;
+            v3 = XXH_rotl32(v3, 13);
+            v3 *= PRIME32_1;
+            p+=4;
+            v4 += XXH_get32bits(p) * PRIME32_2;
+            v4 = XXH_rotl32(v4, 13);
+            v4 *= PRIME32_1;
+            p+=4;
+        }
+        while (p<=limit);
+
+        h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
+    }
+    else
+    {
+        h32  = seed + PRIME32_5;
+    }
+
+    h32 += (U32) len;
+
+    while (p+4<=bEnd)
+    {
+        h32 += XXH_get32bits(p) * PRIME32_3;
+        h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h32 += (*p) * PRIME32_5;
+        h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
+        p++;
+    }
+
+    h32 ^= h32 >> 15;
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+
+    return h32;
+}
+
+
+unsigned XXH32 (const void* input, size_t len, unsigned seed)
+{
+#if 0
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH32_state_t state;
+    XXH32_reset(&state, seed);
+    XXH32_update(&state, input, len);
+    return XXH32_digest(&state);
+#else
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+#  if !defined(XXH_USE_UNALIGNED_ACCESS)
+    if ((((size_t)input) & 3) == 0)   /* Input is 4-bytes aligned, leverage the speed benefit */
+    {
+        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+            return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+        else
+            return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }
+#  endif
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+    else
+        return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
+{
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* bEnd = p + len;
+    U64 h64;
+#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (p==NULL)
+    {
+        len=0;
+        bEnd=p=(const BYTE*)(size_t)32;
+    }
+#endif
+
+    if (len>=32)
+    {
+        const BYTE* const limit = bEnd - 32;
+        U64 v1 = seed + PRIME64_1 + PRIME64_2;
+        U64 v2 = seed + PRIME64_2;
+        U64 v3 = seed + 0;
+        U64 v4 = seed - PRIME64_1;
+
+        do
+        {
+            v1 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v1 = XXH_rotl64(v1, 31);
+            v1 *= PRIME64_1;
+            v2 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v2 = XXH_rotl64(v2, 31);
+            v2 *= PRIME64_1;
+            v3 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v3 = XXH_rotl64(v3, 31);
+            v3 *= PRIME64_1;
+            v4 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v4 = XXH_rotl64(v4, 31);
+            v4 *= PRIME64_1;
+        }
+        while (p<=limit);
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+
+        v1 *= PRIME64_2;
+        v1 = XXH_rotl64(v1, 31);
+        v1 *= PRIME64_1;
+        h64 ^= v1;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+
+        v2 *= PRIME64_2;
+        v2 = XXH_rotl64(v2, 31);
+        v2 *= PRIME64_1;
+        h64 ^= v2;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+
+        v3 *= PRIME64_2;
+        v3 = XXH_rotl64(v3, 31);
+        v3 *= PRIME64_1;
+        h64 ^= v3;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+
+        v4 *= PRIME64_2;
+        v4 = XXH_rotl64(v4, 31);
+        v4 *= PRIME64_1;
+        h64 ^= v4;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+    }
+    else
+    {
+        h64  = seed + PRIME64_5;
+    }
+
+    h64 += (U64) len;
+
+    while (p+8<=bEnd)
+    {
+        U64 k1 = XXH_get64bits(p);
+        k1 *= PRIME64_2;
+        k1 = XXH_rotl64(k1,31);
+        k1 *= PRIME64_1;
+        h64 ^= k1;
+        h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+        p+=8;
+    }
+
+    if (p+4<=bEnd)
+    {
+        h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
+        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h64 ^= (*p) * PRIME64_5;
+        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+        p++;
+    }
+
+    h64 ^= h64 >> 33;
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+
+    return h64;
+}
+
+
+unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
+{
+#if 0
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH64_state_t state;
+    XXH64_reset(&state, seed);
+    XXH64_update(&state, input, len);
+    return XXH64_digest(&state);
+#else
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+#  if !defined(XXH_USE_UNALIGNED_ACCESS)
+    if ((((size_t)input) & 7)==0)   /* Input is aligned, let's leverage the speed advantage */
+    {
+        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+            return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+        else
+            return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }
+#  endif
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+    else
+        return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
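The two functions above form the one-shot interface: hash an entire contiguous buffer in
a single call. A minimal usage sketch (not part of the patch), assuming this file and its
xxhash.h header are compiled into the program; the input string and seed are arbitrary:

    #include <stdio.h>
    #include <string.h>
    #include "xxhash.h"

    int main(void)
    {
        const char* msg = "salmon";                          /* arbitrary example input */
        unsigned           h32 = XXH32(msg, strlen(msg), 0); /* one-shot 32-bit hash, seed 0 */
        unsigned long long h64 = XXH64(msg, strlen(msg), 0); /* one-shot 64-bit hash, seed 0 */
        printf("XXH32 = %08x\nXXH64 = %016llx\n", h32, h64);
        return 0;
    }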
+
+/****************************************************
+*  Advanced Hash Functions
+****************************************************/
+
+/*** Allocation ***/
+typedef struct
+{
+    U64 total_len;
+    U32 seed;
+    U32 v1;
+    U32 v2;
+    U32 v3;
+    U32 v4;
+    U32 mem32[4];   /* defined as U32 for alignment */
+    U32 memsize;
+} XXH_istate32_t;
+
+typedef struct
+{
+    U64 total_len;
+    U64 seed;
+    U64 v1;
+    U64 v2;
+    U64 v3;
+    U64 v4;
+    U64 mem64[4];   /* defined as U64 for alignment */
+    U32 memsize;
+} XXH_istate64_t;
+
+
+XXH32_state_t* XXH32_createState(void)
+{
+    XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t));   /* A compilation error here means XXH32_state_t is not large enough */
+    return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
+}
+XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
+{
+    XXH_free(statePtr);
+    return XXH_OK;
+}
+
+XXH64_state_t* XXH64_createState(void)
+{
+    XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= sizeof(XXH_istate64_t));   /* A compilation error here means XXH64_state_t is not large enough */
+    return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
+}
+XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
+{
+    XXH_free(statePtr);
+    return XXH_OK;
+}
+
+
+/*** Hash feed ***/
+
+XXH_errorcode XXH32_reset(XXH32_state_t* state_in, U32 seed)
+{
+    XXH_istate32_t* state = (XXH_istate32_t*) state_in;
+    state->seed = seed;
+    state->v1 = seed + PRIME32_1 + PRIME32_2;
+    state->v2 = seed + PRIME32_2;
+    state->v3 = seed + 0;
+    state->v4 = seed - PRIME32_1;
+    state->total_len = 0;
+    state->memsize = 0;
+    return XXH_OK;
+}
+
+XXH_errorcode XXH64_reset(XXH64_state_t* state_in, unsigned long long seed)
+{
+    XXH_istate64_t* state = (XXH_istate64_t*) state_in;
+    state->seed = seed;
+    state->v1 = seed + PRIME64_1 + PRIME64_2;
+    state->v2 = seed + PRIME64_2;
+    state->v3 = seed + 0;
+    state->v4 = seed - PRIME64_1;
+    state->total_len = 0;
+    state->memsize = 0;
+    return XXH_OK;
+}
+
+
+FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
+{
+    XXH_istate32_t* state = (XXH_istate32_t *) state_in;
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (input==NULL) return XXH_ERROR;
+#endif
+
+    state->total_len += len;
+
+    if (state->memsize + len < 16)   /* fill in tmp buffer */
+    {
+        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
+        state->memsize += (U32)len;
+        return XXH_OK;
+    }
+
+    if (state->memsize)   /* some data left from previous update */
+    {
+        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
+        {
+            const U32* p32 = state->mem32;
+            state->v1 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v1 = XXH_rotl32(state->v1, 13);
+            state->v1 *= PRIME32_1;
+            p32++;
+            state->v2 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v2 = XXH_rotl32(state->v2, 13);
+            state->v2 *= PRIME32_1;
+            p32++;
+            state->v3 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v3 = XXH_rotl32(state->v3, 13);
+            state->v3 *= PRIME32_1;
+            p32++;
+            state->v4 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v4 = XXH_rotl32(state->v4, 13);
+            state->v4 *= PRIME32_1;
+            p32++;
+        }
+        p += 16-state->memsize;
+        state->memsize = 0;
+    }
+
+    if (p <= bEnd-16)
+    {
+        const BYTE* const limit = bEnd - 16;
+        U32 v1 = state->v1;
+        U32 v2 = state->v2;
+        U32 v3 = state->v3;
+        U32 v4 = state->v4;
+
+        do
+        {
+            v1 += XXH_readLE32(p, endian) * PRIME32_2;
+            v1 = XXH_rotl32(v1, 13);
+            v1 *= PRIME32_1;
+            p+=4;
+            v2 += XXH_readLE32(p, endian) * PRIME32_2;
+            v2 = XXH_rotl32(v2, 13);
+            v2 *= PRIME32_1;
+            p+=4;
+            v3 += XXH_readLE32(p, endian) * PRIME32_2;
+            v3 = XXH_rotl32(v3, 13);
+            v3 *= PRIME32_1;
+            p+=4;
+            v4 += XXH_readLE32(p, endian) * PRIME32_2;
+            v4 = XXH_rotl32(v4, 13);
+            v4 *= PRIME32_1;
+            p+=4;
+        }
+        while (p<=limit);
+
+        state->v1 = v1;
+        state->v2 = v2;
+        state->v3 = v3;
+        state->v4 = v4;
+    }
+
+    if (p < bEnd)
+    {
+        XXH_memcpy(state->mem32, p, bEnd-p);
+        state->memsize = (int)(bEnd-p);
+    }
+
+    return XXH_OK;
+}
+
+XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
+    else
+        return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+
+FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian)
+{
+    const XXH_istate32_t* state = (const XXH_istate32_t*) state_in;
+    const BYTE * p = (const BYTE*)state->mem32;
+    const BYTE* bEnd = (const BYTE*)(state->mem32) + state->memsize;
+    U32 h32;
+
+    if (state->total_len >= 16)
+    {
+        h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
+    }
+    else
+    {
+        h32  = state->seed + PRIME32_5;
+    }
+
+    h32 += (U32) state->total_len;
+
+    while (p+4<=bEnd)
+    {
+        h32 += XXH_readLE32(p, endian) * PRIME32_3;
+        h32  = XXH_rotl32(h32, 17) * PRIME32_4;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h32 += (*p) * PRIME32_5;
+        h32 = XXH_rotl32(h32, 11) * PRIME32_1;
+        p++;
+    }
+
+    h32 ^= h32 >> 15;
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+
+    return h32;
+}
+
+
+U32 XXH32_digest (const XXH32_state_t* state_in)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_digest_endian(state_in, XXH_littleEndian);
+    else
+        return XXH32_digest_endian(state_in, XXH_bigEndian);
+}
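The createState/reset/update/digest/freeState functions form the streaming interface:
data can be fed in arbitrary chunks and finalized once, giving the same value as the
one-shot call. A minimal 32-bit sketch (not part of the patch), using only functions
defined in this file; the chunking is arbitrary:

    #include "xxhash.h"

    unsigned hash_in_two_pieces(void)
    {
        XXH32_state_t* state = XXH32_createState();
        XXH32_reset(state, 0);                /* seed 0 */
        XXH32_update(state, "sal", 3);        /* first chunk  */
        XXH32_update(state, "mon", 3);        /* second chunk */
        unsigned h = XXH32_digest(state);     /* equals XXH32("salmon", 6, 0) */
        XXH32_freeState(state);
        return h;
    }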
+
+
+FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
+{
+    XXH_istate64_t * state = (XXH_istate64_t *) state_in;
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (input==NULL) return XXH_ERROR;
+#endif
+
+    state->total_len += len;
+
+    if (state->memsize + len < 32)   /* fill in tmp buffer */
+    {
+        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+        state->memsize += (U32)len;
+        return XXH_OK;
+    }
+
+    if (state->memsize)   /* some data left from previous update */
+    {
+        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
+        {
+            const U64* p64 = state->mem64;
+            state->v1 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v1 = XXH_rotl64(state->v1, 31);
+            state->v1 *= PRIME64_1;
+            p64++;
+            state->v2 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v2 = XXH_rotl64(state->v2, 31);
+            state->v2 *= PRIME64_1;
+            p64++;
+            state->v3 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v3 = XXH_rotl64(state->v3, 31);
+            state->v3 *= PRIME64_1;
+            p64++;
+            state->v4 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v4 = XXH_rotl64(state->v4, 31);
+            state->v4 *= PRIME64_1;
+            p64++;
+        }
+        p += 32-state->memsize;
+        state->memsize = 0;
+    }
+
+    if (p+32 <= bEnd)
+    {
+        const BYTE* const limit = bEnd - 32;
+        U64 v1 = state->v1;
+        U64 v2 = state->v2;
+        U64 v3 = state->v3;
+        U64 v4 = state->v4;
+
+        do
+        {
+            v1 += XXH_readLE64(p, endian) * PRIME64_2;
+            v1 = XXH_rotl64(v1, 31);
+            v1 *= PRIME64_1;
+            p+=8;
+            v2 += XXH_readLE64(p, endian) * PRIME64_2;
+            v2 = XXH_rotl64(v2, 31);
+            v2 *= PRIME64_1;
+            p+=8;
+            v3 += XXH_readLE64(p, endian) * PRIME64_2;
+            v3 = XXH_rotl64(v3, 31);
+            v3 *= PRIME64_1;
+            p+=8;
+            v4 += XXH_readLE64(p, endian) * PRIME64_2;
+            v4 = XXH_rotl64(v4, 31);
+            v4 *= PRIME64_1;
+            p+=8;
+        }
+        while (p<=limit);
+
+        state->v1 = v1;
+        state->v2 = v2;
+        state->v3 = v3;
+        state->v4 = v4;
+    }
+
+    if (p < bEnd)
+    {
+        XXH_memcpy(state->mem64, p, bEnd-p);
+        state->memsize = (int)(bEnd-p);
+    }
+
+    return XXH_OK;
+}
+
+XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
+    else
+        return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+
+FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian)
+{
+    const XXH_istate64_t * state = (const XXH_istate64_t *) state_in;
+    const BYTE * p = (const BYTE*)state->mem64;
+    const BYTE* bEnd = (const BYTE*)state->mem64 + state->memsize;
+    U64 h64;
+
+    if (state->total_len >= 32)
+    {
+        U64 v1 = state->v1;
+        U64 v2 = state->v2;
+        U64 v3 = state->v3;
+        U64 v4 = state->v4;
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+
+        v1 *= PRIME64_2;
+        v1 = XXH_rotl64(v1, 31);
+        v1 *= PRIME64_1;
+        h64 ^= v1;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+
+        v2 *= PRIME64_2;
+        v2 = XXH_rotl64(v2, 31);
+        v2 *= PRIME64_1;
+        h64 ^= v2;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+
+        v3 *= PRIME64_2;
+        v3 = XXH_rotl64(v3, 31);
+        v3 *= PRIME64_1;
+        h64 ^= v3;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+
+        v4 *= PRIME64_2;
+        v4 = XXH_rotl64(v4, 31);
+        v4 *= PRIME64_1;
+        h64 ^= v4;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+    }
+    else
+    {
+        h64  = state->seed + PRIME64_5;
+    }
+
+    h64 += (U64) state->total_len;
+
+    while (p+8<=bEnd)
+    {
+        U64 k1 = XXH_readLE64(p, endian);
+        k1 *= PRIME64_2;
+        k1 = XXH_rotl64(k1,31);
+        k1 *= PRIME64_1;
+        h64 ^= k1;
+        h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+        p+=8;
+    }
+
+    if (p+4<=bEnd)
+    {
+        h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
+        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h64 ^= (*p) * PRIME64_5;
+        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+        p++;
+    }
+
+    h64 ^= h64 >> 33;
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+
+    return h64;
+}
+
+
+unsigned long long XXH64_digest (const XXH64_state_t* state_in)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_digest_endian(state_in, XXH_littleEndian);
+    else
+        return XXH64_digest_endian(state_in, XXH_bigEndian);
+}
+
+
diff --git a/tests/KmerHistTests.cpp b/tests/KmerHistTests.cpp
new file mode 100644
index 0000000..02de5f9
--- /dev/null
+++ b/tests/KmerHistTests.cpp
@@ -0,0 +1,115 @@
+#include "UtilityFunctions.hpp"
+
+// from http://stackoverflow.com/questions/2380962/generate-all-combinations-of-arbitrary-alphabet-up-to-arbitrary-length
+std::vector<std::string> getAllWords(int length) {
+
+  int N_LETTERS = 4;
+  char alphabet[] = {'A', 'C', 'G', 'T'};
+  std::vector<int> index(length, 0);
+  std::vector<std::string> words;
+
+  while(true)
+  {
+    std::string word(length, ' ');
+    for (int i = 0; i < length; ++i)
+      word[i] = alphabet[index[i]];
+    words.push_back(word);
+
+    for (int i = length-1; ; --i)
+    {
+      if (i < 0) return words;
+      index[i]++;
+      if (index[i] == N_LETTERS)
+        index[i] = 0;
+      else
+        break;
+    }
+  }
+}
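As a quick illustration (not part of the test file): getAllWords counts in base four with
the rightmost position varying fastest, so getAllWords(k) yields all 4^k strings over
{A, C, G, T}. A sketch, assuming the function above is visible in the same translation unit:

    #include <cassert>

    void checkAllWords() {
        auto words = getAllWords(2);   // AA, AC, AG, AT, CA, ..., TT
        assert(words.size() == 16);    // 4^2 distinct 2-mers
        assert(words.front() == "AA");
        assert(words.back()  == "TT");
    }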
+
+#include <atomic>
+
+SCENARIO("Kmers encode and decode correctly") {
+    using salmon::utils::Direction;
+    GIVEN("All 6-mers") {
+        std::vector<std::string> kmers = getAllWords(6);
+        //KmerDist<6, std::atomic<uint32_t>> kh;
+        for (auto& k : kmers) {
+            auto i = indexForKmer(k.c_str(), 6, Direction::FORWARD);
+            auto kp = kmerForIndex(i, 6);
+            WHEN("kmer is [" + k + "]") {
+                THEN("decodes as [" + kp + "]") {
+                    REQUIRE(k == kp);
+                }
+            }
+        }
+    }
+}
+
+
+SCENARIO("The next k-mer index function works correctly") {
+    using salmon::utils::Direction;
+    const uint32_t K = 6;
+    std::string s = "ATTCTCCACATAGTTGTCATCGAACCAGTACCCCGTAAGCGCCAACATAT";
+
+    GIVEN("The string " + s) {
+        auto idx = indexForKmer(s.c_str(), 6, Direction::FORWARD);
+        std::string k = s.substr(0, 6);
+        WHEN("kmer is [" + k + "]") {
+            auto kp = kmerForIndex(idx, 6);
+            THEN("decodes as [" + kp + "]") {
+                REQUIRE(k == kp);
+            }
+        }
+        for (size_t i = 0; i < s.size() - K; ++i) {
+            idx = nextKmerIndex(idx, s[i+K], 6, Direction::FORWARD);
+            k = s.substr(i+1, 6);
+            WHEN("kmer is [" + k + "]") {
+                auto kp = kmerForIndex(idx, 6);
+                THEN("decodes as [" + kp + "]") {
+                    REQUIRE(k == kp);
+                }
+            }
+        }
+    }
+
+    auto rc = [](std::string s) -> std::string {
+        std::string rc;
+        for (int32_t i = s.size() - 1; i >= 0; --i) {
+            switch(s[i]) {
+                case 'A': rc += 'T'; break;
+                case 'C': rc += 'G'; break;
+                case 'G': rc += 'C'; break;
+                case 'T': rc += 'A'; break;
+            }
+        }
+        return rc;
+    };
+
+    auto rcs = rc(s);
+
+    GIVEN("The string " + s + " in the reverse complement direction") {
+        auto idx = indexForKmer(s.c_str() + s.size() - K - 1, 6,
+                                Direction::REVERSE_COMPLEMENT);
+        std::string k = rc(s.substr(s.size() - K - 1, 6));
+        WHEN("kmer is [" + k + "]") {
+            auto kp = kmerForIndex(idx, 6);
+            THEN("decodes as [" + kp + "]") {
+                REQUIRE(k == kp);
+            }
+        }
+        for (int32_t i = s.size() - K - 2; i >= 0; --i) {
+            idx = nextKmerIndex(idx, s[i], 6, Direction::REVERSE_COMPLEMENT);
+            k = rc(s.substr(i, 6));
+            WHEN("kmer is [" + k + "]") {
+                auto kp = kmerForIndex(idx, 6);
+                THEN("decodes as [" + kp + "]") {
+                    REQUIRE(k == kp);
+                }
+            }
+        }
+    }
+
+}
+
+
diff --git a/tests/LibraryTypeTests.cpp b/tests/LibraryTypeTests.cpp
new file mode 100644
index 0000000..df2e820
--- /dev/null
+++ b/tests/LibraryTypeTests.cpp
@@ -0,0 +1,166 @@
+SCENARIO("Library types are encoded/decoded properly") {
+
+    GIVEN("A collection of library formats") {
+        std::unordered_map<std::string, LibraryFormat> fm =
+        {{"U", LibraryFormat(ReadType::SINGLE_END, ReadOrientation::NONE, ReadStrandedness::U)},
+            {"SF", LibraryFormat(ReadType::SINGLE_END, ReadOrientation::NONE, ReadStrandedness::S)},
+            {"SR", LibraryFormat(ReadType::SINGLE_END, ReadOrientation::NONE, ReadStrandedness::A)},
+            {"IU", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD, ReadStrandedness::U)},
+            {"ISF", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD, ReadStrandedness::S)},
+            {"ISR", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD, ReadStrandedness::A)},
+            {"OU", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY, ReadStrandedness::U)},
+            {"OSF", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY, ReadStrandedness::S)},
+            {"OSR", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY, ReadStrandedness::A)},
+            {"MU", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME, ReadStrandedness::U)},
+            {"MSF", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME, ReadStrandedness::S)},
+            {"MSR", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME, ReadStrandedness::A)}};
+
+        for (auto& kv : fm) {
+            WHEN("type is " + kv.first) {
+                uint8_t id = kv.second.formatID();
+                THEN("decodes as " + kv.first) {
+                    REQUIRE(kv.second == LibraryFormat::formatFromID(id));
+                }
+            }
+        }
+    }
+}
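The round trip exercised above is just encode-then-decode; in isolation it looks like the
following sketch (not part of the patch), where LibraryFormat and the
ReadType/ReadOrientation/ReadStrandedness enums come from LibraryFormat.hpp, which the
test driver includes:

    // inside a Catch test case
    LibraryFormat isr(ReadType::PAIRED_END, ReadOrientation::TOWARD, ReadStrandedness::A);
    uint8_t id = isr.formatID();                       // compact numeric code for the format
    REQUIRE(isr == LibraryFormat::formatFromID(id));   // decodes back to the same format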
+
+
+SCENARIO("Paired-end library types have proper compatibility") {
+
+    using salmon::utils::compatibleHit;
+    GIVEN("A series of observed [paired-end] library formats") {
+        std::unordered_map<std::string, LibraryFormat> refFM =
+        {{"U", LibraryFormat(ReadType::SINGLE_END, ReadOrientation::NONE, ReadStrandedness::U)},
+            {"SF", LibraryFormat(ReadType::SINGLE_END, ReadOrientation::NONE, ReadStrandedness::S)},
+            {"SR", LibraryFormat(ReadType::SINGLE_END, ReadOrientation::NONE, ReadStrandedness::A)},
+            {"IU", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD, ReadStrandedness::U)},
+            {"ISF", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD, ReadStrandedness::S)},
+            {"ISR", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD, ReadStrandedness::A)},
+            {"OU", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY, ReadStrandedness::U)},
+            {"OSF", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY, ReadStrandedness::S)},
+            {"OSR", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY, ReadStrandedness::A)},
+            {"MU", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME, ReadStrandedness::U)},
+            {"MSF", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME, ReadStrandedness::S)},
+            {"MSR", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME, ReadStrandedness::A)}};
+
+        std::vector<std::string> testFormats{"ISF", "ISR",
+                                             "OSF", "OSR", "MSF", "MSR"};
+
+        for (auto kv : refFM) {
+            auto& expectedName = kv.first;
+            LibraryFormat expected(kv.second);
+            for (auto ok : testFormats) {
+                auto& observedName = ok;
+                auto it = refFM.find(observedName);
+                LibraryFormat observed(it->second);
+                WHEN("expected is " + expectedName + " and observed is " + observedName) {
+                    THEN("compatibility should be") {
+                        if (expectedName == observedName) {
+                            REQUIRE(compatibleHit(expected, observed));
+                        } else if (expectedName == "IU" and
+                                (observedName == "ISF" or observedName == "ISR")) {
+                            REQUIRE(compatibleHit(expected, observed));
+                        } else if (expectedName == "OU" and
+                                (observedName == "OSF" or observedName == "OSR")) {
+                            REQUIRE(compatibleHit(expected, observed));
+                        } else if (expectedName == "MU" and
+                                (observedName == "MSF" or observedName == "MSR")) {
+                            REQUIRE(compatibleHit(expected, observed));
+                        } else {
+                            REQUIRE(!compatibleHit(expected, observed));
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+
+
+SCENARIO("Single-end library types have proper compatibility") {
+
+    using salmon::utils::compatibleHit;
+    using rapmap::utils::MateStatus;
+
+    GIVEN("A series of observed [single-end] library formats") {
+        std::unordered_map<std::string, LibraryFormat> refFM =
+        {{"U", LibraryFormat(ReadType::SINGLE_END, ReadOrientation::NONE, ReadStrandedness::U)},
+            {"SF", LibraryFormat(ReadType::SINGLE_END, ReadOrientation::NONE, ReadStrandedness::S)},
+            {"SR", LibraryFormat(ReadType::SINGLE_END, ReadOrientation::NONE, ReadStrandedness::A)},
+            {"IU", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD, ReadStrandedness::U)},
+            {"ISF", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD, ReadStrandedness::S)},
+            {"ISR", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD, ReadStrandedness::A)},
+            {"OU", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY, ReadStrandedness::U)},
+            {"OSF", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY, ReadStrandedness::S)},
+            {"OSR", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::AWAY, ReadStrandedness::A)},
+            {"MU", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME, ReadStrandedness::U)},
+            {"MSF", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME, ReadStrandedness::S)},
+            {"MSR", LibraryFormat(ReadType::PAIRED_END, ReadOrientation::SAME, ReadStrandedness::A)}};
+
+        std::vector<std::string> testFormats{"SF", "SR"};
+
+        int32_t start{0};
+        std::vector<bool> isFwd({true, false});
+        std::vector<MateStatus> ms({MateStatus::PAIRED_END_LEFT,
+                                    MateStatus::PAIRED_END_RIGHT,
+                                    //MateStatus::PAIRED_END_PAIRED,
+                                    MateStatus::SINGLE_END});
+
+        for (auto kv : refFM) {
+            auto& expectedName = kv.first;
+            LibraryFormat expected(kv.second);
+
+            for (auto fwd : isFwd) {
+                for (auto s : ms) {
+                    std::string observedName = ((fwd) ? "SF" : "SR");
+                    if (s == MateStatus::PAIRED_END_LEFT) {
+                        observedName += " left orphan";
+                    } else if (s == MateStatus::PAIRED_END_RIGHT) {
+                        observedName += " right orphan";
+                    } else if (s == MateStatus::SINGLE_END) {
+                        observedName += " single end";
+                    } else {
+                        observedName += " should not happen!";
+                    }
+
+                    WHEN("expected is " + expectedName + " and observed is " + observedName) {
+                        THEN("compatibility should be") {
+                            /*
+                            if ((expected.type == ReadType::PAIRED_END) and
+                                (s == MateStatus::SINGLE_END)) {
+                                REQUIRE(!compatibleHit(expected, start, fwd, s));
+                            } else
+                            */
+                            if (expected.strandedness == ReadStrandedness::U) {
+                                REQUIRE(compatibleHit(expected, start, fwd, s));
+                            } else if ((expected.strandedness == ReadStrandedness::S and
+                                        expected.orientation != ReadOrientation::SAME) and
+                                    ((fwd and s == MateStatus::SINGLE_END) or
+                                     (fwd and s == MateStatus::PAIRED_END_LEFT) or
+                                     (!fwd and s == MateStatus::PAIRED_END_RIGHT))) {
+                                REQUIRE(compatibleHit(expected, start, fwd, s));
+                            } else if ((expected.strandedness == ReadStrandedness::A and
+                                        expected.orientation != ReadOrientation::SAME) and
+                                    ((!fwd and s == MateStatus::SINGLE_END) or
+                                     (!fwd and s == MateStatus::PAIRED_END_LEFT) or
+                                     (fwd and s == MateStatus::PAIRED_END_RIGHT))) {
+                                REQUIRE(compatibleHit(expected, start, fwd, s));
+                            } else if (expected.orientation  == ReadOrientation::SAME and
+                                      ((expected.strandedness == ReadStrandedness::S and fwd) or
+                                       (expected.strandedness == ReadStrandedness::A and !fwd))) {
+                                REQUIRE(compatibleHit(expected, start, fwd, s));
+                            } else {
+                                REQUIRE(!compatibleHit(expected, start, fwd, s));
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+
diff --git a/tests/UnitTests.cpp b/tests/UnitTests.cpp
new file mode 100644
index 0000000..9676f98
--- /dev/null
+++ b/tests/UnitTests.cpp
@@ -0,0 +1,11 @@
+#define CATCH_CONFIG_MAIN  // This tells Catch to provide a main() - only do this in one cpp file
+#include <unordered_map>
+#include <iostream>
+#include "catch.hpp"
+#include "LibraryFormat.hpp"
+#include "SalmonUtils.hpp"
+
+bool verbose=false; // Apparently, we *need* this (OSX)
+
+#include "LibraryTypeTests.cpp"
+#include "KmerHistTests.cpp"
diff --git a/tests/catch.hpp b/tests/catch.hpp
new file mode 100644
index 0000000..de61226
--- /dev/null
+++ b/tests/catch.hpp
@@ -0,0 +1,9416 @@
+/*
+ *  Catch v1.2.1
+ *  Generated: 2015-06-30 18:23:27.961086
+ *  ----------------------------------------------------------
+ *  This file has been merged from multiple headers. Please don't edit it directly
+ *  Copyright (c) 2012 Two Blue Cubes Ltd. All rights reserved.
+ *
+ *  Distributed under the Boost Software License, Version 1.0. (See accompanying
+ *  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+ */
+#ifndef TWOBLUECUBES_SINGLE_INCLUDE_CATCH_HPP_INCLUDED
+#define TWOBLUECUBES_SINGLE_INCLUDE_CATCH_HPP_INCLUDED
+
+#define TWOBLUECUBES_CATCH_HPP_INCLUDED
+
+#ifdef __clang__
+#    pragma clang system_header
+#elif defined __GNUC__
+#    pragma GCC system_header
+#endif
+
+// #included from: internal/catch_suppress_warnings.h
+
+#define TWOBLUECUBES_CATCH_SUPPRESS_WARNINGS_H_INCLUDED
+
+#ifdef __clang__
+#   ifdef __ICC // icpc defines the __clang__ macro
+#       pragma warning(push)
+#       pragma warning(disable: 161 1682)
+#   else // __ICC
+#       pragma clang diagnostic ignored "-Wglobal-constructors"
+#       pragma clang diagnostic ignored "-Wvariadic-macros"
+#       pragma clang diagnostic ignored "-Wc99-extensions"
+#       pragma clang diagnostic ignored "-Wunused-variable"
+#       pragma clang diagnostic push
+#       pragma clang diagnostic ignored "-Wpadded"
+#       pragma clang diagnostic ignored "-Wc++98-compat"
+#       pragma clang diagnostic ignored "-Wc++98-compat-pedantic"
+#       pragma clang diagnostic ignored "-Wswitch-enum"
+#    endif
+#elif defined __GNUC__
+#    pragma GCC diagnostic ignored "-Wvariadic-macros"
+#    pragma GCC diagnostic ignored "-Wunused-variable"
+#    pragma GCC diagnostic push
+#    pragma GCC diagnostic ignored "-Wpadded"
+#endif
+
+#if defined(CATCH_CONFIG_MAIN) || defined(CATCH_CONFIG_RUNNER)
+#  define CATCH_IMPL
+#endif
+
+#ifdef CATCH_IMPL
+#  ifndef CLARA_CONFIG_MAIN
+#    define CLARA_CONFIG_MAIN_NOT_DEFINED
+#    define CLARA_CONFIG_MAIN
+#  endif
+#endif
+
+// #included from: internal/catch_notimplemented_exception.h
+#define TWOBLUECUBES_CATCH_NOTIMPLEMENTED_EXCEPTION_H_INCLUDED
+
+// #included from: catch_common.h
+#define TWOBLUECUBES_CATCH_COMMON_H_INCLUDED
+
+#define INTERNAL_CATCH_UNIQUE_NAME_LINE2( name, line ) name##line
+#define INTERNAL_CATCH_UNIQUE_NAME_LINE( name, line ) INTERNAL_CATCH_UNIQUE_NAME_LINE2( name, line )
+#define INTERNAL_CATCH_UNIQUE_NAME( name ) INTERNAL_CATCH_UNIQUE_NAME_LINE( name, __LINE__ )
+
+#define INTERNAL_CATCH_STRINGIFY2( expr ) #expr
+#define INTERNAL_CATCH_STRINGIFY( expr ) INTERNAL_CATCH_STRINGIFY2( expr )
+
+#include <sstream>
+#include <stdexcept>
+#include <algorithm>
+
+// #included from: catch_compiler_capabilities.h
+#define TWOBLUECUBES_CATCH_COMPILER_CAPABILITIES_HPP_INCLUDED
+
+// Detect a number of compiler features - mostly C++11/14 conformance - by compiler
+// The following features are defined:
+//
+// CATCH_CONFIG_CPP11_NULLPTR : is nullptr supported?
+// CATCH_CONFIG_CPP11_NOEXCEPT : is noexcept supported?
+// CATCH_CONFIG_CPP11_GENERATED_METHODS : The delete and default keywords for compiler generated methods
+// CATCH_CONFIG_CPP11_IS_ENUM : std::is_enum is supported?
+// CATCH_CONFIG_CPP11_TUPLE : std::tuple is supported
+
+// CATCH_CONFIG_CPP11_OR_GREATER : Is C++11 supported?
+
+// CATCH_CONFIG_VARIADIC_MACROS : are variadic macros supported?
+
+// In general each macro has a _NO_<feature name> form
+// (e.g. CATCH_CONFIG_CPP11_NO_NULLPTR) which disables the feature.
+// Many features, at point of detection, define an _INTERNAL_ macro, so they
+// can be combined, en-mass, with the _NO_ forms later.
+
+// All the C++11 features can be disabled with CATCH_CONFIG_NO_CPP11
+
+#ifdef __clang__
+
+#  if __has_feature(cxx_nullptr)
+#    define CATCH_INTERNAL_CONFIG_CPP11_NULLPTR
+#  endif
+
+#  if __has_feature(cxx_noexcept)
+#    define CATCH_INTERNAL_CONFIG_CPP11_NOEXCEPT
+#  endif
+
+#endif // __clang__
+
+////////////////////////////////////////////////////////////////////////////////
+// Borland
+#ifdef __BORLANDC__
+
+#endif // __BORLANDC__
+
+////////////////////////////////////////////////////////////////////////////////
+// EDG
+#ifdef __EDG_VERSION__
+
+#endif // __EDG_VERSION__
+
+////////////////////////////////////////////////////////////////////////////////
+// Digital Mars
+#ifdef __DMC__
+
+#endif // __DMC__
+
+////////////////////////////////////////////////////////////////////////////////
+// GCC
+#ifdef __GNUC__
+
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6 && defined(__GXX_EXPERIMENTAL_CXX0X__) )
+#   define CATCH_INTERNAL_CONFIG_CPP11_NULLPTR
+#endif
+
+#endif // __GNUC__
+
+////////////////////////////////////////////////////////////////////////////////
+// Visual C++
+#ifdef _MSC_VER
+
+#if (_MSC_VER >= 1600)
+#   define CATCH_INTERNAL_CONFIG_CPP11_NULLPTR
+#endif
+
+#if (_MSC_VER >= 1900 ) // (VC++ 14 (VS2015))
+#define CATCH_INTERNAL_CONFIG_CPP11_NOEXCEPT
+#define CATCH_INTERNAL_CONFIG_CPP11_GENERATED_METHODS
+#endif
+
+#endif // _MSC_VER
+
+// Use variadic macros if the compiler supports them
+#if ( defined _MSC_VER && _MSC_VER > 1400 && !defined __EDGE__) || \
+    ( defined __WAVE__ && __WAVE_HAS_VARIADICS ) || \
+    ( defined __GNUC__ && __GNUC__ >= 3 ) || \
+    ( !defined __cplusplus && __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L )
+
+#define CATCH_INTERNAL_CONFIG_VARIADIC_MACROS
+
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// C++ language feature support
+
+// catch all support for C++11
+#if (__cplusplus >= 201103L)
+
+#  define CATCH_CPP11_OR_GREATER
+
+#  if !defined(CATCH_INTERNAL_CONFIG_CPP11_NULLPTR)
+#    define CATCH_INTERNAL_CONFIG_CPP11_NULLPTR
+#  endif
+
+#  ifndef CATCH_INTERNAL_CONFIG_CPP11_NOEXCEPT
+#    define CATCH_INTERNAL_CONFIG_CPP11_NOEXCEPT
+#  endif
+
+#  ifndef CATCH_INTERNAL_CONFIG_CPP11_GENERATED_METHODS
+#    define CATCH_INTERNAL_CONFIG_CPP11_GENERATED_METHODS
+#  endif
+
+#  ifndef CATCH_INTERNAL_CONFIG_CPP11_IS_ENUM
+#    define CATCH_INTERNAL_CONFIG_CPP11_IS_ENUM
+#  endif
+
+#  ifndef CATCH_INTERNAL_CONFIG_CPP11_TUPLE
+#    define CATCH_INTERNAL_CONFIG_CPP11_TUPLE
+#  endif
+
+#  ifndef CATCH_INTERNAL_CONFIG_VARIADIC_MACROS
+#    define CATCH_INTERNAL_CONFIG_VARIADIC_MACROS
+#  endif
+
+#endif // __cplusplus >= 201103L
+
+// Now set the actual defines based on the above + anything the user has configured
+#if defined(CATCH_INTERNAL_CONFIG_CPP11_NULLPTR) && !defined(CATCH_CONFIG_CPP11_NO_NULLPTR) && !defined(CATCH_CONFIG_CPP11_NULLPTR) && !defined(CATCH_CONFIG_NO_CPP11)
+#   define CATCH_CONFIG_CPP11_NULLPTR
+#endif
+#if defined(CATCH_INTERNAL_CONFIG_CPP11_NOEXCEPT) && !defined(CATCH_CONFIG_CPP11_NO_NOEXCEPT) && !defined(CATCH_CONFIG_CPP11_NOEXCEPT) && !defined(CATCH_CONFIG_NO_CPP11)
+#   define CATCH_CONFIG_CPP11_NOEXCEPT
+#endif
+#if defined(CATCH_INTERNAL_CONFIG_CPP11_GENERATED_METHODS) && !defined(CATCH_CONFIG_CPP11_NO_GENERATED_METHODS) && !defined(CATCH_CONFIG_CPP11_GENERATED_METHODS) && !defined(CATCH_CONFIG_NO_CPP11)
+#   define CATCH_CONFIG_CPP11_GENERATED_METHODS
+#endif
+#if defined(CATCH_INTERNAL_CONFIG_CPP11_IS_ENUM) && !defined(CATCH_CONFIG_CPP11_NO_IS_ENUM) && !defined(CATCH_CONFIG_CPP11_IS_ENUM) && !defined(CATCH_CONFIG_NO_CPP11)
+#   define CATCH_CONFIG_CPP11_IS_ENUM
+#endif
+#if defined(CATCH_INTERNAL_CONFIG_CPP11_TUPLE) && !defined(CATCH_CONFIG_CPP11_NO_TUPLE) && !defined(CATCH_CONFIG_CPP11_TUPLE) && !defined(CATCH_CONFIG_NO_CPP11)
+#   define CATCH_CONFIG_CPP11_TUPLE
+#endif
+#if defined(CATCH_INTERNAL_CONFIG_VARIADIC_MACROS) && !defined(CATCH_CONFIG_NO_VARIADIC_MACROS) && !defined(CATCH_CONFIG_VARIADIC_MACROS)
+#define CATCH_CONFIG_VARIADIC_MACROS
+#endif
+
+// noexcept support:
+#if defined(CATCH_CONFIG_CPP11_NOEXCEPT) && !defined(CATCH_NOEXCEPT)
+#  define CATCH_NOEXCEPT noexcept
+#  define CATCH_NOEXCEPT_IS(x) noexcept(x)
+#else
+#  define CATCH_NOEXCEPT throw()
+#  define CATCH_NOEXCEPT_IS(x)
+#endif
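+// Illustrative note (not from the upstream sources): CATCH_NOEXCEPT expands to `noexcept`
+// on C++11-capable toolchains and to the legacy `throw()` specification otherwise, so a
+// declaration such as
+//
+//     virtual ~NotImplementedException() CATCH_NOEXCEPT {}
+//
+// (see below) carries the strongest no-throw annotation the compiler supports.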
+
+namespace Catch {
+
+    class NonCopyable {
+#ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS
+        NonCopyable( NonCopyable const& )              = delete;
+        NonCopyable( NonCopyable && )                  = delete;
+        NonCopyable& operator = ( NonCopyable const& ) = delete;
+        NonCopyable& operator = ( NonCopyable && )     = delete;
+#else
+        NonCopyable( NonCopyable const& );
+        NonCopyable& operator = ( NonCopyable const& );
+#endif
+
+    protected:
+        NonCopyable() {}
+        virtual ~NonCopyable();
+    };
+
+    class SafeBool {
+    public:
+        typedef void (SafeBool::*type)() const;
+
+        static type makeSafe( bool value ) {
+            return value ? &SafeBool::trueValue : 0;
+        }
+    private:
+        void trueValue() const {}
+    };
+
+    template<typename ContainerT>
+    inline void deleteAll( ContainerT& container ) {
+        typename ContainerT::const_iterator it = container.begin();
+        typename ContainerT::const_iterator itEnd = container.end();
+        for(; it != itEnd; ++it )
+            delete *it;
+    }
+    template<typename AssociativeContainerT>
+    inline void deleteAllValues( AssociativeContainerT& container ) {
+        typename AssociativeContainerT::const_iterator it = container.begin();
+        typename AssociativeContainerT::const_iterator itEnd = container.end();
+        for(; it != itEnd; ++it )
+            delete it->second;
+    }
+
+    bool startsWith( std::string const& s, std::string const& prefix );
+    bool endsWith( std::string const& s, std::string const& suffix );
+    bool contains( std::string const& s, std::string const& infix );
+    void toLowerInPlace( std::string& s );
+    std::string toLower( std::string const& s );
+    std::string trim( std::string const& str );
+    bool replaceInPlace( std::string& str, std::string const& replaceThis, std::string const& withThis );
+
+    struct pluralise {
+        pluralise( std::size_t count, std::string const& label );
+
+        friend std::ostream& operator << ( std::ostream& os, pluralise const& pluraliser );
+
+        std::size_t m_count;
+        std::string m_label;
+    };
+
+    struct SourceLineInfo {
+
+        SourceLineInfo();
+        SourceLineInfo( char const* _file, std::size_t _line );
+        SourceLineInfo( SourceLineInfo const& other );
+#  ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS
+        SourceLineInfo( SourceLineInfo && )                  = default;
+        SourceLineInfo& operator = ( SourceLineInfo const& ) = default;
+        SourceLineInfo& operator = ( SourceLineInfo && )     = default;
+#  endif
+        bool empty() const;
+        bool operator == ( SourceLineInfo const& other ) const;
+        bool operator < ( SourceLineInfo const& other ) const;
+
+        std::string file;
+        std::size_t line;
+    };
+
+    std::ostream& operator << ( std::ostream& os, SourceLineInfo const& info );
+
+    // This is just here to avoid compiler warnings with macro constants and boolean literals
+    inline bool isTrue( bool value ){ return value; }
+    inline bool alwaysTrue() { return true; }
+    inline bool alwaysFalse() { return false; }
+
+    void throwLogicError( std::string const& message, SourceLineInfo const& locationInfo );
+
+    // Use this in variadic streaming macros to allow
+    //    >> +StreamEndStop
+    // as well as
+    //    >> stuff +StreamEndStop
+    struct StreamEndStop {
+        std::string operator+() {
+            return std::string();
+        }
+    };
+    template<typename T>
+    T const& operator + ( T const& value, StreamEndStop ) {
+        return value;
+    }
+}
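+// Illustrative note (not from the upstream sources): StreamEndStop lets the variadic
+// message macros append `+ ::Catch::StreamEndStop()` unconditionally. With no user
+// message the unary operator+ yields an empty std::string; with a message the templated
+// operator+ above returns the streamed value unchanged, so
+//
+//     __catchResult << __VA_ARGS__ + ::Catch::StreamEndStop();
+//
+// (used further down in INTERNAL_CATCH_MSG) compiles for zero or more arguments.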
+
+#define CATCH_INTERNAL_LINEINFO ::Catch::SourceLineInfo( __FILE__, static_cast<std::size_t>( __LINE__ ) )
+#define CATCH_INTERNAL_ERROR( msg ) ::Catch::throwLogicError( msg, CATCH_INTERNAL_LINEINFO );
+
+#include <ostream>
+
+namespace Catch {
+
+    class NotImplementedException : public std::exception
+    {
+    public:
+        NotImplementedException( SourceLineInfo const& lineInfo );
+        NotImplementedException( NotImplementedException const& ) {}
+
+        virtual ~NotImplementedException() CATCH_NOEXCEPT {}
+
+        virtual const char* what() const CATCH_NOEXCEPT;
+
+    private:
+        std::string m_what;
+        SourceLineInfo m_lineInfo;
+    };
+
+} // end namespace Catch
+
+///////////////////////////////////////////////////////////////////////////////
+#define CATCH_NOT_IMPLEMENTED throw Catch::NotImplementedException( CATCH_INTERNAL_LINEINFO )
+
+// #included from: internal/catch_context.h
+#define TWOBLUECUBES_CATCH_CONTEXT_H_INCLUDED
+
+// #included from: catch_interfaces_generators.h
+#define TWOBLUECUBES_CATCH_INTERFACES_GENERATORS_H_INCLUDED
+
+#include <string>
+
+namespace Catch {
+
+    struct IGeneratorInfo {
+        virtual ~IGeneratorInfo();
+        virtual bool moveNext() = 0;
+        virtual std::size_t getCurrentIndex() const = 0;
+    };
+
+    struct IGeneratorsForTest {
+        virtual ~IGeneratorsForTest();
+
+        virtual IGeneratorInfo& getGeneratorInfo( std::string const& fileInfo, std::size_t size ) = 0;
+        virtual bool moveNext() = 0;
+    };
+
+    IGeneratorsForTest* createGeneratorsForTest();
+
+} // end namespace Catch
+
+// #included from: catch_ptr.hpp
+#define TWOBLUECUBES_CATCH_PTR_HPP_INCLUDED
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpadded"
+#endif
+
+namespace Catch {
+
+    // An intrusive reference counting smart pointer.
+    // T must implement addRef() and release() methods
+    // typically implementing the IShared interface
+    template<typename T>
+    class Ptr {
+    public:
+        Ptr() : m_p( NULL ){}
+        Ptr( T* p ) : m_p( p ){
+            if( m_p )
+                m_p->addRef();
+        }
+        Ptr( Ptr const& other ) : m_p( other.m_p ){
+            if( m_p )
+                m_p->addRef();
+        }
+        ~Ptr(){
+            if( m_p )
+                m_p->release();
+        }
+        void reset() {
+            if( m_p )
+                m_p->release();
+            m_p = NULL;
+        }
+        Ptr& operator = ( T* p ){
+            Ptr temp( p );
+            swap( temp );
+            return *this;
+        }
+        Ptr& operator = ( Ptr const& other ){
+            Ptr temp( other );
+            swap( temp );
+            return *this;
+        }
+        void swap( Ptr& other ) { std::swap( m_p, other.m_p ); }
+        T* get() { return m_p; }
+        const T* get() const{ return m_p; }
+        T& operator*() const { return *m_p; }
+        T* operator->() const { return m_p; }
+        bool operator !() const { return m_p == NULL; }
+        operator SafeBool::type() const { return SafeBool::makeSafe( m_p != NULL ); }
+
+    private:
+        T* m_p;
+    };
+
+    struct IShared : NonCopyable {
+        virtual ~IShared();
+        virtual void addRef() const = 0;
+        virtual void release() const = 0;
+    };
+
+    template<typename T = IShared>
+    struct SharedImpl : T {
+
+        SharedImpl() : m_rc( 0 ){}
+
+        virtual void addRef() const {
+            ++m_rc;
+        }
+        virtual void release() const {
+            if( --m_rc == 0 )
+                delete this;
+        }
+
+        mutable unsigned int m_rc;
+    };
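+    // Illustrative sketch (not from the upstream sources; MyListener is hypothetical):
+    // a type meant to live in a Ptr<T> derives from SharedImpl<> to pick up the
+    // intrusive reference count, e.g.
+    //
+    //     struct MyListener : SharedImpl<IShared> { /* ... */ };
+    //     Ptr<MyListener> p( new MyListener() );   // addRef() on construction
+    //     Ptr<MyListener> q = p;                   // shared; release() runs in ~Ptr()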
+
+} // end namespace Catch
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+#include <memory>
+#include <vector>
+#include <stdlib.h>
+
+namespace Catch {
+
+    class TestCase;
+    class Stream;
+    struct IResultCapture;
+    struct IRunner;
+    struct IGeneratorsForTest;
+    struct IConfig;
+
+    struct IContext
+    {
+        virtual ~IContext();
+
+        virtual IResultCapture* getResultCapture() = 0;
+        virtual IRunner* getRunner() = 0;
+        virtual size_t getGeneratorIndex( std::string const& fileInfo, size_t totalSize ) = 0;
+        virtual bool advanceGeneratorsForCurrentTest() = 0;
+        virtual Ptr<IConfig const> getConfig() const = 0;
+    };
+
+    struct IMutableContext : IContext
+    {
+        virtual ~IMutableContext();
+        virtual void setResultCapture( IResultCapture* resultCapture ) = 0;
+        virtual void setRunner( IRunner* runner ) = 0;
+        virtual void setConfig( Ptr<IConfig const> const& config ) = 0;
+    };
+
+    IContext& getCurrentContext();
+    IMutableContext& getCurrentMutableContext();
+    void cleanUpContext();
+    Stream createStream( std::string const& streamName );
+
+}
+
+// #included from: internal/catch_test_registry.hpp
+#define TWOBLUECUBES_CATCH_TEST_REGISTRY_HPP_INCLUDED
+
+// #included from: catch_interfaces_testcase.h
+#define TWOBLUECUBES_CATCH_INTERFACES_TESTCASE_H_INCLUDED
+
+#include <vector>
+
+namespace Catch {
+
+    class TestSpec;
+
+    struct ITestCase : IShared {
+        virtual void invoke () const = 0;
+    protected:
+        virtual ~ITestCase();
+    };
+
+    class TestCase;
+    struct IConfig;
+
+    struct ITestCaseRegistry {
+        virtual ~ITestCaseRegistry();
+        virtual std::vector<TestCase> const& getAllTests() const = 0;
+        virtual void getFilteredTests( TestSpec const& testSpec, IConfig const& config, std::vector<TestCase>& matchingTestCases, bool negated = false ) const = 0;
+
+    };
+}
+
+namespace Catch {
+
+template<typename C>
+class MethodTestCase : public SharedImpl<ITestCase> {
+
+public:
+    MethodTestCase( void (C::*method)() ) : m_method( method ) {}
+
+    virtual void invoke() const {
+        C obj;
+        (obj.*m_method)();
+    }
+
+private:
+    virtual ~MethodTestCase() {}
+
+    void (C::*m_method)();
+};
+
+typedef void(*TestFunction)();
+
+struct NameAndDesc {
+    NameAndDesc( const char* _name = "", const char* _description= "" )
+    : name( _name ), description( _description )
+    {}
+
+    const char* name;
+    const char* description;
+};
+
+struct AutoReg {
+
+    AutoReg(    TestFunction function,
+                SourceLineInfo const& lineInfo,
+                NameAndDesc const& nameAndDesc );
+
+    template<typename C>
+    AutoReg(    void (C::*method)(),
+                char const* className,
+                NameAndDesc const& nameAndDesc,
+                SourceLineInfo const& lineInfo ) {
+        registerTestCase(   new MethodTestCase<C>( method ),
+                            className,
+                            nameAndDesc,
+                            lineInfo );
+    }
+
+    void registerTestCase(  ITestCase* testCase,
+                            char const* className,
+                            NameAndDesc const& nameAndDesc,
+                            SourceLineInfo const& lineInfo );
+
+    ~AutoReg();
+
+private:
+    AutoReg( AutoReg const& );
+    void operator= ( AutoReg const& );
+};
+
+} // end namespace Catch
+
+#ifdef CATCH_CONFIG_VARIADIC_MACROS
+    ///////////////////////////////////////////////////////////////////////////////
+    #define INTERNAL_CATCH_TESTCASE( ... ) \
+        static void INTERNAL_CATCH_UNIQUE_NAME( ____C_A_T_C_H____T_E_S_T____ )(); \
+        namespace{ Catch::AutoReg INTERNAL_CATCH_UNIQUE_NAME( autoRegistrar )( &INTERNAL_CATCH_UNIQUE_NAME(  ____C_A_T_C_H____T_E_S_T____ ), CATCH_INTERNAL_LINEINFO, Catch::NameAndDesc( __VA_ARGS__ ) ); }\
+        static void INTERNAL_CATCH_UNIQUE_NAME(  ____C_A_T_C_H____T_E_S_T____ )()
+
+    ///////////////////////////////////////////////////////////////////////////////
+    #define INTERNAL_CATCH_METHOD_AS_TEST_CASE( QualifiedMethod, ... ) \
+        namespace{ Catch::AutoReg INTERNAL_CATCH_UNIQUE_NAME( autoRegistrar )( &QualifiedMethod, "&" #QualifiedMethod, Catch::NameAndDesc( __VA_ARGS__ ), CATCH_INTERNAL_LINEINFO ); }
+
+    ///////////////////////////////////////////////////////////////////////////////
+    #define INTERNAL_CATCH_TEST_CASE_METHOD( ClassName, ... )\
+        namespace{ \
+            struct INTERNAL_CATCH_UNIQUE_NAME( ____C_A_T_C_H____T_E_S_T____ ) : ClassName{ \
+                void test(); \
+            }; \
+            Catch::AutoReg INTERNAL_CATCH_UNIQUE_NAME( autoRegistrar ) ( &INTERNAL_CATCH_UNIQUE_NAME( ____C_A_T_C_H____T_E_S_T____ )::test, #ClassName, Catch::NameAndDesc( __VA_ARGS__ ), CATCH_INTERNAL_LINEINFO ); \
+        } \
+        void INTERNAL_CATCH_UNIQUE_NAME( ____C_A_T_C_H____T_E_S_T____ )::test()
+
+#else
+    ///////////////////////////////////////////////////////////////////////////////
+    #define INTERNAL_CATCH_TESTCASE( Name, Desc ) \
+        static void INTERNAL_CATCH_UNIQUE_NAME( ____C_A_T_C_H____T_E_S_T____ )(); \
+        namespace{ Catch::AutoReg INTERNAL_CATCH_UNIQUE_NAME( autoRegistrar )( &INTERNAL_CATCH_UNIQUE_NAME(  ____C_A_T_C_H____T_E_S_T____ ), CATCH_INTERNAL_LINEINFO, Catch::NameAndDesc( Name, Desc ) ); }\
+        static void INTERNAL_CATCH_UNIQUE_NAME(  ____C_A_T_C_H____T_E_S_T____ )()
+
+    ///////////////////////////////////////////////////////////////////////////////
+    #define INTERNAL_CATCH_METHOD_AS_TEST_CASE( QualifiedMethod, Name, Desc ) \
+        namespace{ Catch::AutoReg INTERNAL_CATCH_UNIQUE_NAME( autoRegistrar )( &QualifiedMethod, "&" #QualifiedMethod, Catch::NameAndDesc( Name, Desc ), CATCH_INTERNAL_LINEINFO ); }
+
+    ///////////////////////////////////////////////////////////////////////////////
+    #define INTERNAL_CATCH_TEST_CASE_METHOD( ClassName, TestName, Desc )\
+        namespace{ \
+            struct INTERNAL_CATCH_UNIQUE_NAME( ____C_A_T_C_H____T_E_S_T____ ) : ClassName{ \
+                void test(); \
+            }; \
+            Catch::AutoReg INTERNAL_CATCH_UNIQUE_NAME( autoRegistrar ) ( &INTERNAL_CATCH_UNIQUE_NAME( ____C_A_T_C_H____T_E_S_T____ )::test, #ClassName, Catch::NameAndDesc( TestName, Desc ), CATCH_INTERNAL_LINEINFO ); \
+        } \
+        void INTERNAL_CATCH_UNIQUE_NAME( ____C_A_T_C_H____T_E_S_T____ )::test()
+
+#endif
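+// Illustrative sketch (not from the upstream sources; the test name and tag are made up):
+// the user-level TEST_CASE macro funnels into INTERNAL_CATCH_TESTCASE above, so roughly
+//
+//     TEST_CASE( "widget is constructed empty", "[widget]" ) { REQUIRE( 1 + 1 == 2 ); }
+//
+// declares a static test function, registers it through a file-scope Catch::AutoReg
+// object (function pointer, line info, name/description), and then supplies the body.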
+
+// #included from: internal/catch_capture.hpp
+#define TWOBLUECUBES_CATCH_CAPTURE_HPP_INCLUDED
+
+// #included from: catch_result_builder.h
+#define TWOBLUECUBES_CATCH_RESULT_BUILDER_H_INCLUDED
+
+// #included from: catch_result_type.h
+#define TWOBLUECUBES_CATCH_RESULT_TYPE_H_INCLUDED
+
+namespace Catch {
+
+    // ResultWas::OfType enum
+    struct ResultWas { enum OfType {
+        Unknown = -1,
+        Ok = 0,
+        Info = 1,
+        Warning = 2,
+
+        FailureBit = 0x10,
+
+        ExpressionFailed = FailureBit | 1,
+        ExplicitFailure = FailureBit | 2,
+
+        Exception = 0x100 | FailureBit,
+
+        ThrewException = Exception | 1,
+        DidntThrowException = Exception | 2,
+
+        FatalErrorCondition = 0x200 | FailureBit
+
+    }; };
+
+    inline bool isOk( ResultWas::OfType resultType ) {
+        return ( resultType & ResultWas::FailureBit ) == 0;
+    }
+    inline bool isJustInfo( int flags ) {
+        return flags == ResultWas::Info;
+    }
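+    // Illustrative note (not from the upstream sources): the check above is a pure bit
+    // test, so isOk( ResultWas::Ok ) and isOk( ResultWas::Info ) hold, while any value
+    // carrying FailureBit (ExpressionFailed, ThrewException, FatalErrorCondition, ...)
+    // does not.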
+
+    // ResultDisposition::Flags enum
+    struct ResultDisposition { enum Flags {
+        Normal = 0x01,
+
+        ContinueOnFailure = 0x02,   // Failures fail test, but execution continues
+        FalseTest = 0x04,           // Prefix expression with !
+        SuppressFail = 0x08         // Failures are reported but do not fail the test
+    }; };
+
+    inline ResultDisposition::Flags operator | ( ResultDisposition::Flags lhs, ResultDisposition::Flags rhs ) {
+        return static_cast<ResultDisposition::Flags>( static_cast<int>( lhs ) | static_cast<int>( rhs ) );
+    }
+
+    inline bool shouldContinueOnFailure( int flags )    { return ( flags & ResultDisposition::ContinueOnFailure ) != 0; }
+    inline bool isFalseTest( int flags )                { return ( flags & ResultDisposition::FalseTest ) != 0; }
+    inline bool shouldSuppressFailure( int flags )      { return ( flags & ResultDisposition::SuppressFail ) != 0; }
+
+} // end namespace Catch
+
+// #included from: catch_assertionresult.h
+#define TWOBLUECUBES_CATCH_ASSERTIONRESULT_H_INCLUDED
+
+#include <string>
+
+namespace Catch {
+
+    struct AssertionInfo
+    {
+        AssertionInfo() {}
+        AssertionInfo(  std::string const& _macroName,
+                        SourceLineInfo const& _lineInfo,
+                        std::string const& _capturedExpression,
+                        ResultDisposition::Flags _resultDisposition );
+
+        std::string macroName;
+        SourceLineInfo lineInfo;
+        std::string capturedExpression;
+        ResultDisposition::Flags resultDisposition;
+    };
+
+    struct AssertionResultData
+    {
+        AssertionResultData() : resultType( ResultWas::Unknown ) {}
+
+        std::string reconstructedExpression;
+        std::string message;
+        ResultWas::OfType resultType;
+    };
+
+    class AssertionResult {
+    public:
+        AssertionResult();
+        AssertionResult( AssertionInfo const& info, AssertionResultData const& data );
+        ~AssertionResult();
+#  ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS
+         AssertionResult( AssertionResult const& )              = default;
+         AssertionResult( AssertionResult && )                  = default;
+         AssertionResult& operator = ( AssertionResult const& ) = default;
+         AssertionResult& operator = ( AssertionResult && )     = default;
+#  endif
+
+        bool isOk() const;
+        bool succeeded() const;
+        ResultWas::OfType getResultType() const;
+        bool hasExpression() const;
+        bool hasMessage() const;
+        std::string getExpression() const;
+        std::string getExpressionInMacro() const;
+        bool hasExpandedExpression() const;
+        std::string getExpandedExpression() const;
+        std::string getMessage() const;
+        SourceLineInfo getSourceInfo() const;
+        std::string getTestMacroName() const;
+
+    protected:
+        AssertionInfo m_info;
+        AssertionResultData m_resultData;
+    };
+
+} // end namespace Catch
+
+namespace Catch {
+
+    struct TestFailureException{};
+
+    template<typename T> class ExpressionLhs;
+
+    struct STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison;
+
+    struct CopyableStream {
+        CopyableStream() {}
+        CopyableStream( CopyableStream const& other ) {
+            oss << other.oss.str();
+        }
+        CopyableStream& operator=( CopyableStream const& other ) {
+            oss.str("");
+            oss << other.oss.str();
+            return *this;
+        }
+        std::ostringstream oss;
+    };
+
+    class ResultBuilder {
+    public:
+        ResultBuilder(  char const* macroName,
+                        SourceLineInfo const& lineInfo,
+                        char const* capturedExpression,
+                        ResultDisposition::Flags resultDisposition );
+
+        template<typename T>
+        ExpressionLhs<T const&> operator <= ( T const& operand );
+        ExpressionLhs<bool> operator <= ( bool value );
+
+        template<typename T>
+        ResultBuilder& operator << ( T const& value ) {
+            m_stream.oss << value;
+            return *this;
+        }
+
+        template<typename RhsT> STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator && ( RhsT const& );
+        template<typename RhsT> STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator || ( RhsT const& );
+
+        ResultBuilder& setResultType( ResultWas::OfType result );
+        ResultBuilder& setResultType( bool result );
+        ResultBuilder& setLhs( std::string const& lhs );
+        ResultBuilder& setRhs( std::string const& rhs );
+        ResultBuilder& setOp( std::string const& op );
+
+        void endExpression();
+
+        std::string reconstructExpression() const;
+        AssertionResult build() const;
+
+        void useActiveException( ResultDisposition::Flags resultDisposition = ResultDisposition::Normal );
+        void captureResult( ResultWas::OfType resultType );
+        void captureExpression();
+        void react();
+        bool shouldDebugBreak() const;
+        bool allowThrows() const;
+
+    private:
+        AssertionInfo m_assertionInfo;
+        AssertionResultData m_data;
+        struct ExprComponents {
+            ExprComponents() : testFalse( false ) {}
+            bool testFalse;
+            std::string lhs, rhs, op;
+        } m_exprComponents;
+        CopyableStream m_stream;
+
+        bool m_shouldDebugBreak;
+        bool m_shouldThrow;
+    };
+
+} // namespace Catch
+
+// Included after ResultBuilder due to the circular dependency with ExpressionLhs:
+// #included from: catch_expression_lhs.hpp
+#define TWOBLUECUBES_CATCH_EXPRESSION_LHS_HPP_INCLUDED
+
+// #included from: catch_evaluate.hpp
+#define TWOBLUECUBES_CATCH_EVALUATE_HPP_INCLUDED
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable:4389) // '==' : signed/unsigned mismatch
+#endif
+
+#include <cstddef>
+
+namespace Catch {
+namespace Internal {
+
+    enum Operator {
+        IsEqualTo,
+        IsNotEqualTo,
+        IsLessThan,
+        IsGreaterThan,
+        IsLessThanOrEqualTo,
+        IsGreaterThanOrEqualTo
+    };
+
+    template<Operator Op> struct OperatorTraits             { static const char* getName(){ return "*error*"; } };
+    template<> struct OperatorTraits<IsEqualTo>             { static const char* getName(){ return "=="; } };
+    template<> struct OperatorTraits<IsNotEqualTo>          { static const char* getName(){ return "!="; } };
+    template<> struct OperatorTraits<IsLessThan>            { static const char* getName(){ return "<"; } };
+    template<> struct OperatorTraits<IsGreaterThan>         { static const char* getName(){ return ">"; } };
+    template<> struct OperatorTraits<IsLessThanOrEqualTo>   { static const char* getName(){ return "<="; } };
+    template<> struct OperatorTraits<IsGreaterThanOrEqualTo>{ static const char* getName(){ return ">="; } };
+
+    template<typename T>
+    inline T& opCast(T const& t) { return const_cast<T&>(t); }
+
+// nullptr_t support based on pull request #154 from Konstantin Baumann
+#ifdef CATCH_CONFIG_CPP11_NULLPTR
+    inline std::nullptr_t opCast(std::nullptr_t) { return nullptr; }
+#endif // CATCH_CONFIG_CPP11_NULLPTR
+
+    // So that the compare overloads can be operator agnostic, we convey the operator as a
+    // template enum, which is used to specialise an Evaluator for doing the comparison.
+    template<typename T1, typename T2, Operator Op>
+    class Evaluator{};
+
+    template<typename T1, typename T2>
+    struct Evaluator<T1, T2, IsEqualTo> {
+        static bool evaluate( T1 const& lhs, T2 const& rhs) {
+            return opCast( lhs ) ==  opCast( rhs );
+        }
+    };
+    template<typename T1, typename T2>
+    struct Evaluator<T1, T2, IsNotEqualTo> {
+        static bool evaluate( T1 const& lhs, T2 const& rhs ) {
+            return opCast( lhs ) != opCast( rhs );
+        }
+    };
+    template<typename T1, typename T2>
+    struct Evaluator<T1, T2, IsLessThan> {
+        static bool evaluate( T1 const& lhs, T2 const& rhs ) {
+            return opCast( lhs ) < opCast( rhs );
+        }
+    };
+    template<typename T1, typename T2>
+    struct Evaluator<T1, T2, IsGreaterThan> {
+        static bool evaluate( T1 const& lhs, T2 const& rhs ) {
+            return opCast( lhs ) > opCast( rhs );
+        }
+    };
+    template<typename T1, typename T2>
+    struct Evaluator<T1, T2, IsGreaterThanOrEqualTo> {
+        static bool evaluate( T1 const& lhs, T2 const& rhs ) {
+            return opCast( lhs ) >= opCast( rhs );
+        }
+    };
+    template<typename T1, typename T2>
+    struct Evaluator<T1, T2, IsLessThanOrEqualTo> {
+        static bool evaluate( T1 const& lhs, T2 const& rhs ) {
+            return opCast( lhs ) <= opCast( rhs );
+        }
+    };
+
+    template<Operator Op, typename T1, typename T2>
+    bool applyEvaluator( T1 const& lhs, T2 const& rhs ) {
+        return Evaluator<T1, T2, Op>::evaluate( lhs, rhs );
+    }
+
+    // This level of indirection allows us to specialise for integer types
+    // to avoid signed/unsigned comparison warnings
+
+    // "base" overload
+    template<Operator Op, typename T1, typename T2>
+    bool compare( T1 const& lhs, T2 const& rhs ) {
+        return Evaluator<T1, T2, Op>::evaluate( lhs, rhs );
+    }
+
+    // unsigned X to int
+    template<Operator Op> bool compare( unsigned int lhs, int rhs ) {
+        return applyEvaluator<Op>( lhs, static_cast<unsigned int>( rhs ) );
+    }
+    template<Operator Op> bool compare( unsigned long lhs, int rhs ) {
+        return applyEvaluator<Op>( lhs, static_cast<unsigned int>( rhs ) );
+    }
+    template<Operator Op> bool compare( unsigned char lhs, int rhs ) {
+        return applyEvaluator<Op>( lhs, static_cast<unsigned int>( rhs ) );
+    }
+
+    // unsigned X to long
+    template<Operator Op> bool compare( unsigned int lhs, long rhs ) {
+        return applyEvaluator<Op>( lhs, static_cast<unsigned long>( rhs ) );
+    }
+    template<Operator Op> bool compare( unsigned long lhs, long rhs ) {
+        return applyEvaluator<Op>( lhs, static_cast<unsigned long>( rhs ) );
+    }
+    template<Operator Op> bool compare( unsigned char lhs, long rhs ) {
+        return applyEvaluator<Op>( lhs, static_cast<unsigned long>( rhs ) );
+    }
+
+    // int to unsigned X
+    template<Operator Op> bool compare( int lhs, unsigned int rhs ) {
+        return applyEvaluator<Op>( static_cast<unsigned int>( lhs ), rhs );
+    }
+    template<Operator Op> bool compare( int lhs, unsigned long rhs ) {
+        return applyEvaluator<Op>( static_cast<unsigned int>( lhs ), rhs );
+    }
+    template<Operator Op> bool compare( int lhs, unsigned char rhs ) {
+        return applyEvaluator<Op>( static_cast<unsigned int>( lhs ), rhs );
+    }
+
+    // long to unsigned X
+    template<Operator Op> bool compare( long lhs, unsigned int rhs ) {
+        return applyEvaluator<Op>( static_cast<unsigned long>( lhs ), rhs );
+    }
+    template<Operator Op> bool compare( long lhs, unsigned long rhs ) {
+        return applyEvaluator<Op>( static_cast<unsigned long>( lhs ), rhs );
+    }
+    template<Operator Op> bool compare( long lhs, unsigned char rhs ) {
+        return applyEvaluator<Op>( static_cast<unsigned long>( lhs ), rhs );
+    }
+
+    // pointer to long (when comparing against NULL)
+    template<Operator Op, typename T> bool compare( long lhs, T* rhs ) {
+        return Evaluator<T*, T*, Op>::evaluate( reinterpret_cast<T*>( lhs ), rhs );
+    }
+    template<Operator Op, typename T> bool compare( T* lhs, long rhs ) {
+        return Evaluator<T*, T*, Op>::evaluate( lhs, reinterpret_cast<T*>( rhs ) );
+    }
+
+    // pointer to int (when comparing against NULL)
+    template<Operator Op, typename T> bool compare( int lhs, T* rhs ) {
+        return Evaluator<T*, T*, Op>::evaluate( reinterpret_cast<T*>( lhs ), rhs );
+    }
+    template<Operator Op, typename T> bool compare( T* lhs, int rhs ) {
+        return Evaluator<T*, T*, Op>::evaluate( lhs, reinterpret_cast<T*>( rhs ) );
+    }
+
+#ifdef CATCH_CONFIG_CPP11_NULLPTR
+    // pointer to nullptr_t (when comparing against nullptr)
+    template<Operator Op, typename T> bool compare( std::nullptr_t, T* rhs ) {
+        return Evaluator<T*, T*, Op>::evaluate( NULL, rhs );
+    }
+    template<Operator Op, typename T> bool compare( T* lhs, std::nullptr_t ) {
+        return Evaluator<T*, T*, Op>::evaluate( lhs, NULL );
+    }
+#endif // CATCH_CONFIG_CPP11_NULLPTR
+
+} // end of namespace Internal
+} // end of namespace Catch
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
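+// Illustrative note (not from the upstream sources; someUnsignedValue is hypothetical):
+// ExpressionLhs::captureExpression (further down) dispatches through compare<Op>(), so an
+// assertion comparing an unsigned variable against an int literal resolves to something like
+//
+//     Internal::compare<Internal::IsEqualTo>( someUnsignedValue, 1 );
+//
+// which selects one of the mixed-signedness overloads above and sidesteps the
+// signed/unsigned comparison warning.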
+
+// #included from: catch_tostring.h
+#define TWOBLUECUBES_CATCH_TOSTRING_H_INCLUDED
+
+#include <sstream>
+#include <iomanip>
+#include <limits>
+#include <vector>
+#include <cstddef>
+
+#ifdef __OBJC__
+// #included from: catch_objc_arc.hpp
+#define TWOBLUECUBES_CATCH_OBJC_ARC_HPP_INCLUDED
+
+#import <Foundation/Foundation.h>
+
+#ifdef __has_feature
+#define CATCH_ARC_ENABLED __has_feature(objc_arc)
+#else
+#define CATCH_ARC_ENABLED 0
+#endif
+
+void arcSafeRelease( NSObject* obj );
+id performOptionalSelector( id obj, SEL sel );
+
+#if !CATCH_ARC_ENABLED
+inline void arcSafeRelease( NSObject* obj ) {
+    [obj release];
+}
+inline id performOptionalSelector( id obj, SEL sel ) {
+    if( [obj respondsToSelector: sel] )
+        return [obj performSelector: sel];
+    return nil;
+}
+#define CATCH_UNSAFE_UNRETAINED
+#define CATCH_ARC_STRONG
+#else
+inline void arcSafeRelease( NSObject* ){}
+inline id performOptionalSelector( id obj, SEL sel ) {
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Warc-performSelector-leaks"
+#endif
+    if( [obj respondsToSelector: sel] )
+        return [obj performSelector: sel];
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+    return nil;
+}
+#define CATCH_UNSAFE_UNRETAINED __unsafe_unretained
+#define CATCH_ARC_STRONG __strong
+#endif
+
+#endif
+
+#ifdef CATCH_CONFIG_CPP11_TUPLE
+#include <tuple>
+#endif
+
+#ifdef CATCH_CONFIG_CPP11_IS_ENUM
+#include <type_traits>
+#endif
+
+namespace Catch {
+
+// Why we're here.
+template<typename T>
+std::string toString( T const& value );
+
+// Built in overloads
+
+std::string toString( std::string const& value );
+std::string toString( std::wstring const& value );
+std::string toString( const char* const value );
+std::string toString( char* const value );
+std::string toString( const wchar_t* const value );
+std::string toString( wchar_t* const value );
+std::string toString( int value );
+std::string toString( unsigned long value );
+std::string toString( unsigned int value );
+std::string toString( const double value );
+std::string toString( const float value );
+std::string toString( bool value );
+std::string toString( char value );
+std::string toString( signed char value );
+std::string toString( unsigned char value );
+
+#ifdef CATCH_CONFIG_CPP11_NULLPTR
+std::string toString( std::nullptr_t );
+#endif
+
+#ifdef __OBJC__
+    std::string toString( NSString const * const& nsstring );
+    std::string toString( NSString * CATCH_ARC_STRONG const& nsstring );
+    std::string toString( NSObject* const& nsObject );
+#endif
+
+namespace Detail {
+
+    extern std::string unprintableString;
+
+    struct BorgType {
+        template<typename T> BorgType( T const& );
+    };
+
+    struct TrueType { char sizer[1]; };
+    struct FalseType { char sizer[2]; };
+
+    TrueType& testStreamable( std::ostream& );
+    FalseType testStreamable( FalseType );
+
+    FalseType operator<<( std::ostream const&, BorgType const& );
+
+    template<typename T>
+    struct IsStreamInsertable {
+        static std::ostream &s;
+        static T const& t;
+        enum { value = sizeof( testStreamable(s << t) ) == sizeof( TrueType ) };
+    };
+
+#if defined(CATCH_CONFIG_CPP11_IS_ENUM)
+    template<typename T,
+             bool IsEnum = std::is_enum<T>::value
+             >
+    struct EnumStringMaker
+    {
+        static std::string convert( T const& ) { return unprintableString; }
+    };
+
+    template<typename T>
+    struct EnumStringMaker<T,true>
+    {
+        static std::string convert( T const& v )
+        {
+            return ::Catch::toString(
+                static_cast<typename std::underlying_type<T>::type>(v)
+                );
+        }
+    };
+#endif
+    template<bool C>
+    struct StringMakerBase {
+#if defined(CATCH_CONFIG_CPP11_IS_ENUM)
+        template<typename T>
+        static std::string convert( T const& v )
+        {
+            return EnumStringMaker<T>::convert( v );
+        }
+#else
+        template<typename T>
+        static std::string convert( T const& ) { return unprintableString; }
+#endif
+    };
+
+    template<>
+    struct StringMakerBase<true> {
+        template<typename T>
+        static std::string convert( T const& _value ) {
+            std::ostringstream oss;
+            oss << _value;
+            return oss.str();
+        }
+    };
+
+    std::string rawMemoryToString( const void *object, std::size_t size );
+
+    template<typename T>
+    inline std::string rawMemoryToString( const T& object ) {
+      return rawMemoryToString( &object, sizeof(object) );
+    }
+
+} // end namespace Detail
+
+template<typename T>
+struct StringMaker :
+    Detail::StringMakerBase<Detail::IsStreamInsertable<T>::value> {};
+
+template<typename T>
+struct StringMaker<T*> {
+    template<typename U>
+    static std::string convert( U* p ) {
+        if( !p )
+            return INTERNAL_CATCH_STRINGIFY( NULL );
+        else
+            return Detail::rawMemoryToString( p );
+    }
+};
+
+template<typename R, typename C>
+struct StringMaker<R C::*> {
+    static std::string convert( R C::* p ) {
+        if( !p )
+            return INTERNAL_CATCH_STRINGIFY( NULL );
+        else
+            return Detail::rawMemoryToString( p );
+    }
+};
+
+namespace Detail {
+    template<typename InputIterator>
+    std::string rangeToString( InputIterator first, InputIterator last );
+}
+
+//template<typename T, typename Allocator>
+//struct StringMaker<std::vector<T, Allocator> > {
+//    static std::string convert( std::vector<T,Allocator> const& v ) {
+//        return Detail::rangeToString( v.begin(), v.end() );
+//    }
+//};
+
+template<typename T, typename Allocator>
+std::string toString( std::vector<T,Allocator> const& v ) {
+    return Detail::rangeToString( v.begin(), v.end() );
+}
+
+#ifdef CATCH_CONFIG_CPP11_TUPLE
+
+// toString for tuples
+namespace TupleDetail {
+  template<
+      typename Tuple,
+      std::size_t N = 0,
+      bool = (N < std::tuple_size<Tuple>::value)
+      >
+  struct ElementPrinter {
+      static void print( const Tuple& tuple, std::ostream& os )
+      {
+          os << ( N ? ", " : " " )
+             << Catch::toString(std::get<N>(tuple));
+          ElementPrinter<Tuple,N+1>::print(tuple,os);
+      }
+  };
+
+  template<
+      typename Tuple,
+      std::size_t N
+      >
+  struct ElementPrinter<Tuple,N,false> {
+      static void print( const Tuple&, std::ostream& ) {}
+  };
+
+}
+
+template<typename ...Types>
+struct StringMaker<std::tuple<Types...>> {
+
+    static std::string convert( const std::tuple<Types...>& tuple )
+    {
+        std::ostringstream os;
+        os << '{';
+        TupleDetail::ElementPrinter<std::tuple<Types...>>::print( tuple, os );
+        os << " }";
+        return os.str();
+    }
+};
+#endif // CATCH_CONFIG_CPP11_TUPLE
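+// Illustrative note (not from the upstream sources): with C++11 tuples enabled,
+// Catch::toString( std::make_tuple( 1, 2 ) ) renders as "{ 1, 2 }".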
+
+namespace Detail {
+    template<typename T>
+    std::string makeString( T const& value ) {
+        return StringMaker<T>::convert( value );
+    }
+} // end namespace Detail
+
+/// \brief converts any type to a string
+///
+/// The default template forwards on to ostringstream - except when the type has no
+/// ostream insertion operator (operator<<) - in which case it attempts to detect
+/// that and writes {?}.
+/// Overload (not specialise) this template for custom types that you don't want
+/// to provide an ostream overload for.
+template<typename T>
+std::string toString( T const& value ) {
+    return StringMaker<T>::convert( value );
+}
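+// Illustrative sketch (not from the upstream sources; MyType and its id member are
+// hypothetical): per the note above, a type without an operator<< can be made printable
+// by overloading toString in namespace Catch:
+//
+//     namespace Catch {
+//         std::string toString( MyType const& v ) { return "MyType(" + toString( v.id ) + ")"; }
+//     }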
+
+    namespace Detail {
+    template<typename InputIterator>
+    std::string rangeToString( InputIterator first, InputIterator last ) {
+        std::ostringstream oss;
+        oss << "{ ";
+        if( first != last ) {
+            oss << Catch::toString( *first );
+            for( ++first ; first != last ; ++first )
+                oss << ", " << Catch::toString( *first );
+        }
+        oss << " }";
+        return oss.str();
+    }
+}
+
+} // end namespace Catch
+
+namespace Catch {
+
+// Wraps the LHS of an expression and captures the operator and RHS (if any) -
+// wrapping them all in a ResultBuilder object
+template<typename T>
+class ExpressionLhs {
+    ExpressionLhs& operator = ( ExpressionLhs const& );
+#  ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS
+    ExpressionLhs& operator = ( ExpressionLhs && ) = delete;
+#  endif
+
+public:
+    ExpressionLhs( ResultBuilder& rb, T lhs ) : m_rb( rb ), m_lhs( lhs ) {}
+#  ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS
+    ExpressionLhs( ExpressionLhs const& ) = default;
+    ExpressionLhs( ExpressionLhs && )     = default;
+#  endif
+
+    template<typename RhsT>
+    ResultBuilder& operator == ( RhsT const& rhs ) {
+        return captureExpression<Internal::IsEqualTo>( rhs );
+    }
+
+    template<typename RhsT>
+    ResultBuilder& operator != ( RhsT const& rhs ) {
+        return captureExpression<Internal::IsNotEqualTo>( rhs );
+    }
+
+    template<typename RhsT>
+    ResultBuilder& operator < ( RhsT const& rhs ) {
+        return captureExpression<Internal::IsLessThan>( rhs );
+    }
+
+    template<typename RhsT>
+    ResultBuilder& operator > ( RhsT const& rhs ) {
+        return captureExpression<Internal::IsGreaterThan>( rhs );
+    }
+
+    template<typename RhsT>
+    ResultBuilder& operator <= ( RhsT const& rhs ) {
+        return captureExpression<Internal::IsLessThanOrEqualTo>( rhs );
+    }
+
+    template<typename RhsT>
+    ResultBuilder& operator >= ( RhsT const& rhs ) {
+        return captureExpression<Internal::IsGreaterThanOrEqualTo>( rhs );
+    }
+
+    ResultBuilder& operator == ( bool rhs ) {
+        return captureExpression<Internal::IsEqualTo>( rhs );
+    }
+
+    ResultBuilder& operator != ( bool rhs ) {
+        return captureExpression<Internal::IsNotEqualTo>( rhs );
+    }
+
+    void endExpression() {
+        bool value = m_lhs ? true : false;
+        m_rb
+            .setLhs( Catch::toString( value ) )
+            .setResultType( value )
+            .endExpression();
+    }
+
+    // Only simple binary expressions are allowed on the LHS.
+    // If more complex compositions are required, place the sub-expression in parentheses.
+    template<typename RhsT> STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator + ( RhsT const& );
+    template<typename RhsT> STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator - ( RhsT const& );
+    template<typename RhsT> STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator / ( RhsT const& );
+    template<typename RhsT> STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator * ( RhsT const& );
+    template<typename RhsT> STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator && ( RhsT const& );
+    template<typename RhsT> STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator || ( RhsT const& );
+
+private:
+    template<Internal::Operator Op, typename RhsT>
+    ResultBuilder& captureExpression( RhsT const& rhs ) {
+        return m_rb
+            .setResultType( Internal::compare<Op>( m_lhs, rhs ) )
+            .setLhs( Catch::toString( m_lhs ) )
+            .setRhs( Catch::toString( rhs ) )
+            .setOp( Internal::OperatorTraits<Op>::getName() );
+    }
+
+private:
+    ResultBuilder& m_rb;
+    T m_lhs;
+};
+
+} // end namespace Catch
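+// Illustrative note (not from the upstream sources; a and b are hypothetical): because
+// ExpressionLhs and ResultBuilder leave &&, || and the arithmetic operators returning the
+// "Expression Too Complex" type, a compound condition must be parenthesised so it collapses
+// to a single bool before capture:
+//
+//     CHECK( ( a == 1 && b == 2 ) );   // fine: the parenthesised expression is the LHS
+//     CHECK( a == 1 && b == 2 );       // fails to compile, naming the static-assert type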
+
+
+namespace Catch {
+
+    template<typename T>
+    inline ExpressionLhs<T const&> ResultBuilder::operator <= ( T const& operand ) {
+        return ExpressionLhs<T const&>( *this, operand );
+    }
+
+    inline ExpressionLhs<bool> ResultBuilder::operator <= ( bool value ) {
+        return ExpressionLhs<bool>( *this, value );
+    }
+
+} // namespace Catch
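+// Illustrative note (not from the upstream sources; x is hypothetical): operator<= is used
+// as the capture operator because the relational operators bind more tightly than == and !=,
+// so inside INTERNAL_CATCH_TEST (below) an expression such as
+//
+//     ( __catchResult <= x == 42 ).endExpression();
+//
+// parses as ( ( __catchResult <= x ) == 42 ), letting ExpressionLhs record the LHS,
+// the operator and the RHS before the result is reacted to.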
+
+// #included from: catch_message.h
+#define TWOBLUECUBES_CATCH_MESSAGE_H_INCLUDED
+
+#include <string>
+
+namespace Catch {
+
+    struct MessageInfo {
+        MessageInfo(    std::string const& _macroName,
+                        SourceLineInfo const& _lineInfo,
+                        ResultWas::OfType _type );
+
+        std::string macroName;
+        SourceLineInfo lineInfo;
+        ResultWas::OfType type;
+        std::string message;
+        unsigned int sequence;
+
+        bool operator == ( MessageInfo const& other ) const {
+            return sequence == other.sequence;
+        }
+        bool operator < ( MessageInfo const& other ) const {
+            return sequence < other.sequence;
+        }
+    private:
+        static unsigned int globalCount;
+    };
+
+    struct MessageBuilder {
+        MessageBuilder( std::string const& macroName,
+                        SourceLineInfo const& lineInfo,
+                        ResultWas::OfType type )
+        : m_info( macroName, lineInfo, type )
+        {}
+
+        template<typename T>
+        MessageBuilder& operator << ( T const& value ) {
+            m_stream << value;
+            return *this;
+        }
+
+        MessageInfo m_info;
+        std::ostringstream m_stream;
+    };
+
+    class ScopedMessage {
+    public:
+        ScopedMessage( MessageBuilder const& builder );
+        ScopedMessage( ScopedMessage const& other );
+        ~ScopedMessage();
+
+        MessageInfo m_info;
+    };
+
+} // end namespace Catch
+
+// #included from: catch_interfaces_capture.h
+#define TWOBLUECUBES_CATCH_INTERFACES_CAPTURE_H_INCLUDED
+
+#include <string>
+
+namespace Catch {
+
+    class TestCase;
+    class AssertionResult;
+    struct AssertionInfo;
+    struct SectionInfo;
+    struct MessageInfo;
+    class ScopedMessageBuilder;
+    struct Counts;
+
+    struct IResultCapture {
+
+        virtual ~IResultCapture();
+
+        virtual void assertionEnded( AssertionResult const& result ) = 0;
+        virtual bool sectionStarted(    SectionInfo const& sectionInfo,
+                                        Counts& assertions ) = 0;
+        virtual void sectionEnded( SectionInfo const& name, Counts const& assertions, double _durationInSeconds ) = 0;
+        virtual void pushScopedMessage( MessageInfo const& message ) = 0;
+        virtual void popScopedMessage( MessageInfo const& message ) = 0;
+
+        virtual std::string getCurrentTestName() const = 0;
+        virtual const AssertionResult* getLastResult() const = 0;
+
+        virtual void handleFatalErrorCondition( std::string const& message ) = 0;
+    };
+
+    IResultCapture& getResultCapture();
+}
+
+// #included from: catch_debugger.h
+#define TWOBLUECUBES_CATCH_DEBUGGER_H_INCLUDED
+
+// #included from: catch_platform.h
+#define TWOBLUECUBES_CATCH_PLATFORM_H_INCLUDED
+
+#if defined(__MAC_OS_X_VERSION_MIN_REQUIRED)
+#define CATCH_PLATFORM_MAC
+#elif  defined(__IPHONE_OS_VERSION_MIN_REQUIRED)
+#define CATCH_PLATFORM_IPHONE
+#elif defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER)
+#define CATCH_PLATFORM_WINDOWS
+#endif
+
+#include <string>
+
+namespace Catch{
+
+    bool isDebuggerActive();
+    void writeToDebugConsole( std::string const& text );
+}
+
+#ifdef CATCH_PLATFORM_MAC
+
+    // The following code snippet is based on:
+    // http://cocoawithlove.com/2008/03/break-into-debugger.html
+    #ifdef DEBUG
+        #if defined(__ppc64__) || defined(__ppc__)
+            #define CATCH_BREAK_INTO_DEBUGGER() \
+                if( Catch::isDebuggerActive() ) { \
+                    __asm__("li r0, 20\nsc\nnop\nli r0, 37\nli r4, 2\nsc\nnop\n" \
+                    : : : "memory","r0","r3","r4" ); \
+                }
+        #else
+            #define CATCH_BREAK_INTO_DEBUGGER() if( Catch::isDebuggerActive() ) {__asm__("int $3\n" : : );}
+        #endif
+    #endif
+
+#elif defined(_MSC_VER)
+    #define CATCH_BREAK_INTO_DEBUGGER() if( Catch::isDebuggerActive() ) { __debugbreak(); }
+#elif defined(__MINGW32__)
+    extern "C" __declspec(dllimport) void __stdcall DebugBreak();
+    #define CATCH_BREAK_INTO_DEBUGGER() if( Catch::isDebuggerActive() ) { DebugBreak(); }
+#endif
+
+#ifndef CATCH_BREAK_INTO_DEBUGGER
+#define CATCH_BREAK_INTO_DEBUGGER() Catch::alwaysTrue();
+#endif
+
+// #included from: catch_interfaces_runner.h
+#define TWOBLUECUBES_CATCH_INTERFACES_RUNNER_H_INCLUDED
+
+namespace Catch {
+    class TestCase;
+
+    struct IRunner {
+        virtual ~IRunner();
+        virtual bool aborting() const = 0;
+    };
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// In the event of a failure, this works out whether the debugger needs to be invoked
+// and/or an exception thrown, and takes the appropriate action.
+// This needs to be done as a macro so the debugger will stop in the user
+// source code rather than in Catch library code.
+#define INTERNAL_CATCH_REACT( resultBuilder ) \
+    if( resultBuilder.shouldDebugBreak() ) CATCH_BREAK_INTO_DEBUGGER(); \
+    resultBuilder.react();
+
+///////////////////////////////////////////////////////////////////////////////
+#define INTERNAL_CATCH_TEST( expr, resultDisposition, macroName ) \
+    do { \
+        Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, #expr, resultDisposition ); \
+        try { \
+            ( __catchResult <= expr ).endExpression(); \
+        } \
+        catch( ... ) { \
+            __catchResult.useActiveException( Catch::ResultDisposition::Normal ); \
+        } \
+        INTERNAL_CATCH_REACT( __catchResult ) \
+    } while( Catch::isTrue( false && (expr) ) ) // expr here is never evaluated at runtime but it forces the compiler to give it a look
+
+///////////////////////////////////////////////////////////////////////////////
+#define INTERNAL_CATCH_IF( expr, resultDisposition, macroName ) \
+    INTERNAL_CATCH_TEST( expr, resultDisposition, macroName ); \
+    if( Catch::getResultCapture().getLastResult()->succeeded() )
+
+///////////////////////////////////////////////////////////////////////////////
+#define INTERNAL_CATCH_ELSE( expr, resultDisposition, macroName ) \
+    INTERNAL_CATCH_TEST( expr, resultDisposition, macroName ); \
+    if( !Catch::getResultCapture().getLastResult()->succeeded() )
+
+///////////////////////////////////////////////////////////////////////////////
+#define INTERNAL_CATCH_NO_THROW( expr, resultDisposition, macroName ) \
+    do { \
+        Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, #expr, resultDisposition ); \
+        try { \
+            expr; \
+            __catchResult.captureResult( Catch::ResultWas::Ok ); \
+        } \
+        catch( ... ) { \
+            __catchResult.useActiveException( resultDisposition ); \
+        } \
+        INTERNAL_CATCH_REACT( __catchResult ) \
+    } while( Catch::alwaysFalse() )
+
+///////////////////////////////////////////////////////////////////////////////
+#define INTERNAL_CATCH_THROWS( expr, resultDisposition, macroName ) \
+    do { \
+        Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, #expr, resultDisposition ); \
+        if( __catchResult.allowThrows() ) \
+            try { \
+                expr; \
+                __catchResult.captureResult( Catch::ResultWas::DidntThrowException ); \
+            } \
+            catch( ... ) { \
+                __catchResult.captureResult( Catch::ResultWas::Ok ); \
+            } \
+        else \
+            __catchResult.captureResult( Catch::ResultWas::Ok ); \
+        INTERNAL_CATCH_REACT( __catchResult ) \
+    } while( Catch::alwaysFalse() )
+
+///////////////////////////////////////////////////////////////////////////////
+#define INTERNAL_CATCH_THROWS_AS( expr, exceptionType, resultDisposition, macroName ) \
+    do { \
+        Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, #expr, resultDisposition ); \
+        if( __catchResult.allowThrows() ) \
+            try { \
+                expr; \
+                __catchResult.captureResult( Catch::ResultWas::DidntThrowException ); \
+            } \
+            catch( exceptionType ) { \
+                __catchResult.captureResult( Catch::ResultWas::Ok ); \
+            } \
+            catch( ... ) { \
+                __catchResult.useActiveException( resultDisposition ); \
+            } \
+        else \
+            __catchResult.captureResult( Catch::ResultWas::Ok ); \
+        INTERNAL_CATCH_REACT( __catchResult ) \
+    } while( Catch::alwaysFalse() )
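+// Illustrative note (not from the upstream sources; Widget is hypothetical): the
+// *_THROWS_AS family of user macros funnels into the block above. Reaching the end of
+// the try records DidntThrowException (a failure); catching the named type records Ok:
+//
+//     REQUIRE_THROWS_AS( Widget( -1 ), std::invalid_argument );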
+
+///////////////////////////////////////////////////////////////////////////////
+#ifdef CATCH_CONFIG_VARIADIC_MACROS
+    #define INTERNAL_CATCH_MSG( messageType, resultDisposition, macroName, ... ) \
+        do { \
+            Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, "", resultDisposition ); \
+            __catchResult << __VA_ARGS__ + ::Catch::StreamEndStop(); \
+            __catchResult.captureResult( messageType ); \
+            INTERNAL_CATCH_REACT( __catchResult ) \
+        } while( Catch::alwaysFalse() )
+#else
+    #define INTERNAL_CATCH_MSG( messageType, resultDisposition, macroName, log ) \
+        do { \
+            Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, "", resultDisposition ); \
+            __catchResult << log + ::Catch::StreamEndStop(); \
+            __catchResult.captureResult( messageType ); \
+            INTERNAL_CATCH_REACT( __catchResult ) \
+        } while( Catch::alwaysFalse() )
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+#define INTERNAL_CATCH_INFO( log, macroName ) \
+    Catch::ScopedMessage INTERNAL_CATCH_UNIQUE_NAME( scopedMessage ) = Catch::MessageBuilder( macroName, CATCH_INTERNAL_LINEINFO, Catch::ResultWas::Info ) << log;
+
+///////////////////////////////////////////////////////////////////////////////
+#define INTERNAL_CHECK_THAT( arg, matcher, resultDisposition, macroName ) \
+    do { \
+        Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, #arg " " #matcher, resultDisposition ); \
+        try { \
+            std::string matcherAsString = ::Catch::Matchers::matcher.toString(); \
+            __catchResult \
+                .setLhs( Catch::toString( arg ) ) \
+                .setRhs( matcherAsString == Catch::Detail::unprintableString ? #matcher : matcherAsString ) \
+                .setOp( "matches" ) \
+                .setResultType( ::Catch::Matchers::matcher.match( arg ) ); \
+            __catchResult.captureExpression(); \
+        } catch( ... ) { \
+            __catchResult.useActiveException( resultDisposition | Catch::ResultDisposition::ContinueOnFailure ); \
+        } \
+        INTERNAL_CATCH_REACT( __catchResult ) \
+    } while( Catch::alwaysFalse() )
+
+// #included from: internal/catch_section.h
+#define TWOBLUECUBES_CATCH_SECTION_H_INCLUDED
+
+// #included from: catch_section_info.h
+#define TWOBLUECUBES_CATCH_SECTION_INFO_H_INCLUDED
+
+namespace Catch {
+
+    struct SectionInfo {
+        SectionInfo
+            (   SourceLineInfo const& _lineInfo,
+                std::string const& _name,
+                std::string const& _description = std::string() );
+
+        std::string name;
+        std::string description;
+        SourceLineInfo lineInfo;
+    };
+
+} // end namespace Catch
+
+// #included from: catch_totals.hpp
+#define TWOBLUECUBES_CATCH_TOTALS_HPP_INCLUDED
+
+#include <cstddef>
+
+namespace Catch {
+
+    struct Counts {
+        Counts() : passed( 0 ), failed( 0 ), failedButOk( 0 ) {}
+
+        Counts operator - ( Counts const& other ) const {
+            Counts diff;
+            diff.passed = passed - other.passed;
+            diff.failed = failed - other.failed;
+            diff.failedButOk = failedButOk - other.failedButOk;
+            return diff;
+        }
+        Counts& operator += ( Counts const& other ) {
+            passed += other.passed;
+            failed += other.failed;
+            failedButOk += other.failedButOk;
+            return *this;
+        }
+
+        std::size_t total() const {
+            return passed + failed + failedButOk;
+        }
+        bool allPassed() const {
+            return failed == 0 && failedButOk == 0;
+        }
+        bool allOk() const {
+            return failed == 0;
+        }
+
+        std::size_t passed;
+        std::size_t failed;
+        std::size_t failedButOk;
+    };
+
+    struct Totals {
+
+        Totals operator - ( Totals const& other ) const {
+            Totals diff;
+            diff.assertions = assertions - other.assertions;
+            diff.testCases = testCases - other.testCases;
+            return diff;
+        }
+
+        Totals delta( Totals const& prevTotals ) const {
+            Totals diff = *this - prevTotals;
+            if( diff.assertions.failed > 0 )
+                ++diff.testCases.failed;
+            else if( diff.assertions.failedButOk > 0 )
+                ++diff.testCases.failedButOk;
+            else
+                ++diff.testCases.passed;
+            return diff;
+        }
+
+        Totals& operator += ( Totals const& other ) {
+            assertions += other.assertions;
+            testCases += other.testCases;
+            return *this;
+        }
+
+        Counts assertions;
+        Counts testCases;
+    };
+}
+
+// #included from: catch_timer.h
+#define TWOBLUECUBES_CATCH_TIMER_H_INCLUDED
+
+#ifdef CATCH_PLATFORM_WINDOWS
+typedef unsigned long long uint64_t;
+#else
+#include <stdint.h>
+#endif
+
+namespace Catch {
+
+    class Timer {
+    public:
+        Timer() : m_ticks( 0 ) {}
+        void start();
+        unsigned int getElapsedMicroseconds() const;
+        unsigned int getElapsedMilliseconds() const;
+        double getElapsedSeconds() const;
+
+    private:
+        uint64_t m_ticks;
+    };
+
+} // namespace Catch
+
+#include <string>
+
+namespace Catch {
+
+    class Section : NonCopyable {
+    public:
+        Section( SectionInfo const& info );
+        ~Section();
+
+        // This indicates whether the section should be executed or not
+        operator bool() const;
+
+    private:
+        SectionInfo m_info;
+
+        std::string m_name;
+        Counts m_assertions;
+        bool m_sectionIncluded;
+        Timer m_timer;
+    };
+
+} // end namespace Catch
+
+#ifdef CATCH_CONFIG_VARIADIC_MACROS
+    #define INTERNAL_CATCH_SECTION( ... ) \
+        if( Catch::Section const& INTERNAL_CATCH_UNIQUE_NAME( catch_internal_Section ) = Catch::SectionInfo( CATCH_INTERNAL_LINEINFO, __VA_ARGS__ ) )
+#else
+    #define INTERNAL_CATCH_SECTION( name, desc ) \
+        if( Catch::Section const& INTERNAL_CATCH_UNIQUE_NAME( catch_internal_Section ) = Catch::SectionInfo( CATCH_INTERNAL_LINEINFO, name, desc ) )
+#endif
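+// Illustrative sketch (not from the upstream sources; the section name and v are made up):
+// the user-level SECTION macro maps onto INTERNAL_CATCH_SECTION above, so
+//
+//     SECTION( "empty vector" ) { REQUIRE( v.empty() ); }
+//
+// constructs a Catch::Section whose operator bool() decides whether this section runs on
+// the current pass through the enclosing test case.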
+
+// #included from: internal/catch_generators.hpp
+#define TWOBLUECUBES_CATCH_GENERATORS_HPP_INCLUDED
+
+#include <iterator>
+#include <vector>
+#include <string>
+#include <stdlib.h>
+
+namespace Catch {
+
+template<typename T>
+struct IGenerator {
+    virtual ~IGenerator() {}
+    virtual T getValue( std::size_t index ) const = 0;
+    virtual std::size_t size () const = 0;
+};
+
+template<typename T>
+class BetweenGenerator : public IGenerator<T> {
+public:
+    BetweenGenerator( T from, T to ) : m_from( from ), m_to( to ){}
+
+    virtual T getValue( std::size_t index ) const {
+        return m_from+static_cast<int>( index );
+    }
+
+    virtual std::size_t size() const {
+        return static_cast<std::size_t>( 1+m_to-m_from );
+    }
+
+private:
+
+    T m_from;
+    T m_to;
+};
+
+template<typename T>
+class ValuesGenerator : public IGenerator<T> {
+public:
+    ValuesGenerator(){}
+
+    void add( T value ) {
+        m_values.push_back( value );
+    }
+
+    virtual T getValue( std::size_t index ) const {
+        return m_values[index];
+    }
+
+    virtual std::size_t size() const {
+        return m_values.size();
+    }
+
+private:
+    std::vector<T> m_values;
+};
+
+template<typename T>
+class CompositeGenerator {
+public:
+    CompositeGenerator() : m_totalSize( 0 ) {}
+
+    // *** Move semantics, similar to auto_ptr ***
+    CompositeGenerator( CompositeGenerator& other )
+    :   m_fileInfo( other.m_fileInfo ),
+        m_totalSize( 0 )
+    {
+        move( other );
+    }
+
+    CompositeGenerator& setFileInfo( const char* fileInfo ) {
+        m_fileInfo = fileInfo;
+        return *this;
+    }
+
+    ~CompositeGenerator() {
+        deleteAll( m_composed );
+    }
+
+    operator T () const {
+        size_t overallIndex = getCurrentContext().getGeneratorIndex( m_fileInfo, m_totalSize );
+
+        typename std::vector<const IGenerator<T>*>::const_iterator it = m_composed.begin();
+        typename std::vector<const IGenerator<T>*>::const_iterator itEnd = m_composed.end();
+        for( size_t index = 0; it != itEnd; ++it )
+        {
+            const IGenerator<T>* generator = *it;
+            if( overallIndex >= index && overallIndex < index + generator->size() )
+            {
+                return generator->getValue( overallIndex-index );
+            }
+            index += generator->size();
+        }
+        CATCH_INTERNAL_ERROR( "Indexed past end of generated range" );
+        return T(); // Suppress spurious "not all control paths return a value" warning in Visual Studio - if you know how to fix this please do so
+    }
+
+    void add( const IGenerator<T>* generator ) {
+        m_totalSize += generator->size();
+        m_composed.push_back( generator );
+    }
+
+    CompositeGenerator& then( CompositeGenerator& other ) {
+        move( other );
+        return *this;
+    }
+
+    CompositeGenerator& then( T value ) {
+        ValuesGenerator<T>* valuesGen = new ValuesGenerator<T>();
+        valuesGen->add( value );
+        add( valuesGen );
+        return *this;
+    }
+
+private:
+
+    void move( CompositeGenerator& other ) {
+        std::copy( other.m_composed.begin(), other.m_composed.end(), std::back_inserter( m_composed ) );
+        m_totalSize += other.m_totalSize;
+        other.m_composed.clear();
+    }
+
+    std::vector<const IGenerator<T>*> m_composed;
+    std::string m_fileInfo;
+    size_t m_totalSize;
+};
+
+namespace Generators
+{
+    template<typename T>
+    CompositeGenerator<T> between( T from, T to ) {
+        CompositeGenerator<T> generators;
+        generators.add( new BetweenGenerator<T>( from, to ) );
+        return generators;
+    }
+
+    template<typename T>
+    CompositeGenerator<T> values( T val1, T val2 ) {
+        CompositeGenerator<T> generators;
+        ValuesGenerator<T>* valuesGen = new ValuesGenerator<T>();
+        valuesGen->add( val1 );
+        valuesGen->add( val2 );
+        generators.add( valuesGen );
+        return generators;
+    }
+
+    template<typename T>
+    CompositeGenerator<T> values( T val1, T val2, T val3 ){
+        CompositeGenerator<T> generators;
+        ValuesGenerator<T>* valuesGen = new ValuesGenerator<T>();
+        valuesGen->add( val1 );
+        valuesGen->add( val2 );
+        valuesGen->add( val3 );
+        generators.add( valuesGen );
+        return generators;
+    }
+
+    template<typename T>
+    CompositeGenerator<T> values( T val1, T val2, T val3, T val4 ) {
+        CompositeGenerator<T> generators;
+        ValuesGenerator<T>* valuesGen = new ValuesGenerator<T>();
+        valuesGen->add( val1 );
+        valuesGen->add( val2 );
+        valuesGen->add( val3 );
+        valuesGen->add( val4 );
+        generators.add( valuesGen );
+        return generators;
+    }
+
+} // end namespace Generators
+
+using namespace Generators;
+
+} // end namespace Catch
+
+#define INTERNAL_CATCH_LINESTR2( line ) #line
+#define INTERNAL_CATCH_LINESTR( line ) INTERNAL_CATCH_LINESTR2( line )
+
+#define INTERNAL_CATCH_GENERATE( expr ) expr.setFileInfo( __FILE__ "(" INTERNAL_CATCH_LINESTR( __LINE__ ) ")" )
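
A minimal sketch of driving the composite generators above (GENERATE is assumed to be the public wrapper around INTERNAL_CATCH_GENERATE defined elsewhere in this header; the values are illustrative):

    TEST_CASE( "generated values", "[generators]" ) {
        // between(1,3) contributes 1, 2, 3 and then(10) appends a single 10;
        // the test body is re-entered once per generated value.
        int i = GENERATE( between( 1, 3 ).then( 10 ) );
        REQUIRE( i >= 1 );
    }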
+
+// #included from: internal/catch_interfaces_exception.h
+#define TWOBLUECUBES_CATCH_INTERFACES_EXCEPTION_H_INCLUDED
+
+#include <string>
+// #included from: catch_interfaces_registry_hub.h
+#define TWOBLUECUBES_CATCH_INTERFACES_REGISTRY_HUB_H_INCLUDED
+
+#include <string>
+
+namespace Catch {
+
+    class TestCase;
+    struct ITestCaseRegistry;
+    struct IExceptionTranslatorRegistry;
+    struct IExceptionTranslator;
+    struct IReporterRegistry;
+    struct IReporterFactory;
+
+    struct IRegistryHub {
+        virtual ~IRegistryHub();
+
+        virtual IReporterRegistry const& getReporterRegistry() const = 0;
+        virtual ITestCaseRegistry const& getTestCaseRegistry() const = 0;
+        virtual IExceptionTranslatorRegistry& getExceptionTranslatorRegistry() = 0;
+    };
+
+    struct IMutableRegistryHub {
+        virtual ~IMutableRegistryHub();
+        virtual void registerReporter( std::string const& name, IReporterFactory* factory ) = 0;
+        virtual void registerTest( TestCase const& testInfo ) = 0;
+        virtual void registerTranslator( const IExceptionTranslator* translator ) = 0;
+    };
+
+    IRegistryHub& getRegistryHub();
+    IMutableRegistryHub& getMutableRegistryHub();
+    void cleanUp();
+    std::string translateActiveException();
+
+}
+
+
+namespace Catch {
+
+    typedef std::string(*exceptionTranslateFunction)();
+
+    struct IExceptionTranslator {
+        virtual ~IExceptionTranslator();
+        virtual std::string translate() const = 0;
+    };
+
+    struct IExceptionTranslatorRegistry {
+        virtual ~IExceptionTranslatorRegistry();
+
+        virtual std::string translateActiveException() const = 0;
+    };
+
+    class ExceptionTranslatorRegistrar {
+        template<typename T>
+        class ExceptionTranslator : public IExceptionTranslator {
+        public:
+
+            ExceptionTranslator( std::string(*translateFunction)( T& ) )
+            : m_translateFunction( translateFunction )
+            {}
+
+            virtual std::string translate() const {
+                try {
+                    throw;
+                }
+                catch( T& ex ) {
+                    return m_translateFunction( ex );
+                }
+            }
+
+        protected:
+            std::string(*m_translateFunction)( T& );
+        };
+
+    public:
+        template<typename T>
+        ExceptionTranslatorRegistrar( std::string(*translateFunction)( T& ) ) {
+            getMutableRegistryHub().registerTranslator
+                ( new ExceptionTranslator<T>( translateFunction ) );
+        }
+    };
+}
+
+///////////////////////////////////////////////////////////////////////////////
+#define INTERNAL_CATCH_TRANSLATE_EXCEPTION( signature ) \
+    static std::string INTERNAL_CATCH_UNIQUE_NAME( catch_internal_ExceptionTranslator )( signature ); \
+    namespace{ Catch::ExceptionTranslatorRegistrar INTERNAL_CATCH_UNIQUE_NAME( catch_internal_ExceptionRegistrar )( &INTERNAL_CATCH_UNIQUE_NAME( catch_internal_ExceptionTranslator ) ); }\
+    static std::string INTERNAL_CATCH_UNIQUE_NAME(  catch_internal_ExceptionTranslator )( signature )
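
A minimal sketch of registering a translator with the macro above, so that a project-specific exception type renders as a readable failure message (MyProjectError is an illustrative stand-in; the public CATCH_TRANSLATE_EXCEPTION wrapper defined elsewhere in this header forwards to the same macro):

    struct MyProjectError {                      // illustrative exception type
        explicit MyProjectError( std::string const& msg ) : message( msg ) {}
        std::string message;
    };

    INTERNAL_CATCH_TRANSLATE_EXCEPTION( MyProjectError& ex ) {
        return "MyProjectError: " + ex.message;  // reported when this type escapes a test
    }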
+
+// #included from: internal/catch_approx.hpp
+#define TWOBLUECUBES_CATCH_APPROX_HPP_INCLUDED
+
+#include <cmath>
+#include <limits>
+
+namespace Catch {
+namespace Detail {
+
+    class Approx {
+    public:
+        explicit Approx ( double value )
+        :   m_epsilon( std::numeric_limits<float>::epsilon()*100 ),
+            m_scale( 1.0 ),
+            m_value( value )
+        {}
+
+        Approx( Approx const& other )
+        :   m_epsilon( other.m_epsilon ),
+            m_scale( other.m_scale ),
+            m_value( other.m_value )
+        {}
+
+        static Approx custom() {
+            return Approx( 0 );
+        }
+
+        Approx operator()( double value ) {
+            Approx approx( value );
+            approx.epsilon( m_epsilon );
+            approx.scale( m_scale );
+            return approx;
+        }
+
+        friend bool operator == ( double lhs, Approx const& rhs ) {
+            // Thanks to Richard Harris for his help refining this formula
+            return fabs( lhs - rhs.m_value ) < rhs.m_epsilon * (rhs.m_scale + (std::max)( fabs(lhs), fabs(rhs.m_value) ) );
+        }
+
+        friend bool operator == ( Approx const& lhs, double rhs ) {
+            return operator==( rhs, lhs );
+        }
+
+        friend bool operator != ( double lhs, Approx const& rhs ) {
+            return !operator==( lhs, rhs );
+        }
+
+        friend bool operator != ( Approx const& lhs, double rhs ) {
+            return !operator==( rhs, lhs );
+        }
+
+        Approx& epsilon( double newEpsilon ) {
+            m_epsilon = newEpsilon;
+            return *this;
+        }
+
+        Approx& scale( double newScale ) {
+            m_scale = newScale;
+            return *this;
+        }
+
+        std::string toString() const {
+            std::ostringstream oss;
+            oss << "Approx( " << Catch::toString( m_value ) << " )";
+            return oss.str();
+        }
+
+    private:
+        double m_epsilon;
+        double m_scale;
+        double m_value;
+    };
+}
+
+template<>
+inline std::string toString<Detail::Approx>( Detail::Approx const& value ) {
+    return value.toString();
+}
+
+} // end namespace Catch
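
A minimal sketch of Approx in assertions (REQUIRE is defined elsewhere in this header, and a using-declaration later in the header is assumed to hoist Catch::Detail::Approx to the global namespace):

    TEST_CASE( "approximate comparison", "[approx]" ) {
        double third = 1.0 / 3.0;
        // epsilon() loosens the relative tolerance used by operator== above
        REQUIRE( third == Approx( 0.3333 ).epsilon( 0.001 ) );
        REQUIRE( third != Approx( 0.5 ) );
    }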
+
+// #included from: internal/catch_matchers.hpp
+#define TWOBLUECUBES_CATCH_MATCHERS_HPP_INCLUDED
+
+namespace Catch {
+namespace Matchers {
+    namespace Impl {
+
+    template<typename ExpressionT>
+    struct Matcher : SharedImpl<IShared>
+    {
+        typedef ExpressionT ExpressionType;
+
+        virtual ~Matcher() {}
+        virtual Ptr<Matcher> clone() const = 0;
+        virtual bool match( ExpressionT const& expr ) const = 0;
+        virtual std::string toString() const = 0;
+    };
+
+    template<typename DerivedT, typename ExpressionT>
+    struct MatcherImpl : Matcher<ExpressionT> {
+
+        virtual Ptr<Matcher<ExpressionT> > clone() const {
+            return Ptr<Matcher<ExpressionT> >( new DerivedT( static_cast<DerivedT const&>( *this ) ) );
+        }
+    };
+
+    namespace Generic {
+
+        template<typename ExpressionT>
+        class AllOf : public MatcherImpl<AllOf<ExpressionT>, ExpressionT> {
+        public:
+
+            AllOf() {}
+            AllOf( AllOf const& other ) : m_matchers( other.m_matchers ) {}
+
+            AllOf& add( Matcher<ExpressionT> const& matcher ) {
+                m_matchers.push_back( matcher.clone() );
+                return *this;
+            }
+            virtual bool match( ExpressionT const& expr ) const
+            {
+                for( std::size_t i = 0; i < m_matchers.size(); ++i )
+                    if( !m_matchers[i]->match( expr ) )
+                        return false;
+                return true;
+            }
+            virtual std::string toString() const {
+                std::ostringstream oss;
+                oss << "( ";
+                for( std::size_t i = 0; i < m_matchers.size(); ++i ) {
+                    if( i != 0 )
+                        oss << " and ";
+                    oss << m_matchers[i]->toString();
+                }
+                oss << " )";
+                return oss.str();
+            }
+
+        private:
+            std::vector<Ptr<Matcher<ExpressionT> > > m_matchers;
+        };
+
+        template<typename ExpressionT>
+        class AnyOf : public MatcherImpl<AnyOf<ExpressionT>, ExpressionT> {
+        public:
+
+            AnyOf() {}
+            AnyOf( AnyOf const& other ) : m_matchers( other.m_matchers ) {}
+
+            AnyOf& add( Matcher<ExpressionT> const& matcher ) {
+                m_matchers.push_back( matcher.clone() );
+                return *this;
+            }
+            virtual bool match( ExpressionT const& expr ) const
+            {
+                for( std::size_t i = 0; i < m_matchers.size(); ++i )
+                    if( m_matchers[i]->match( expr ) )
+                        return true;
+                return false;
+            }
+            virtual std::string toString() const {
+                std::ostringstream oss;
+                oss << "( ";
+                for( std::size_t i = 0; i < m_matchers.size(); ++i ) {
+                    if( i != 0 )
+                        oss << " or ";
+                    oss << m_matchers[i]->toString();
+                }
+                oss << " )";
+                return oss.str();
+            }
+
+        private:
+            std::vector<Ptr<Matcher<ExpressionT> > > m_matchers;
+        };
+
+    }
+
+    namespace StdString {
+
+        inline std::string makeString( std::string const& str ) { return str; }
+        inline std::string makeString( const char* str ) { return str ? std::string( str ) : std::string(); }
+
+        struct Equals : MatcherImpl<Equals, std::string> {
+            Equals( std::string const& str ) : m_str( str ){}
+            Equals( Equals const& other ) : m_str( other.m_str ){}
+
+            virtual ~Equals();
+
+            virtual bool match( std::string const& expr ) const {
+                return m_str == expr;
+            }
+            virtual std::string toString() const {
+                return "equals: \"" + m_str + "\"";
+            }
+
+            std::string m_str;
+        };
+
+        struct Contains : MatcherImpl<Contains, std::string> {
+            Contains( std::string const& substr ) : m_substr( substr ){}
+            Contains( Contains const& other ) : m_substr( other.m_substr ){}
+
+            virtual ~Contains();
+
+            virtual bool match( std::string const& expr ) const {
+                return expr.find( m_substr ) != std::string::npos;
+            }
+            virtual std::string toString() const {
+                return "contains: \"" + m_substr + "\"";
+            }
+
+            std::string m_substr;
+        };
+
+        struct StartsWith : MatcherImpl<StartsWith, std::string> {
+            StartsWith( std::string const& substr ) : m_substr( substr ){}
+            StartsWith( StartsWith const& other ) : m_substr( other.m_substr ){}
+
+            virtual ~StartsWith();
+
+            virtual bool match( std::string const& expr ) const {
+                return expr.find( m_substr ) == 0;
+            }
+            virtual std::string toString() const {
+                return "starts with: \"" + m_substr + "\"";
+            }
+
+            std::string m_substr;
+        };
+
+        struct EndsWith : MatcherImpl<EndsWith, std::string> {
+            EndsWith( std::string const& substr ) : m_substr( substr ){}
+            EndsWith( EndsWith const& other ) : m_substr( other.m_substr ){}
+
+            virtual ~EndsWith();
+
+            virtual bool match( std::string const& expr ) const {
+                return expr.find( m_substr ) == expr.size() - m_substr.size();
+            }
+            virtual std::string toString() const {
+                return "ends with: \"" + m_substr + "\"";
+            }
+
+            std::string m_substr;
+        };
+    } // namespace StdString
+    } // namespace Impl
+
+    // The following functions create the actual matcher objects.
+    // This allows the types to be inferred
+    template<typename ExpressionT>
+    inline Impl::Generic::AllOf<ExpressionT> AllOf( Impl::Matcher<ExpressionT> const& m1,
+                                                    Impl::Matcher<ExpressionT> const& m2 ) {
+        return Impl::Generic::AllOf<ExpressionT>().add( m1 ).add( m2 );
+    }
+    template<typename ExpressionT>
+    inline Impl::Generic::AllOf<ExpressionT> AllOf( Impl::Matcher<ExpressionT> const& m1,
+                                                    Impl::Matcher<ExpressionT> const& m2,
+                                                    Impl::Matcher<ExpressionT> const& m3 ) {
+        return Impl::Generic::AllOf<ExpressionT>().add( m1 ).add( m2 ).add( m3 );
+    }
+    template<typename ExpressionT>
+    inline Impl::Generic::AnyOf<ExpressionT> AnyOf( Impl::Matcher<ExpressionT> const& m1,
+                                                    Impl::Matcher<ExpressionT> const& m2 ) {
+        return Impl::Generic::AnyOf<ExpressionT>().add( m1 ).add( m2 );
+    }
+    template<typename ExpressionT>
+    inline Impl::Generic::AnyOf<ExpressionT> AnyOf( Impl::Matcher<ExpressionT> const& m1,
+                                                    Impl::Matcher<ExpressionT> const& m2,
+                                                    Impl::Matcher<ExpressionT> const& m3 ) {
+        return Impl::Generic::AnyOf<ExpressionT>().add( m1 ).add( m2 ).add( m3 );
+    }
+
+    inline Impl::StdString::Equals      Equals( std::string const& str ) {
+        return Impl::StdString::Equals( str );
+    }
+    inline Impl::StdString::Equals      Equals( const char* str ) {
+        return Impl::StdString::Equals( Impl::StdString::makeString( str ) );
+    }
+    inline Impl::StdString::Contains    Contains( std::string const& substr ) {
+        return Impl::StdString::Contains( substr );
+    }
+    inline Impl::StdString::Contains    Contains( const char* substr ) {
+        return Impl::StdString::Contains( Impl::StdString::makeString( substr ) );
+    }
+    inline Impl::StdString::StartsWith  StartsWith( std::string const& substr ) {
+        return Impl::StdString::StartsWith( substr );
+    }
+    inline Impl::StdString::StartsWith  StartsWith( const char* substr ) {
+        return Impl::StdString::StartsWith( Impl::StdString::makeString( substr ) );
+    }
+    inline Impl::StdString::EndsWith    EndsWith( std::string const& substr ) {
+        return Impl::StdString::EndsWith( substr );
+    }
+    inline Impl::StdString::EndsWith    EndsWith( const char* substr ) {
+        return Impl::StdString::EndsWith( Impl::StdString::makeString( substr ) );
+    }
+
+} // namespace Matchers
+
+using namespace Matchers;
+
+} // namespace Catch
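
A minimal sketch of the matcher factories above in assertions (REQUIRE_THAT is the matcher assertion macro defined elsewhere in this header; the strings are illustrative):

    using namespace Catch::Matchers;

    TEST_CASE( "string matching", "[matchers]" ) {
        std::string const line = "salmon quantifies transcripts";
        REQUIRE_THAT( line, StartsWith( "salmon" ) );
        // AllOf requires every sub-matcher to match; AnyOf requires at least one
        REQUIRE_THAT( line, AllOf( Contains( "quantifies" ), EndsWith( "transcripts" ) ) );
    }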
+
+// #included from: internal/catch_interfaces_tag_alias_registry.h
+#define TWOBLUECUBES_CATCH_INTERFACES_TAG_ALIAS_REGISTRY_H_INCLUDED
+
+// #included from: catch_tag_alias.h
+#define TWOBLUECUBES_CATCH_TAG_ALIAS_H_INCLUDED
+
+#include <string>
+
+namespace Catch {
+
+    struct TagAlias {
+        TagAlias( std::string _tag, SourceLineInfo _lineInfo ) : tag( _tag ), lineInfo( _lineInfo ) {}
+
+        std::string tag;
+        SourceLineInfo lineInfo;
+    };
+
+    struct RegistrarForTagAliases {
+        RegistrarForTagAliases( char const* alias, char const* tag, SourceLineInfo const& lineInfo );
+    };
+
+} // end namespace Catch
+
+#define CATCH_REGISTER_TAG_ALIAS( alias, spec ) namespace{ Catch::RegistrarForTagAliases INTERNAL_CATCH_UNIQUE_NAME( AutoRegisterTagAlias )( alias, spec, CATCH_INTERNAL_LINEINFO ); }
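
A minimal sketch of the macro above (the tag names are illustrative; per the Catch documentation the alias itself must start with '@' inside the brackets):

    // Running tests with [@nightly] then expands to the tag spec on the right.
    CATCH_REGISTER_TAG_ALIAS( "[@nightly]", "[slow][db]~[.]" )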
+// #included from: catch_option.hpp
+#define TWOBLUECUBES_CATCH_OPTION_HPP_INCLUDED
+
+namespace Catch {
+
+    // An optional type
+    template<typename T>
+    class Option {
+    public:
+        Option() : nullableValue( NULL ) {}
+        Option( T const& _value )
+        : nullableValue( new( storage ) T( _value ) )
+        {}
+        Option( Option const& _other )
+        : nullableValue( _other ? new( storage ) T( *_other ) : NULL )
+        {}
+
+        ~Option() {
+            reset();
+        }
+
+        Option& operator= ( Option const& _other ) {
+            if( &_other != this ) {
+                reset();
+                if( _other )
+                    nullableValue = new( storage ) T( *_other );
+            }
+            return *this;
+        }
+        Option& operator = ( T const& _value ) {
+            reset();
+            nullableValue = new( storage ) T( _value );
+            return *this;
+        }
+
+        void reset() {
+            if( nullableValue )
+                nullableValue->~T();
+            nullableValue = NULL;
+        }
+
+        T& operator*() { return *nullableValue; }
+        T const& operator*() const { return *nullableValue; }
+        T* operator->() { return nullableValue; }
+        const T* operator->() const { return nullableValue; }
+
+        T valueOr( T const& defaultValue ) const {
+            return nullableValue ? *nullableValue : defaultValue;
+        }
+
+        bool some() const { return nullableValue != NULL; }
+        bool none() const { return nullableValue == NULL; }
+
+        bool operator !() const { return nullableValue == NULL; }
+        operator SafeBool::type() const {
+            return SafeBool::makeSafe( some() );
+        }
+
+    private:
+        T* nullableValue;
+        char storage[sizeof(T)];
+    };
+
+} // end namespace Catch
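
A minimal sketch of the Option<T> nullable type above (the function and variable names are illustrative; needs <cassert> in addition to this header):

    void optionSketch() {
        Catch::Option<int> maybeSeed;              // starts in the "none" state
        assert( maybeSeed.none() );
        maybeSeed = 42;                            // placement-new into the inline storage
        assert( maybeSeed.some() && *maybeSeed == 42 );
        assert( maybeSeed.valueOr( 7 ) == 42 );
        maybeSeed.reset();                         // destroys the held value
        assert( maybeSeed.valueOr( 7 ) == 7 );
    }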
+
+namespace Catch {
+
+    struct ITagAliasRegistry {
+        virtual ~ITagAliasRegistry();
+        virtual Option<TagAlias> find( std::string const& alias ) const = 0;
+        virtual std::string expandAliases( std::string const& unexpandedTestSpec ) const = 0;
+
+        static ITagAliasRegistry const& get();
+    };
+
+} // end namespace Catch
+
+// These files are included here so the single_include script doesn't put them
+// in the conditionally compiled sections
+// #included from: internal/catch_test_case_info.h
+#define TWOBLUECUBES_CATCH_TEST_CASE_INFO_H_INCLUDED
+
+#include <string>
+#include <set>
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpadded"
+#endif
+
+namespace Catch {
+
+    struct ITestCase;
+
+    struct TestCaseInfo {
+        enum SpecialProperties{
+            None = 0,
+            IsHidden = 1 << 1,
+            ShouldFail = 1 << 2,
+            MayFail = 1 << 3,
+            Throws = 1 << 4
+        };
+
+        TestCaseInfo(   std::string const& _name,
+                        std::string const& _className,
+                        std::string const& _description,
+                        std::set<std::string> const& _tags,
+                        SourceLineInfo const& _lineInfo );
+
+        TestCaseInfo( TestCaseInfo const& other );
+
+        bool isHidden() const;
+        bool throws() const;
+        bool okToFail() const;
+        bool expectedToFail() const;
+
+        std::string name;
+        std::string className;
+        std::string description;
+        std::set<std::string> tags;
+        std::set<std::string> lcaseTags;
+        std::string tagsAsString;
+        SourceLineInfo lineInfo;
+        SpecialProperties properties;
+    };
+
+    class TestCase : public TestCaseInfo {
+    public:
+
+        TestCase( ITestCase* testCase, TestCaseInfo const& info );
+        TestCase( TestCase const& other );
+
+        TestCase withName( std::string const& _newName ) const;
+
+        void invoke() const;
+
+        TestCaseInfo const& getTestCaseInfo() const;
+
+        void swap( TestCase& other );
+        bool operator == ( TestCase const& other ) const;
+        bool operator < ( TestCase const& other ) const;
+        TestCase& operator = ( TestCase const& other );
+
+    private:
+        Ptr<ITestCase> test;
+    };
+
+    TestCase makeTestCase(  ITestCase* testCase,
+                            std::string const& className,
+                            std::string const& name,
+                            std::string const& description,
+                            SourceLineInfo const& lineInfo );
+}
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+
+#ifdef __OBJC__
+// #included from: internal/catch_objc.hpp
+#define TWOBLUECUBES_CATCH_OBJC_HPP_INCLUDED
+
+#import <objc/runtime.h>
+
+#include <string>
+
+// NB. Any general catch headers included here must be included
+// in catch.hpp first to make sure they are included by the single
+// header for non Obj-C usage
+
+///////////////////////////////////////////////////////////////////////////////
+// This protocol is really only here for (self) documenting purposes, since
+// all its methods are optional.
+@protocol OcFixture
+
+@optional
+
+-(void) setUp;
+-(void) tearDown;
+
+@end
+
+namespace Catch {
+
+    class OcMethod : public SharedImpl<ITestCase> {
+
+    public:
+        OcMethod( Class cls, SEL sel ) : m_cls( cls ), m_sel( sel ) {}
+
+        virtual void invoke() const {
+            id obj = [[m_cls alloc] init];
+
+            performOptionalSelector( obj, @selector(setUp)  );
+            performOptionalSelector( obj, m_sel );
+            performOptionalSelector( obj, @selector(tearDown)  );
+
+            arcSafeRelease( obj );
+        }
+    private:
+        virtual ~OcMethod() {}
+
+        Class m_cls;
+        SEL m_sel;
+    };
+
+    namespace Detail{
+
+        inline std::string getAnnotation(   Class cls,
+                                            std::string const& annotationName,
+                                            std::string const& testCaseName ) {
+            NSString* selStr = [[NSString alloc] initWithFormat:@"Catch_%s_%s", annotationName.c_str(), testCaseName.c_str()];
+            SEL sel = NSSelectorFromString( selStr );
+            arcSafeRelease( selStr );
+            id value = performOptionalSelector( cls, sel );
+            if( value )
+                return [(NSString*)value UTF8String];
+            return "";
+        }
+    }
+
+    inline size_t registerTestMethods() {
+        size_t noTestMethods = 0;
+        int noClasses = objc_getClassList( NULL, 0 );
+
+        Class* classes = (CATCH_UNSAFE_UNRETAINED Class *)malloc( sizeof(Class) * noClasses);
+        objc_getClassList( classes, noClasses );
+
+        for( int c = 0; c < noClasses; c++ ) {
+            Class cls = classes[c];
+            {
+                u_int count;
+                Method* methods = class_copyMethodList( cls, &count );
+                for( u_int m = 0; m < count ; m++ ) {
+                    SEL selector = method_getName(methods[m]);
+                    std::string methodName = sel_getName(selector);
+                    if( startsWith( methodName, "Catch_TestCase_" ) ) {
+                        std::string testCaseName = methodName.substr( 15 );
+                        std::string name = Detail::getAnnotation( cls, "Name", testCaseName );
+                        std::string desc = Detail::getAnnotation( cls, "Description", testCaseName );
+                        const char* className = class_getName( cls );
+
+                        getMutableRegistryHub().registerTest( makeTestCase( new OcMethod( cls, selector ), className, name.c_str(), desc.c_str(), SourceLineInfo() ) );
+                        noTestMethods++;
+                    }
+                }
+                free(methods);
+            }
+        }
+        return noTestMethods;
+    }
+
+    namespace Matchers {
+        namespace Impl {
+        namespace NSStringMatchers {
+
+            template<typename MatcherT>
+            struct StringHolder : MatcherImpl<MatcherT, NSString*>{
+                StringHolder( NSString* substr ) : m_substr( [substr copy] ){}
+                StringHolder( StringHolder const& other ) : m_substr( [other.m_substr copy] ){}
+                ~StringHolder() {
+                    arcSafeRelease( m_substr );
+                }
+
+                NSString* m_substr;
+            };
+
+            struct Equals : StringHolder<Equals> {
+                Equals( NSString* substr ) : StringHolder( substr ){}
+
+                virtual bool match( ExpressionType const& str ) const {
+                    return  (str != nil || m_substr == nil ) &&
+                            [str isEqualToString:m_substr];
+                }
+
+                virtual std::string toString() const {
+                    return "equals string: " + Catch::toString( m_substr );
+                }
+            };
+
+            struct Contains : StringHolder<Contains> {
+                Contains( NSString* substr ) : StringHolder( substr ){}
+
+                virtual bool match( ExpressionType const& str ) const {
+                    return  (str != nil || m_substr == nil ) &&
+                            [str rangeOfString:m_substr].location != NSNotFound;
+                }
+
+                virtual std::string toString() const {
+                    return "contains string: " + Catch::toString( m_substr );
+                }
+            };
+
+            struct StartsWith : StringHolder<StartsWith> {
+                StartsWith( NSString* substr ) : StringHolder( substr ){}
+
+                virtual bool match( ExpressionType const& str ) const {
+                    return  (str != nil || m_substr == nil ) &&
+                            [str rangeOfString:m_substr].location == 0;
+                }
+
+                virtual std::string toString() const {
+                    return "starts with: " + Catch::toString( m_substr );
+                }
+            };
+            struct EndsWith : StringHolder<EndsWith> {
+                EndsWith( NSString* substr ) : StringHolder( substr ){}
+
+                virtual bool match( ExpressionType const& str ) const {
+                    return  (str != nil || m_substr == nil ) &&
+                            [str rangeOfString:m_substr].location == [str length] - [m_substr length];
+                }
+
+                virtual std::string toString() const {
+                    return "ends with: " + Catch::toString( m_substr );
+                }
+            };
+
+        } // namespace NSStringMatchers
+        } // namespace Impl
+
+        inline Impl::NSStringMatchers::Equals
+            Equals( NSString* substr ){ return Impl::NSStringMatchers::Equals( substr ); }
+
+        inline Impl::NSStringMatchers::Contains
+            Contains( NSString* substr ){ return Impl::NSStringMatchers::Contains( substr ); }
+
+        inline Impl::NSStringMatchers::StartsWith
+            StartsWith( NSString* substr ){ return Impl::NSStringMatchers::StartsWith( substr ); }
+
+        inline Impl::NSStringMatchers::EndsWith
+            EndsWith( NSString* substr ){ return Impl::NSStringMatchers::EndsWith( substr ); }
+
+    } // namespace Matchers
+
+    using namespace Matchers;
+
+} // namespace Catch
+
+///////////////////////////////////////////////////////////////////////////////
+#define OC_TEST_CASE( name, desc )\
++(NSString*) INTERNAL_CATCH_UNIQUE_NAME( Catch_Name_test ) \
+{\
+return @ name; \
+}\
++(NSString*) INTERNAL_CATCH_UNIQUE_NAME( Catch_Description_test ) \
+{ \
+return @ desc; \
+} \
+-(void) INTERNAL_CATCH_UNIQUE_NAME( Catch_TestCase_test )
+
+#endif
+
+#ifdef CATCH_IMPL
+// #included from: internal/catch_impl.hpp
+#define TWOBLUECUBES_CATCH_IMPL_HPP_INCLUDED
+
+// Collect all the implementation files together here
+// These are the equivalent of what would usually be cpp files
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wweak-vtables"
+#endif
+
+// #included from: ../catch_runner.hpp
+#define TWOBLUECUBES_CATCH_RUNNER_HPP_INCLUDED
+
+// #included from: internal/catch_commandline.hpp
+#define TWOBLUECUBES_CATCH_COMMANDLINE_HPP_INCLUDED
+
+// #included from: catch_config.hpp
+#define TWOBLUECUBES_CATCH_CONFIG_HPP_INCLUDED
+
+// #included from: catch_test_spec_parser.hpp
+#define TWOBLUECUBES_CATCH_TEST_SPEC_PARSER_HPP_INCLUDED
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpadded"
+#endif
+
+// #included from: catch_test_spec.hpp
+#define TWOBLUECUBES_CATCH_TEST_SPEC_HPP_INCLUDED
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpadded"
+#endif
+
+#include <string>
+#include <vector>
+
+namespace Catch {
+
+    class TestSpec {
+        struct Pattern : SharedImpl<> {
+            virtual ~Pattern();
+            virtual bool matches( TestCaseInfo const& testCase ) const = 0;
+        };
+        class NamePattern : public Pattern {
+            enum WildcardPosition {
+                NoWildcard = 0,
+                WildcardAtStart = 1,
+                WildcardAtEnd = 2,
+                WildcardAtBothEnds = WildcardAtStart | WildcardAtEnd
+            };
+
+        public:
+            NamePattern( std::string const& name ) : m_name( toLower( name ) ), m_wildcard( NoWildcard ) {
+                if( startsWith( m_name, "*" ) ) {
+                    m_name = m_name.substr( 1 );
+                    m_wildcard = WildcardAtStart;
+                }
+                if( endsWith( m_name, "*" ) ) {
+                    m_name = m_name.substr( 0, m_name.size()-1 );
+                    m_wildcard = static_cast<WildcardPosition>( m_wildcard | WildcardAtEnd );
+                }
+            }
+            virtual ~NamePattern();
+            virtual bool matches( TestCaseInfo const& testCase ) const {
+                switch( m_wildcard ) {
+                    case NoWildcard:
+                        return m_name == toLower( testCase.name );
+                    case WildcardAtStart:
+                        return endsWith( toLower( testCase.name ), m_name );
+                    case WildcardAtEnd:
+                        return startsWith( toLower( testCase.name ), m_name );
+                    case WildcardAtBothEnds:
+                        return contains( toLower( testCase.name ), m_name );
+                }
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunreachable-code"
+#endif
+                throw std::logic_error( "Unknown enum" );
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+            }
+        private:
+            std::string m_name;
+            WildcardPosition m_wildcard;
+        };
+        class TagPattern : public Pattern {
+        public:
+            TagPattern( std::string const& tag ) : m_tag( toLower( tag ) ) {}
+            virtual ~TagPattern();
+            virtual bool matches( TestCaseInfo const& testCase ) const {
+                return testCase.lcaseTags.find( m_tag ) != testCase.lcaseTags.end();
+            }
+        private:
+            std::string m_tag;
+        };
+        class ExcludedPattern : public Pattern {
+        public:
+            ExcludedPattern( Ptr<Pattern> const& underlyingPattern ) : m_underlyingPattern( underlyingPattern ) {}
+            virtual ~ExcludedPattern();
+            virtual bool matches( TestCaseInfo const& testCase ) const { return !m_underlyingPattern->matches( testCase ); }
+        private:
+            Ptr<Pattern> m_underlyingPattern;
+        };
+
+        struct Filter {
+            std::vector<Ptr<Pattern> > m_patterns;
+
+            bool matches( TestCaseInfo const& testCase ) const {
+                // All patterns in a filter must match for the filter to be a match
+                for( std::vector<Ptr<Pattern> >::const_iterator it = m_patterns.begin(), itEnd = m_patterns.end(); it != itEnd; ++it )
+                    if( !(*it)->matches( testCase ) )
+                        return false;
+                return true;
+            }
+        };
+
+    public:
+        bool hasFilters() const {
+            return !m_filters.empty();
+        }
+        bool matches( TestCaseInfo const& testCase ) const {
+            // A TestSpec matches if any filter matches
+            for( std::vector<Filter>::const_iterator it = m_filters.begin(), itEnd = m_filters.end(); it != itEnd; ++it )
+                if( it->matches( testCase ) )
+                    return true;
+            return false;
+        }
+
+    private:
+        std::vector<Filter> m_filters;
+
+        friend class TestSpecParser;
+    };
+}
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+namespace Catch {
+
+    class TestSpecParser {
+        enum Mode{ None, Name, QuotedName, Tag };
+        Mode m_mode;
+        bool m_exclusion;
+        std::size_t m_start, m_pos;
+        std::string m_arg;
+        TestSpec::Filter m_currentFilter;
+        TestSpec m_testSpec;
+        ITagAliasRegistry const* m_tagAliases;
+
+    public:
+        TestSpecParser( ITagAliasRegistry const& tagAliases ) : m_tagAliases( &tagAliases ) {}
+
+        TestSpecParser& parse( std::string const& arg ) {
+            m_mode = None;
+            m_exclusion = false;
+            m_start = std::string::npos;
+            m_arg = m_tagAliases->expandAliases( arg );
+            for( m_pos = 0; m_pos < m_arg.size(); ++m_pos )
+                visitChar( m_arg[m_pos] );
+            if( m_mode == Name )
+                addPattern<TestSpec::NamePattern>();
+            return *this;
+        }
+        TestSpec testSpec() {
+            addFilter();
+            return m_testSpec;
+        }
+    private:
+        void visitChar( char c ) {
+            if( m_mode == None ) {
+                switch( c ) {
+                case ' ': return;
+                case '~': m_exclusion = true; return;
+                case '[': return startNewMode( Tag, ++m_pos );
+                case '"': return startNewMode( QuotedName, ++m_pos );
+                default: startNewMode( Name, m_pos ); break;
+                }
+            }
+            if( m_mode == Name ) {
+                if( c == ',' ) {
+                    addPattern<TestSpec::NamePattern>();
+                    addFilter();
+                }
+                else if( c == '[' ) {
+                    if( subString() == "exclude:" )
+                        m_exclusion = true;
+                    else
+                        addPattern<TestSpec::NamePattern>();
+                    startNewMode( Tag, ++m_pos );
+                }
+            }
+            else if( m_mode == QuotedName && c == '"' )
+                addPattern<TestSpec::NamePattern>();
+            else if( m_mode == Tag && c == ']' )
+                addPattern<TestSpec::TagPattern>();
+        }
+        void startNewMode( Mode mode, std::size_t start ) {
+            m_mode = mode;
+            m_start = start;
+        }
+        std::string subString() const { return m_arg.substr( m_start, m_pos - m_start ); }
+        template<typename T>
+        void addPattern() {
+            std::string token = subString();
+            if( startsWith( token, "exclude:" ) ) {
+                m_exclusion = true;
+                token = token.substr( 8 );
+            }
+            if( !token.empty() ) {
+                Ptr<TestSpec::Pattern> pattern = new T( token );
+                if( m_exclusion )
+                    pattern = new TestSpec::ExcludedPattern( pattern );
+                m_currentFilter.m_patterns.push_back( pattern );
+            }
+            m_exclusion = false;
+            m_mode = None;
+        }
+        void addFilter() {
+            if( !m_currentFilter.m_patterns.empty() ) {
+                m_testSpec.m_filters.push_back( m_currentFilter );
+                m_currentFilter = TestSpec::Filter();
+            }
+        }
+    };
+    inline TestSpec parseTestSpec( std::string const& arg ) {
+        return TestSpecParser( ITagAliasRegistry::get() ).parse( arg ).testSpec();
+    }
+
+} // namespace Catch
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
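
A minimal sketch of parsing and applying a test spec with the classes above (the helper function is illustrative; TestCaseInfo's constructor is implemented in the CATCH_IMPL section of this header):

    bool testSpecSketch() {
        // '~[slow]' excludes that tag and the quoted string is a name pattern;
        // both land in a single filter, so a case matches only if its name
        // matches AND it does not carry [slow].
        Catch::TestSpec spec = Catch::parseTestSpec( "~[slow] \"handles empty input\"" );

        std::set<std::string> tags;                // an untagged test case
        Catch::TestCaseInfo info( "handles empty input", "", "", tags,
                                  CATCH_INTERNAL_LINEINFO );
        return spec.matches( info );               // true for this info
    }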
+
+// #included from: catch_interfaces_config.h
+#define TWOBLUECUBES_CATCH_INTERFACES_CONFIG_H_INCLUDED
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace Catch {
+
+    struct Verbosity { enum Level {
+        NoOutput = 0,
+        Quiet,
+        Normal
+    }; };
+
+    struct WarnAbout { enum What {
+        Nothing = 0x00,
+        NoAssertions = 0x01
+    }; };
+
+    struct ShowDurations { enum OrNot {
+        DefaultForReporter,
+        Always,
+        Never
+    }; };
+    struct RunTests { enum InWhatOrder {
+        InDeclarationOrder,
+        InLexicographicalOrder,
+        InRandomOrder
+    }; };
+
+    class TestSpec;
+
+    struct IConfig : IShared {
+
+        virtual ~IConfig();
+
+        virtual bool allowThrows() const = 0;
+        virtual std::ostream& stream() const = 0;
+        virtual std::string name() const = 0;
+        virtual bool includeSuccessfulResults() const = 0;
+        virtual bool shouldDebugBreak() const = 0;
+        virtual bool warnAboutMissingAssertions() const = 0;
+        virtual int abortAfter() const = 0;
+        virtual bool showInvisibles() const = 0;
+        virtual ShowDurations::OrNot showDurations() const = 0;
+        virtual TestSpec const& testSpec() const = 0;
+        virtual RunTests::InWhatOrder runOrder() const = 0;
+        virtual unsigned int rngSeed() const = 0;
+        virtual bool forceColour() const = 0;
+    };
+}
+
+// #included from: catch_stream.h
+#define TWOBLUECUBES_CATCH_STREAM_H_INCLUDED
+
+#include <streambuf>
+
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wpadded"
+#endif
+
+namespace Catch {
+
+    class Stream {
+    public:
+        Stream();
+        Stream( std::streambuf* _streamBuf, bool _isOwned );
+        void release();
+
+        std::streambuf* streamBuf;
+
+    private:
+        bool isOwned;
+    };
+
+    std::ostream& cout();
+    std::ostream& cerr();
+}
+
+#include <memory>
+#include <vector>
+#include <string>
+#include <iostream>
+#include <ctime>
+
+#ifndef CATCH_CONFIG_CONSOLE_WIDTH
+#define CATCH_CONFIG_CONSOLE_WIDTH 80
+#endif
+
+namespace Catch {
+
+    struct ConfigData {
+
+        ConfigData()
+        :   listTests( false ),
+            listTags( false ),
+            listReporters( false ),
+            listTestNamesOnly( false ),
+            showSuccessfulTests( false ),
+            shouldDebugBreak( false ),
+            noThrow( false ),
+            showHelp( false ),
+            showInvisibles( false ),
+            forceColour( false ),
+            abortAfter( -1 ),
+            rngSeed( 0 ),
+            verbosity( Verbosity::Normal ),
+            warnings( WarnAbout::Nothing ),
+            showDurations( ShowDurations::DefaultForReporter ),
+            runOrder( RunTests::InDeclarationOrder )
+        {}
+
+        bool listTests;
+        bool listTags;
+        bool listReporters;
+        bool listTestNamesOnly;
+
+        bool showSuccessfulTests;
+        bool shouldDebugBreak;
+        bool noThrow;
+        bool showHelp;
+        bool showInvisibles;
+        bool forceColour;
+
+        int abortAfter;
+        unsigned int rngSeed;
+
+        Verbosity::Level verbosity;
+        WarnAbout::What warnings;
+        ShowDurations::OrNot showDurations;
+        RunTests::InWhatOrder runOrder;
+
+        std::string reporterName;
+        std::string outputFilename;
+        std::string name;
+        std::string processName;
+
+        std::vector<std::string> testsOrTags;
+    };
+
+    class Config : public SharedImpl<IConfig> {
+    private:
+        Config( Config const& other );
+        Config& operator = ( Config const& other );
+        virtual void dummy();
+    public:
+
+        Config()
+        :   m_os( Catch::cout().rdbuf() )
+        {}
+
+        Config( ConfigData const& data )
+        :   m_data( data ),
+            m_os( Catch::cout().rdbuf() )
+        {
+            if( !data.testsOrTags.empty() ) {
+                TestSpecParser parser( ITagAliasRegistry::get() );
+                for( std::size_t i = 0; i < data.testsOrTags.size(); ++i )
+                    parser.parse( data.testsOrTags[i] );
+                m_testSpec = parser.testSpec();
+            }
+        }
+
+        virtual ~Config() {
+            m_os.rdbuf( Catch::cout().rdbuf() );
+            m_stream.release();
+        }
+
+        void setFilename( std::string const& filename ) {
+            m_data.outputFilename = filename;
+        }
+
+        std::string const& getFilename() const {
+            return m_data.outputFilename ;
+        }
+
+        bool listTests() const { return m_data.listTests; }
+        bool listTestNamesOnly() const { return m_data.listTestNamesOnly; }
+        bool listTags() const { return m_data.listTags; }
+        bool listReporters() const { return m_data.listReporters; }
+
+        std::string getProcessName() const { return m_data.processName; }
+
+        bool shouldDebugBreak() const { return m_data.shouldDebugBreak; }
+
+        void setStreamBuf( std::streambuf* buf ) {
+            m_os.rdbuf( buf ? buf : Catch::cout().rdbuf() );
+        }
+
+        void useStream( std::string const& streamName ) {
+            Stream stream = createStream( streamName );
+            setStreamBuf( stream.streamBuf );
+            m_stream.release();
+            m_stream = stream;
+        }
+
+        std::string getReporterName() const { return m_data.reporterName; }
+
+        int abortAfter() const { return m_data.abortAfter; }
+
+        TestSpec const& testSpec() const { return m_testSpec; }
+
+        bool showHelp() const { return m_data.showHelp; }
+        bool showInvisibles() const { return m_data.showInvisibles; }
+
+        // IConfig interface
+        virtual bool allowThrows() const        { return !m_data.noThrow; }
+        virtual std::ostream& stream() const    { return m_os; }
+        virtual std::string name() const        { return m_data.name.empty() ? m_data.processName : m_data.name; }
+        virtual bool includeSuccessfulResults() const   { return m_data.showSuccessfulTests; }
+        virtual bool warnAboutMissingAssertions() const { return m_data.warnings & WarnAbout::NoAssertions; }
+        virtual ShowDurations::OrNot showDurations() const { return m_data.showDurations; }
+        virtual RunTests::InWhatOrder runOrder() const  { return m_data.runOrder; }
+        virtual unsigned int rngSeed() const    { return m_data.rngSeed; }
+        virtual bool forceColour() const { return m_data.forceColour; }
+
+    private:
+        ConfigData m_data;
+
+        Stream m_stream;
+        mutable std::ostream m_os;
+        TestSpec m_testSpec;
+    };
+
+} // end namespace Catch
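
A minimal sketch of building a Config programmatically from ConfigData (normally the command-line parser below fills these fields; the function name and tag are illustrative):

    Catch::Ptr<Catch::Config> makeConfigSketch() {
        Catch::ConfigData data;
        data.showSuccessfulTests = true;
        data.abortAfter = 1;                       // stop after the first failure
        data.testsOrTags.push_back( "[unit]" );    // only run tests tagged [unit]
        return Catch::Ptr<Catch::Config>( new Catch::Config( data ) );
    }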
+
+// #included from: catch_clara.h
+#define TWOBLUECUBES_CATCH_CLARA_H_INCLUDED
+
+// Use Catch's value for console width (store Clara's off to the side, if present)
+#ifdef CLARA_CONFIG_CONSOLE_WIDTH
+#define CATCH_TEMP_CLARA_CONFIG_CONSOLE_WIDTH CLARA_CONFIG_CONSOLE_WIDTH
+#undef CLARA_CONFIG_CONSOLE_WIDTH
+#endif
+#define CLARA_CONFIG_CONSOLE_WIDTH CATCH_CONFIG_CONSOLE_WIDTH
+
+// Declare Clara inside the Catch namespace
+#define STITCH_CLARA_OPEN_NAMESPACE namespace Catch {
+// #included from: ../external/clara.h
+
+// Only use header guard if we are not using an outer namespace
+#if !defined(TWOBLUECUBES_CLARA_H_INCLUDED) || defined(STITCH_CLARA_OPEN_NAMESPACE)
+
+#ifndef STITCH_CLARA_OPEN_NAMESPACE
+#define TWOBLUECUBES_CLARA_H_INCLUDED
+#define STITCH_CLARA_OPEN_NAMESPACE
+#define STITCH_CLARA_CLOSE_NAMESPACE
+#else
+#define STITCH_CLARA_CLOSE_NAMESPACE }
+#endif
+
+#define STITCH_TBC_TEXT_FORMAT_OPEN_NAMESPACE STITCH_CLARA_OPEN_NAMESPACE
+
+// ----------- #included from tbc_text_format.h -----------
+
+// Only use header guard if we are not using an outer namespace
+#if !defined(TBC_TEXT_FORMAT_H_INCLUDED) || defined(STITCH_TBC_TEXT_FORMAT_OUTER_NAMESPACE)
+#ifndef STITCH_TBC_TEXT_FORMAT_OUTER_NAMESPACE
+#define TBC_TEXT_FORMAT_H_INCLUDED
+#endif
+
+#include <string>
+#include <vector>
+#include <sstream>
+
+// Use optional outer namespace
+#ifdef STITCH_TBC_TEXT_FORMAT_OUTER_NAMESPACE
+namespace STITCH_TBC_TEXT_FORMAT_OUTER_NAMESPACE {
+#endif
+
+namespace Tbc {
+
+#ifdef TBC_TEXT_FORMAT_CONSOLE_WIDTH
+    const unsigned int consoleWidth = TBC_TEXT_FORMAT_CONSOLE_WIDTH;
+#else
+    const unsigned int consoleWidth = 80;
+#endif
+
+    struct TextAttributes {
+        TextAttributes()
+        :   initialIndent( std::string::npos ),
+            indent( 0 ),
+            width( consoleWidth-1 ),
+            tabChar( '\t' )
+        {}
+
+        TextAttributes& setInitialIndent( std::size_t _value )  { initialIndent = _value; return *this; }
+        TextAttributes& setIndent( std::size_t _value )         { indent = _value; return *this; }
+        TextAttributes& setWidth( std::size_t _value )          { width = _value; return *this; }
+        TextAttributes& setTabChar( char _value )               { tabChar = _value; return *this; }
+
+        std::size_t initialIndent;  // indent of first line, or npos
+        std::size_t indent;         // indent of subsequent lines, or all if initialIndent is npos
+        std::size_t width;          // maximum width of text, including indent. Longer text will wrap
+        char tabChar;               // If this char is seen the indent is changed to current pos
+    };
+
+    class Text {
+    public:
+        Text( std::string const& _str, TextAttributes const& _attr = TextAttributes() )
+        : attr( _attr )
+        {
+            std::string wrappableChars = " [({.,/|\\-";
+            std::size_t indent = _attr.initialIndent != std::string::npos
+                ? _attr.initialIndent
+                : _attr.indent;
+            std::string remainder = _str;
+
+            while( !remainder.empty() ) {
+                if( lines.size() >= 1000 ) {
+                    lines.push_back( "... message truncated due to excessive size" );
+                    return;
+                }
+                std::size_t tabPos = std::string::npos;
+                std::size_t width = (std::min)( remainder.size(), _attr.width - indent );
+                std::size_t pos = remainder.find_first_of( '\n' );
+                if( pos <= width ) {
+                    width = pos;
+                }
+                pos = remainder.find_last_of( _attr.tabChar, width );
+                if( pos != std::string::npos ) {
+                    tabPos = pos;
+                    if( remainder[width] == '\n' )
+                        width--;
+                    remainder = remainder.substr( 0, tabPos ) + remainder.substr( tabPos+1 );
+                }
+
+                if( width == remainder.size() ) {
+                    spliceLine( indent, remainder, width );
+                }
+                else if( remainder[width] == '\n' ) {
+                    spliceLine( indent, remainder, width );
+                    if( width <= 1 || remainder.size() != 1 )
+                        remainder = remainder.substr( 1 );
+                    indent = _attr.indent;
+                }
+                else {
+                    pos = remainder.find_last_of( wrappableChars, width );
+                    if( pos != std::string::npos && pos > 0 ) {
+                        spliceLine( indent, remainder, pos );
+                        if( remainder[0] == ' ' )
+                            remainder = remainder.substr( 1 );
+                    }
+                    else {
+                        spliceLine( indent, remainder, width-1 );
+                        lines.back() += "-";
+                    }
+                    if( lines.size() == 1 )
+                        indent = _attr.indent;
+                    if( tabPos != std::string::npos )
+                        indent += tabPos;
+                }
+            }
+        }
+
+        void spliceLine( std::size_t _indent, std::string& _remainder, std::size_t _pos ) {
+            lines.push_back( std::string( _indent, ' ' ) + _remainder.substr( 0, _pos ) );
+            _remainder = _remainder.substr( _pos );
+        }
+
+        typedef std::vector<std::string>::const_iterator const_iterator;
+
+        const_iterator begin() const { return lines.begin(); }
+        const_iterator end() const { return lines.end(); }
+        std::string const& last() const { return lines.back(); }
+        std::size_t size() const { return lines.size(); }
+        std::string const& operator[]( std::size_t _index ) const { return lines[_index]; }
+        std::string toString() const {
+            std::ostringstream oss;
+            oss << *this;
+            return oss.str();
+        }
+
+        inline friend std::ostream& operator << ( std::ostream& _stream, Text const& _text ) {
+            for( Text::const_iterator it = _text.begin(), itEnd = _text.end();
+                it != itEnd; ++it ) {
+                if( it != _text.begin() )
+                    _stream << "\n";
+                _stream << *it;
+            }
+            return _stream;
+        }
+
+    private:
+        std::string str;
+        TextAttributes attr;
+        std::vector<std::string> lines;
+    };
+
+} // end namespace Tbc
+
+#ifdef STITCH_TBC_TEXT_FORMAT_OUTER_NAMESPACE
+} // end outer namespace
+#endif
+
+#endif // TBC_TEXT_FORMAT_H_INCLUDED
+
+// ----------- end of #include from tbc_text_format.h -----------
+// ........... back in /Users/philnash/Dev/OSS/Clara/srcs/clara.h
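
A minimal sketch of the Text wrapper above (the function name is illustrative; with only the ..._OPEN_NAMESPACE stitching macro defined, this embedded copy of Tbc lands at global namespace scope, and <iostream> is needed in addition to this header):

    void wrapSketch() {
        using namespace Tbc;
        Text wrapped( "salmon quantifies transcript abundance from RNA-seq reads",
                      TextAttributes().setWidth( 30 ).setIndent( 4 ) );
        std::cout << wrapped << "\n";   // each wrapped line is indented by four spaces
    }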
+
+#undef STITCH_TBC_TEXT_FORMAT_OPEN_NAMESPACE
+
+#include <map>
+#include <algorithm>
+#include <stdexcept>
+#include <memory>
+
+// Use optional outer namespace
+#ifdef STITCH_CLARA_OPEN_NAMESPACE
+STITCH_CLARA_OPEN_NAMESPACE
+#endif
+
+namespace Clara {
+
+    struct UnpositionalTag {};
+
+    extern UnpositionalTag _;
+
+#ifdef CLARA_CONFIG_MAIN
+    UnpositionalTag _;
+#endif
+
+    namespace Detail {
+
+#ifdef CLARA_CONFIG_CONSOLE_WIDTH
+    const unsigned int consoleWidth = CLARA_CONFIG_CONSOLE_WIDTH;
+#else
+    const unsigned int consoleWidth = 80;
+#endif
+
+        using namespace Tbc;
+
+        inline bool startsWith( std::string const& str, std::string const& prefix ) {
+            return str.size() >= prefix.size() && str.substr( 0, prefix.size() ) == prefix;
+        }
+
+        template<typename T> struct RemoveConstRef{ typedef T type; };
+        template<typename T> struct RemoveConstRef<T&>{ typedef T type; };
+        template<typename T> struct RemoveConstRef<T const&>{ typedef T type; };
+        template<typename T> struct RemoveConstRef<T const>{ typedef T type; };
+
+        template<typename T>    struct IsBool       { static const bool value = false; };
+        template<>              struct IsBool<bool> { static const bool value = true; };
+
+        template<typename T>
+        void convertInto( std::string const& _source, T& _dest ) {
+            std::stringstream ss;
+            ss << _source;
+            ss >> _dest;
+            if( ss.fail() )
+                throw std::runtime_error( "Unable to convert " + _source + " to destination type" );
+        }
+        inline void convertInto( std::string const& _source, std::string& _dest ) {
+            _dest = _source;
+        }
+        inline void convertInto( std::string const& _source, bool& _dest ) {
+            std::string sourceLC = _source;
+            std::transform( sourceLC.begin(), sourceLC.end(), sourceLC.begin(), ::tolower );
+            if( sourceLC == "y" || sourceLC == "1" || sourceLC == "true" || sourceLC == "yes" || sourceLC == "on" )
+                _dest = true;
+            else if( sourceLC == "n" || sourceLC == "0" || sourceLC == "false" || sourceLC == "no" || sourceLC == "off" )
+                _dest = false;
+            else
+                throw std::runtime_error( "Expected a boolean value but did not recognise:\n  '" + _source + "'" );
+        }
+        inline void convertInto( bool _source, bool& _dest ) {
+            _dest = _source;
+        }
+        template<typename T>
+        inline void convertInto( bool, T& ) {
+            throw std::runtime_error( "Invalid conversion" );
+        }
+
+        template<typename ConfigT>
+        struct IArgFunction {
+            virtual ~IArgFunction() {}
+#  ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS
+            IArgFunction()                      = default;
+            IArgFunction( IArgFunction const& ) = default;
+#  endif
+            virtual void set( ConfigT& config, std::string const& value ) const = 0;
+            virtual void setFlag( ConfigT& config ) const = 0;
+            virtual bool takesArg() const = 0;
+            virtual IArgFunction* clone() const = 0;
+        };
+
+        template<typename ConfigT>
+        class BoundArgFunction {
+        public:
+            BoundArgFunction() : functionObj( NULL ) {}
+            BoundArgFunction( IArgFunction<ConfigT>* _functionObj ) : functionObj( _functionObj ) {}
+            BoundArgFunction( BoundArgFunction const& other ) : functionObj( other.functionObj ? other.functionObj->clone() : NULL ) {}
+            BoundArgFunction& operator = ( BoundArgFunction const& other ) {
+                IArgFunction<ConfigT>* newFunctionObj = other.functionObj ? other.functionObj->clone() : NULL;
+                delete functionObj;
+                functionObj = newFunctionObj;
+                return *this;
+            }
+            ~BoundArgFunction() { delete functionObj; }
+
+            void set( ConfigT& config, std::string const& value ) const {
+                functionObj->set( config, value );
+            }
+            void setFlag( ConfigT& config ) const {
+                functionObj->setFlag( config );
+            }
+            bool takesArg() const { return functionObj->takesArg(); }
+
+            bool isSet() const {
+                return functionObj != NULL;
+            }
+        private:
+            IArgFunction<ConfigT>* functionObj;
+        };
+
+        template<typename C>
+        struct NullBinder : IArgFunction<C>{
+            virtual void set( C&, std::string const& ) const {}
+            virtual void setFlag( C& ) const {}
+            virtual bool takesArg() const { return true; }
+            virtual IArgFunction<C>* clone() const { return new NullBinder( *this ); }
+        };
+
+        template<typename C, typename M>
+        struct BoundDataMember : IArgFunction<C>{
+            BoundDataMember( M C::* _member ) : member( _member ) {}
+            virtual void set( C& p, std::string const& stringValue ) const {
+                convertInto( stringValue, p.*member );
+            }
+            virtual void setFlag( C& p ) const {
+                convertInto( true, p.*member );
+            }
+            virtual bool takesArg() const { return !IsBool<M>::value; }
+            virtual IArgFunction<C>* clone() const { return new BoundDataMember( *this ); }
+            M C::* member;
+        };
+        template<typename C, typename M>
+        struct BoundUnaryMethod : IArgFunction<C>{
+            BoundUnaryMethod( void (C::*_member)( M ) ) : member( _member ) {}
+            virtual void set( C& p, std::string const& stringValue ) const {
+                typename RemoveConstRef<M>::type value;
+                convertInto( stringValue, value );
+                (p.*member)( value );
+            }
+            virtual void setFlag( C& p ) const {
+                typename RemoveConstRef<M>::type value;
+                convertInto( true, value );
+                (p.*member)( value );
+            }
+            virtual bool takesArg() const { return !IsBool<M>::value; }
+            virtual IArgFunction<C>* clone() const { return new BoundUnaryMethod( *this ); }
+            void (C::*member)( M );
+        };
+        template<typename C>
+        struct BoundNullaryMethod : IArgFunction<C>{
+            BoundNullaryMethod( void (C::*_member)() ) : member( _member ) {}
+            virtual void set( C& p, std::string const& stringValue ) const {
+                bool value;
+                convertInto( stringValue, value );
+                if( value )
+                    (p.*member)();
+            }
+            virtual void setFlag( C& p ) const {
+                (p.*member)();
+            }
+            virtual bool takesArg() const { return false; }
+            virtual IArgFunction<C>* clone() const { return new BoundNullaryMethod( *this ); }
+            void (C::*member)();
+        };
+
+        template<typename C>
+        struct BoundUnaryFunction : IArgFunction<C>{
+            BoundUnaryFunction( void (*_function)( C& ) ) : function( _function ) {}
+            virtual void set( C& obj, std::string const& stringValue ) const {
+                bool value;
+                convertInto( stringValue, value );
+                if( value )
+                    function( obj );
+            }
+            virtual void setFlag( C& p ) const {
+                function( p );
+            }
+            virtual bool takesArg() const { return false; }
+            virtual IArgFunction<C>* clone() const { return new BoundUnaryFunction( *this ); }
+            void (*function)( C& );
+        };
+
+        template<typename C, typename T>
+        struct BoundBinaryFunction : IArgFunction<C>{
+            BoundBinaryFunction( void (*_function)( C&, T ) ) : function( _function ) {}
+            virtual void set( C& obj, std::string const& stringValue ) const {
+                typename RemoveConstRef<T>::type value;
+                convertInto( stringValue, value );
+                function( obj, value );
+            }
+            virtual void setFlag( C& obj ) const {
+                typename RemoveConstRef<T>::type value;
+                convertInto( true, value );
+                function( obj, value );
+            }
+            virtual bool takesArg() const { return !IsBool<T>::value; }
+            virtual IArgFunction<C>* clone() const { return new BoundBinaryFunction( *this ); }
+            void (*function)( C&, T );
+        };
+
+    } // namespace Detail
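+    // Illustration only (not part of the vendored Clara source): the Detail
+    // binders above adapt different targets behind one interface.  Given a
+    // hypothetical config struct
+    //
+    //     struct MyConfig { int threads; bool verbose; };
+    //
+    // a BoundDataMember<MyConfig,int> wraps &MyConfig::threads and, when the
+    // parser sees "--threads 8", calls the convertInto() overloads above as
+    // convertInto( "8", cfg.threads ); a BoundDataMember<MyConfig,bool> wraps
+    // &MyConfig::verbose and its setFlag() calls convertInto( true, cfg.verbose ).
+    // BoundArgFunction owns one such binder polymorphically (cloning it on
+    // copy), so the CommandLine class below can treat every kind of binding
+    // uniformly through set(), setFlag() and takesArg().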
+
+    struct Parser {
+        Parser() : separators( " \t=:" ) {}
+
+        struct Token {
+            enum Type { Positional, ShortOpt, LongOpt };
+            Token( Type _type, std::string const& _data ) : type( _type ), data( _data ) {}
+            Type type;
+            std::string data;
+        };
+
+        void parseIntoTokens( int argc, char const * const * argv, std::vector<Parser::Token>& tokens ) const {
+            const std::string doubleDash = "--";
+            for( int i = 1; i < argc && argv[i] != doubleDash; ++i )
+                parseIntoTokens( argv[i] , tokens);
+        }
+        void parseIntoTokens( std::string arg, std::vector<Parser::Token>& tokens ) const {
+            while( !arg.empty() ) {
+                Parser::Token token( Parser::Token::Positional, arg );
+                arg = "";
+                if( token.data[0] == '-' ) {
+                    if( token.data.size() > 1 && token.data[1] == '-' ) {
+                        token = Parser::Token( Parser::Token::LongOpt, token.data.substr( 2 ) );
+                    }
+                    else {
+                        token = Parser::Token( Parser::Token::ShortOpt, token.data.substr( 1 ) );
+                        if( token.data.size() > 1 && separators.find( token.data[1] ) == std::string::npos ) {
+                            arg = "-" + token.data.substr( 1 );
+                            token.data = token.data.substr( 0, 1 );
+                        }
+                    }
+                }
+                if( token.type != Parser::Token::Positional ) {
+                    std::size_t pos = token.data.find_first_of( separators );
+                    if( pos != std::string::npos ) {
+                        arg = token.data.substr( pos+1 );
+                        token.data = token.data.substr( 0, pos );
+                    }
+                }
+                tokens.push_back( token );
+            }
+        }
+        std::string separators;
+    };
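+    // Illustration only (not part of the vendored Clara source): with the
+    // separators " \t=:" above, parseIntoTokens() skips argv[0] and splits a
+    // hypothetical command line
+    //
+    //     prog --out=results.txt -ab file.cpp
+    //
+    // into the tokens
+    //
+    //     LongOpt  "out", Positional "results.txt"   (split on '=')
+    //     ShortOpt "a",   ShortOpt   "b"              (grouped short options)
+    //     Positional "file.cpp"
+    //
+    // and stops altogether at a bare "--" argument.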
+
+    template<typename ConfigT>
+    struct CommonArgProperties {
+        CommonArgProperties() {}
+        CommonArgProperties( Detail::BoundArgFunction<ConfigT> const& _boundField ) : boundField( _boundField ) {}
+
+        Detail::BoundArgFunction<ConfigT> boundField;
+        std::string description;
+        std::string detail;
+        std::string placeholder; // Only used if boundField takes an arg
+
+        bool takesArg() const {
+            return !placeholder.empty();
+        }
+        void validate() const {
+            if( !boundField.isSet() )
+                throw std::logic_error( "option not bound" );
+        }
+    };
+    struct OptionArgProperties {
+        std::vector<std::string> shortNames;
+        std::string longName;
+
+        bool hasShortName( std::string const& shortName ) const {
+            return std::find( shortNames.begin(), shortNames.end(), shortName ) != shortNames.end();
+        }
+        bool hasLongName( std::string const& _longName ) const {
+            return _longName == longName;
+        }
+    };
+    struct PositionalArgProperties {
+        PositionalArgProperties() : position( -1 ) {}
+        int position; // -1 means non-positional (floating)
+
+        bool isFixedPositional() const {
+            return position != -1;
+        }
+    };
+
+    template<typename ConfigT>
+    class CommandLine {
+
+        struct Arg : CommonArgProperties<ConfigT>, OptionArgProperties, PositionalArgProperties {
+            Arg() {}
+            Arg( Detail::BoundArgFunction<ConfigT> const& _boundField ) : CommonArgProperties<ConfigT>( _boundField ) {}
+
+            using CommonArgProperties<ConfigT>::placeholder; // !TBD
+
+            std::string dbgName() const {
+                if( !longName.empty() )
+                    return "--" + longName;
+                if( !shortNames.empty() )
+                    return "-" + shortNames[0];
+                return "positional args";
+            }
+            std::string commands() const {
+                std::ostringstream oss;
+                bool first = true;
+                std::vector<std::string>::const_iterator it = shortNames.begin(), itEnd = shortNames.end();
+                for(; it != itEnd; ++it ) {
+                    if( first )
+                        first = false;
+                    else
+                        oss << ", ";
+                    oss << "-" << *it;
+                }
+                if( !longName.empty() ) {
+                    if( !first )
+                        oss << ", ";
+                    oss << "--" << longName;
+                }
+                if( !placeholder.empty() )
+                    oss << " <" << placeholder << ">";
+                return oss.str();
+            }
+        };
+
+        // NOTE: std::auto_ptr is deprecated in c++11/c++0x
+#if defined(__cplusplus) && __cplusplus > 199711L
+        typedef std::unique_ptr<Arg> ArgAutoPtr;
+#else
+        typedef std::auto_ptr<Arg> ArgAutoPtr;
+#endif
+
+        friend void addOptName( Arg& arg, std::string const& optName )
+        {
+            if( optName.empty() )
+                return;
+            if( Detail::startsWith( optName, "--" ) ) {
+                if( !arg.longName.empty() )
+                    throw std::logic_error( "Only one long opt may be specified. '"
+                        + arg.longName
+                        + "' already specified, now attempting to add '"
+                        + optName + "'" );
+                arg.longName = optName.substr( 2 );
+            }
+            else if( Detail::startsWith( optName, "-" ) )
+                arg.shortNames.push_back( optName.substr( 1 ) );
+            else
+                throw std::logic_error( "option must begin with - or --. Option was: '" + optName + "'" );
+        }
+        friend void setPositionalArg( Arg& arg, int position )
+        {
+            arg.position = position;
+        }
+
+        class ArgBuilder {
+        public:
+            ArgBuilder( Arg* arg ) : m_arg( arg ) {}
+
+            // Bind a non-boolean data member (requires placeholder string)
+            template<typename C, typename M>
+            void bind( M C::* field, std::string const& placeholder ) {
+                m_arg->boundField = new Detail::BoundDataMember<C,M>( field );
+                m_arg->placeholder = placeholder;
+            }
+            // Bind a boolean data member (no placeholder required)
+            template<typename C>
+            void bind( bool C::* field ) {
+                m_arg->boundField = new Detail::BoundDataMember<C,bool>( field );
+            }
+
+            // Bind a method taking a single, non-boolean argument (requires a placeholder string)
+            template<typename C, typename M>
+            void bind( void (C::* unaryMethod)( M ), std::string const& placeholder ) {
+                m_arg->boundField = new Detail::BoundUnaryMethod<C,M>( unaryMethod );
+                m_arg->placeholder = placeholder;
+            }
+
+            // Bind a method taking a single, boolean argument (no placeholder string required)
+            template<typename C>
+            void bind( void (C::* unaryMethod)( bool ) ) {
+                m_arg->boundField = new Detail::BoundUnaryMethod<C,bool>( unaryMethod );
+            }
+
+            // Bind a method that takes no arguments (will be called if opt is present)
+            template<typename C>
+            void bind( void (C::* nullaryMethod)() ) {
+                m_arg->boundField = new Detail::BoundNullaryMethod<C>( nullaryMethod );
+            }
+
+            // Bind a free function taking a single argument - the object to operate on (no placeholder string required)
+            template<typename C>
+            void bind( void (* unaryFunction)( C& ) ) {
+                m_arg->boundField = new Detail::BoundUnaryFunction<C>( unaryFunction );
+            }
+
+            // Bind a free function taking two arguments - the object to operate on and the option's value (requires a placeholder string)
+            template<typename C, typename T>
+            void bind( void (* binaryFunction)( C&, T ), std::string const& placeholder ) {
+                m_arg->boundField = new Detail::BoundBinaryFunction<C, T>( binaryFunction );
+                m_arg->placeholder = placeholder;
+            }
+
+            ArgBuilder& describe( std::string const& description ) {
+                m_arg->description = description;
+                return *this;
+            }
+            ArgBuilder& detail( std::string const& detail ) {
+                m_arg->detail = detail;
+                return *this;
+            }
+
+        protected:
+            Arg* m_arg;
+        };
+
+        class OptBuilder : public ArgBuilder {
+        public:
+            OptBuilder( Arg* arg ) : ArgBuilder( arg ) {}
+            OptBuilder( OptBuilder& other ) : ArgBuilder( other ) {}
+
+            OptBuilder& operator[]( std::string const& optName ) {
+                addOptName( *ArgBuilder::m_arg, optName );
+                return *this;
+            }
+        };
+
+    public:
+
+        CommandLine()
+        :   m_boundProcessName( new Detail::NullBinder<ConfigT>() ),
+            m_highestSpecifiedArgPosition( 0 ),
+            m_throwOnUnrecognisedTokens( false )
+        {}
+        CommandLine( CommandLine const& other )
+        :   m_boundProcessName( other.m_boundProcessName ),
+            m_options ( other.m_options ),
+            m_positionalArgs( other.m_positionalArgs ),
+            m_highestSpecifiedArgPosition( other.m_highestSpecifiedArgPosition ),
+            m_throwOnUnrecognisedTokens( other.m_throwOnUnrecognisedTokens )
+        {
+            if( other.m_floatingArg.get() )
+                m_floatingArg.reset( new Arg( *other.m_floatingArg ) );
+        }
+
+        CommandLine& setThrowOnUnrecognisedTokens( bool shouldThrow = true ) {
+            m_throwOnUnrecognisedTokens = shouldThrow;
+            return *this;
+        }
+
+        OptBuilder operator[]( std::string const& optName ) {
+            m_options.push_back( Arg() );
+            addOptName( m_options.back(), optName );
+            OptBuilder builder( &m_options.back() );
+            return builder;
+        }
+
+        ArgBuilder operator[]( int position ) {
+            m_positionalArgs.insert( std::make_pair( position, Arg() ) );
+            if( position > m_highestSpecifiedArgPosition )
+                m_highestSpecifiedArgPosition = position;
+            setPositionalArg( m_positionalArgs[position], position );
+            ArgBuilder builder( &m_positionalArgs[position] );
+            return builder;
+        }
+
+        // Invoke this with the _ instance
+        ArgBuilder operator[]( UnpositionalTag ) {
+            if( m_floatingArg.get() )
+                throw std::logic_error( "Only one unpositional argument can be added" );
+            m_floatingArg.reset( new Arg() );
+            ArgBuilder builder( m_floatingArg.get() );
+            return builder;
+        }
+
+        template<typename C, typename M>
+        void bindProcessName( M C::* field ) {
+            m_boundProcessName = new Detail::BoundDataMember<C,M>( field );
+        }
+        template<typename C, typename M>
+        void bindProcessName( void (C::*_unaryMethod)( M ) ) {
+            m_boundProcessName = new Detail::BoundUnaryMethod<C,M>( _unaryMethod );
+        }
+
+        void optUsage( std::ostream& os, std::size_t indent = 0, std::size_t width = Detail::consoleWidth ) const {
+            typename std::vector<Arg>::const_iterator itBegin = m_options.begin(), itEnd = m_options.end(), it;
+            std::size_t maxWidth = 0;
+            for( it = itBegin; it != itEnd; ++it )
+                maxWidth = (std::max)( maxWidth, it->commands().size() );
+
+            for( it = itBegin; it != itEnd; ++it ) {
+                Detail::Text usage( it->commands(), Detail::TextAttributes()
+                                                        .setWidth( maxWidth+indent )
+                                                        .setIndent( indent ) );
+                Detail::Text desc( it->description, Detail::TextAttributes()
+                                                        .setWidth( width - maxWidth - 3 ) );
+
+                for( std::size_t i = 0; i < (std::max)( usage.size(), desc.size() ); ++i ) {
+                    std::string usageCol = i < usage.size() ? usage[i] : "";
+                    os << usageCol;
+
+                    if( i < desc.size() && !desc[i].empty() )
+                        os  << std::string( indent + 2 + maxWidth - usageCol.size(), ' ' )
+                            << desc[i];
+                    os << "\n";
+                }
+            }
+        }
+        std::string optUsage() const {
+            std::ostringstream oss;
+            optUsage( oss );
+            return oss.str();
+        }
+
+        void argSynopsis( std::ostream& os ) const {
+            for( int i = 1; i <= m_highestSpecifiedArgPosition; ++i ) {
+                if( i > 1 )
+                    os << " ";
+                typename std::map<int, Arg>::const_iterator it = m_positionalArgs.find( i );
+                if( it != m_positionalArgs.end() )
+                    os << "<" << it->second.placeholder << ">";
+                else if( m_floatingArg.get() )
+                    os << "<" << m_floatingArg->placeholder << ">";
+                else
+                    throw std::logic_error( "non consecutive positional arguments with no floating args" );
+            }
+            // !TBD No indication of mandatory args
+            if( m_floatingArg.get() ) {
+                if( m_highestSpecifiedArgPosition > 1 )
+                    os << " ";
+                os << "[<" << m_floatingArg->placeholder << "> ...]";
+            }
+        }
+        std::string argSynopsis() const {
+            std::ostringstream oss;
+            argSynopsis( oss );
+            return oss.str();
+        }
+
+        void usage( std::ostream& os, std::string const& procName ) const {
+            validate();
+            os << "usage:\n  " << procName << " ";
+            argSynopsis( os );
+            if( !m_options.empty() ) {
+                os << " [options]\n\nwhere options are: \n";
+                optUsage( os, 2 );
+            }
+            os << "\n";
+        }
+        std::string usage( std::string const& procName ) const {
+            std::ostringstream oss;
+            usage( oss, procName );
+            return oss.str();
+        }
+
+        ConfigT parse( int argc, char const * const * argv ) const {
+            ConfigT config;
+            parseInto( argc, argv, config );
+            return config;
+        }
+
+        std::vector<Parser::Token> parseInto( int argc, char const * const * argv, ConfigT& config ) const {
+            std::string processName = argv[0];
+            std::size_t lastSlash = processName.find_last_of( "/\\" );
+            if( lastSlash != std::string::npos )
+                processName = processName.substr( lastSlash+1 );
+            m_boundProcessName.set( config, processName );
+            std::vector<Parser::Token> tokens;
+            Parser parser;
+            parser.parseIntoTokens( argc, argv, tokens );
+            return populate( tokens, config );
+        }
+
+        std::vector<Parser::Token> populate( std::vector<Parser::Token> const& tokens, ConfigT& config ) const {
+            validate();
+            std::vector<Parser::Token> unusedTokens = populateOptions( tokens, config );
+            unusedTokens = populateFixedArgs( unusedTokens, config );
+            unusedTokens = populateFloatingArgs( unusedTokens, config );
+            return unusedTokens;
+        }
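+        // Illustration only: populate() applies the three stages above in
+        // order.  With a hypothetical registered option "-o <file>" and the
+        // command line "prog -o out.txt extra.cpp", populateOptions() consumes
+        // the ShortOpt "o" together with the following Positional "out.txt";
+        // the leftover Positional "extra.cpp" then falls through to the fixed
+        // positional args and, failing that, to the floating arg registered
+        // via cli[_] (if any).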
+
+        std::vector<Parser::Token> populateOptions( std::vector<Parser::Token> const& tokens, ConfigT& config ) const {
+            std::vector<Parser::Token> unusedTokens;
+            std::vector<std::string> errors;
+            for( std::size_t i = 0; i < tokens.size(); ++i ) {
+                Parser::Token const& token = tokens[i];
+                typename std::vector<Arg>::const_iterator it = m_options.begin(), itEnd = m_options.end();
+                for(; it != itEnd; ++it ) {
+                    Arg const& arg = *it;
+
+                    try {
+                        if( ( token.type == Parser::Token::ShortOpt && arg.hasShortName( token.data ) ) ||
+                            ( token.type == Parser::Token::LongOpt && arg.hasLongName( token.data ) ) ) {
+                            if( arg.takesArg() ) {
+                                if( i == tokens.size()-1 || tokens[i+1].type != Parser::Token::Positional )
+                                    errors.push_back( "Expected argument to option: " + token.data );
+                                else
+                                    arg.boundField.set( config, tokens[++i].data );
+                            }
+                            else {
+                                arg.boundField.setFlag( config );
+                            }
+                            break;
+                        }
+                    }
+                    catch( std::exception& ex ) {
+                        errors.push_back( std::string( ex.what() ) + "\n- while parsing: (" + arg.commands() + ")" );
+                    }
+                }
+                if( it == itEnd ) {
+                    if( token.type == Parser::Token::Positional || !m_throwOnUnrecognisedTokens )
+                        unusedTokens.push_back( token );
+                    else if( errors.empty() && m_throwOnUnrecognisedTokens )
+                        errors.push_back( "unrecognised option: " + token.data );
+                }
+            }
+            if( !errors.empty() ) {
+                std::ostringstream oss;
+                for( std::vector<std::string>::const_iterator it = errors.begin(), itEnd = errors.end();
+                        it != itEnd;
+                        ++it ) {
+                    if( it != errors.begin() )
+                        oss << "\n";
+                    oss << *it;
+                }
+                throw std::runtime_error( oss.str() );
+            }
+            return unusedTokens;
+        }
+        std::vector<Parser::Token> populateFixedArgs( std::vector<Parser::Token> const& tokens, ConfigT& config ) const {
+            std::vector<Parser::Token> unusedTokens;
+            int position = 1;
+            for( std::size_t i = 0; i < tokens.size(); ++i ) {
+                Parser::Token const& token = tokens[i];
+                typename std::map<int, Arg>::const_iterator it = m_positionalArgs.find( position );
+                if( it != m_positionalArgs.end() )
+                    it->second.boundField.set( config, token.data );
+                else
+                    unusedTokens.push_back( token );
+                if( token.type == Parser::Token::Positional )
+                    position++;
+            }
+            return unusedTokens;
+        }
+        std::vector<Parser::Token> populateFloatingArgs( std::vector<Parser::Token> const& tokens, ConfigT& config ) const {
+            if( !m_floatingArg.get() )
+                return tokens;
+            std::vector<Parser::Token> unusedTokens;
+            for( std::size_t i = 0; i < tokens.size(); ++i ) {
+                Parser::Token const& token = tokens[i];
+                if( token.type == Parser::Token::Positional )
+                    m_floatingArg->boundField.set( config, token.data );
+                else
+                    unusedTokens.push_back( token );
+            }
+            return unusedTokens;
+        }
+
+        void validate() const
+        {
+            if( m_options.empty() && m_positionalArgs.empty() && !m_floatingArg.get() )
+                throw std::logic_error( "No options or arguments specified" );
+
+            for( typename std::vector<Arg>::const_iterator  it = m_options.begin(),
+                                                            itEnd = m_options.end();
+                    it != itEnd; ++it )
+                it->validate();
+        }
+
+    private:
+        Detail::BoundArgFunction<ConfigT> m_boundProcessName;
+        std::vector<Arg> m_options;
+        std::map<int, Arg> m_positionalArgs;
+        ArgAutoPtr m_floatingArg;
+        int m_highestSpecifiedArgPosition;
+        bool m_throwOnUnrecognisedTokens;
+    };
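+    // Illustration only (not part of the vendored Clara source): a minimal,
+    // hypothetical use of CommandLine<> with a user-defined config type.
+    // None of the names below (MyConfig, threads, verbose) exist in this
+    // header.
+    //
+    //     struct MyConfig { int threads; bool verbose; std::string name; };
+    //
+    //     Clara::CommandLine<MyConfig> cli;
+    //     cli.bindProcessName( &MyConfig::name );
+    //     cli["-j"]["--threads"]
+    //         .describe( "number of worker threads" )
+    //         .bind( &MyConfig::threads, "n" );
+    //     cli["-v"]["--verbose"]
+    //         .describe( "enable verbose output" )
+    //         .bind( &MyConfig::verbose );
+    //
+    //     MyConfig cfg;                          // set defaults first
+    //     cfg.threads = 1; cfg.verbose = false;
+    //     cli.parseInto( argc, argv, cfg );      // throws on malformed input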
+
+} // end namespace Clara
+
+STITCH_CLARA_CLOSE_NAMESPACE
+#undef STITCH_CLARA_OPEN_NAMESPACE
+#undef STITCH_CLARA_CLOSE_NAMESPACE
+
+#endif // TWOBLUECUBES_CLARA_H_INCLUDED
+#undef STITCH_CLARA_OPEN_NAMESPACE
+
+// Restore Clara's value for console width, if present
+#ifdef CATCH_TEMP_CLARA_CONFIG_CONSOLE_WIDTH
+#define CLARA_CONFIG_CONSOLE_WIDTH CATCH_TEMP_CLARA_CONFIG_CONSOLE_WIDTH
+#undef CATCH_TEMP_CLARA_CONFIG_CONSOLE_WIDTH
+#endif
+
+#include <fstream>
+
+namespace Catch {
+
+    inline void abortAfterFirst( ConfigData& config ) { config.abortAfter = 1; }
+    inline void abortAfterX( ConfigData& config, int x ) {
+        if( x < 1 )
+            throw std::runtime_error( "Value after -x or --abortx must be greater than zero" );
+        config.abortAfter = x;
+    }
+    inline void addTestOrTags( ConfigData& config, std::string const& _testSpec ) { config.testsOrTags.push_back( _testSpec ); }
+
+    inline void addWarning( ConfigData& config, std::string const& _warning ) {
+        if( _warning == "NoAssertions" )
+            config.warnings = static_cast<WarnAbout::What>( config.warnings | WarnAbout::NoAssertions );
+        else
+            throw std::runtime_error( "Unrecognised warning: '" + _warning + "'" );
+    }
+    inline void setOrder( ConfigData& config, std::string const& order ) {
+        if( startsWith( "declared", order ) )
+            config.runOrder = RunTests::InDeclarationOrder;
+        else if( startsWith( "lexical", order ) )
+            config.runOrder = RunTests::InLexicographicalOrder;
+        else if( startsWith( "random", order ) )
+            config.runOrder = RunTests::InRandomOrder;
+        else
+            throw std::runtime_error( "Unrecognised ordering: '" + order + "'" );
+    }
+    inline void setRngSeed( ConfigData& config, std::string const& seed ) {
+        if( seed == "time" ) {
+            config.rngSeed = static_cast<unsigned int>( std::time(0) );
+        }
+        else {
+            std::stringstream ss;
+            ss << seed;
+            ss >> config.rngSeed;
+            if( ss.fail() )
+                throw std::runtime_error( "Argument to --rng-seed should be the word 'time' or a number" );
+        }
+    }
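+    // Illustration only: "--rng-seed time" seeds from std::time(0), while
+    // "--rng-seed 42" stream-extracts the number into config.rngSeed; any
+    // value that fails extraction triggers the runtime_error above.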
+    inline void setVerbosity( ConfigData& config, int level ) {
+        // !TBD: accept strings?
+        config.verbosity = static_cast<Verbosity::Level>( level );
+    }
+    inline void setShowDurations( ConfigData& config, bool _showDurations ) {
+        config.showDurations = _showDurations
+            ? ShowDurations::Always
+            : ShowDurations::Never;
+    }
+    inline void loadTestNamesFromFile( ConfigData& config, std::string const& _filename ) {
+        std::ifstream f( _filename.c_str() );
+        if( !f.is_open() )
+            throw std::domain_error( "Unable to load input file: " + _filename );
+
+        std::string line;
+        while( std::getline( f, line ) ) {
+            line = trim(line);
+            if( !line.empty() && !startsWith( line, "#" ) )
+                addTestOrTags( config, "\"" + line + "\"," );
+        }
+    }
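+    // Illustration only: a hypothetical file passed via -f/--input-file.
+    // After trimming, blank lines and lines starting with '#' are skipped;
+    // each remaining line is quoted and forwarded to addTestOrTags(), so
+    //
+    //     # smoke tests
+    //     Vectors can be sized and resized
+    //     Strings can be trimmed
+    //
+    // restricts the run to those two test cases by name.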
+
+    inline Clara::CommandLine<ConfigData> makeCommandLineParser() {
+
+        using namespace Clara;
+        CommandLine<ConfigData> cli;
+
+        cli.bindProcessName( &ConfigData::processName );
+
+        cli["-?"]["-h"]["--help"]
+            .describe( "display usage information" )
+            .bind( &ConfigData::showHelp );
+
+        cli["-l"]["--list-tests"]
+            .describe( "list all/matching test cases" )
+            .bind( &ConfigData::listTests );
+
+        cli["-t"]["--list-tags"]
+            .describe( "list all/matching tags" )
+            .bind( &ConfigData::listTags );
+
+        cli["-s"]["--success"]
+            .describe( "include successful tests in output" )
+            .bind( &ConfigData::showSuccessfulTests );
+
+        cli["-b"]["--break"]
+            .describe( "break into debugger on failure" )
+            .bind( &ConfigData::shouldDebugBreak );
+
+        cli["-e"]["--nothrow"]
+            .describe( "skip exception tests" )
+            .bind( &ConfigData::noThrow );
+
+        cli["-i"]["--invisibles"]
+            .describe( "show invisibles (tabs, newlines)" )
+            .bind( &ConfigData::showInvisibles );
+
+        cli["-o"]["--out"]
+            .describe( "output filename" )
+            .bind( &ConfigData::outputFilename, "filename" );
+
+        cli["-r"]["--reporter"]
+//            .placeholder( "name[:filename]" )
+            .describe( "reporter to use (defaults to console)" )
+            .bind( &ConfigData::reporterName, "name" );
+
+        cli["-n"]["--name"]
+            .describe( "suite name" )
+            .bind( &ConfigData::name, "name" );
+
+        cli["-a"]["--abort"]
+            .describe( "abort at first failure" )
+            .bind( &abortAfterFirst );
+
+        cli["-x"]["--abortx"]
+            .describe( "abort after x failures" )
+            .bind( &abortAfterX, "no. failures" );
+
+        cli["-w"]["--warn"]
+            .describe( "enable warnings" )
+            .bind( &addWarning, "warning name" );
+
+// - needs updating if reinstated
+//        cli.into( &setVerbosity )
+//            .describe( "level of verbosity (0=no output)" )
+//            .shortOpt( "v")
+//            .longOpt( "verbosity" )
+//            .placeholder( "level" );
+
+        cli[_]
+            .describe( "which test or tests to use" )
+            .bind( &addTestOrTags, "test name, pattern or tags" );
+
+        cli["-d"]["--durations"]
+            .describe( "show test durations" )
+            .bind( &setShowDurations, "yes/no" );
+
+        cli["-f"]["--input-file"]
+            .describe( "load test names to run from a file" )
+            .bind( &loadTestNamesFromFile, "filename" );
+
+        // Less common commands which don't have a short form
+        cli["--list-test-names-only"]
+            .describe( "list all/matching test case names only" )
+            .bind( &ConfigData::listTestNamesOnly );
+
+        cli["--list-reporters"]
+            .describe( "list all reporters" )
+            .bind( &ConfigData::listReporters );
+
+        cli["--order"]
+            .describe( "test case order (defaults to decl)" )
+            .bind( &setOrder, "decl|lex|rand" );
+
+        cli["--rng-seed"]
+            .describe( "set a specific seed for random numbers" )
+            .bind( &setRngSeed, "'time'|number" );
+
+        cli["--force-colour"]
+            .describe( "force colourised output" )
+            .bind( &ConfigData::forceColour );
+
+        return cli;
+    }
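+    // Illustration only: roughly how the parser built above is driven (the
+    // actual wiring is done by Catch's Session class elsewhere in this
+    // header).
+    //
+    //     ConfigData configData;
+    //     Clara::CommandLine<ConfigData> cli = makeCommandLineParser();
+    //     std::vector<Clara::Parser::Token> unused =
+    //         cli.parseInto( argc, argv, configData );  // throws on bad options
+    //
+    // e.g. "./tests -s -d yes [fast]" enables output for successful tests,
+    // shows durations, and restricts the run to test cases tagged [fast].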
+
+} // end namespace Catch
+
+// #included from: internal/catch_list.hpp
+#define TWOBLUECUBES_CATCH_LIST_HPP_INCLUDED
+
+// #included from: catch_text.h
+#define TWOBLUECUBES_CATCH_TEXT_H_INCLUDED
+
+#define TBC_TEXT_FORMAT_CONSOLE_WIDTH CATCH_CONFIG_CONSOLE_WIDTH
+
+#define CLICHE_TBC_TEXT_FORMAT_OUTER_NAMESPACE Catch
+// #included from: ../external/tbc_text_format.h
+// Only use header guard if we are not using an outer namespace
+#ifndef CLICHE_TBC_TEXT_FORMAT_OUTER_NAMESPACE
+# ifdef TWOBLUECUBES_TEXT_FORMAT_H_INCLUDED
+#  ifndef TWOBLUECUBES_TEXT_FORMAT_H_ALREADY_INCLUDED
+#   define TWOBLUECUBES_TEXT_FORMAT_H_ALREADY_INCLUDED
+#  endif
+# else
+#  define TWOBLUECUBES_TEXT_FORMAT_H_INCLUDED
+# endif
+#endif
+#ifndef TWOBLUECUBES_TEXT_FORMAT_H_ALREADY_INCLUDED
+#include <string>
+#include <vector>
+#include <sstream>
+
+// Use optional outer namespace
+#ifdef CLICHE_TBC_TEXT_FORMAT_OUTER_NAMESPACE
+namespace CLICHE_TBC_TEXT_FORMAT_OUTER_NAMESPACE {
+#endif
+
+namespace Tbc {
+
+#ifdef TBC_TEXT_FORMAT_CONSOLE_WIDTH
+    const unsigned int consoleWidth = TBC_TEXT_FORMAT_CONSOLE_WIDTH;
+#else
+    const unsigned int consoleWidth = 80;
+#endif
+
+    struct TextAttributes {
+        TextAttributes()
+        :   initialIndent( std::string::npos ),
+            indent( 0 ),
+            width( consoleWidth-1 ),
+            tabChar( '\t' )
+        {}
+
+        TextAttributes& setInitialIndent( std::size_t _value )  { initialIndent = _value; return *this; }
+        TextAttributes& setIndent( std::size_t _value )         { indent = _value; return *this; }
+        TextAttributes& setWidth( std::size_t _value )          { width = _value; return *this; }
+        TextAttributes& setTabChar( char _value )               { tabChar = _value; return *this; }
+
+        std::size_t initialIndent;  // indent of first line, or npos
+        std::size_t indent;         // indent of subsequent lines, or all if initialIndent is npos
+        std::size_t width;          // maximum width of text, including indent. Longer text will wrap
+        char tabChar;               // If this char is seen the indent is changed to current pos
+    };
+
+    class Text {
+    public:
+        Text( std::string const& _str, TextAttributes const& _attr = TextAttributes() )
+        : attr( _attr )
+        {
+            std::string wrappableChars = " [({.,/|\\-";
+            std::size_t indent = _attr.initialIndent != std::string::npos
+                ? _attr.initialIndent
+                : _attr.indent;
+            std::string remainder = _str;
+
+            while( !remainder.empty() ) {
+                if( lines.size() >= 1000 ) {
+                    lines.push_back( "... message truncated due to excessive size" );
+                    return;
+                }
+                std::size_t tabPos = std::string::npos;
+                std::size_t width = (std::min)( remainder.size(), _attr.width - indent );
+                std::size_t pos = remainder.find_first_of( '\n' );
+                if( pos <= width ) {
+                    width = pos;
+                }
+                pos = remainder.find_last_of( _attr.tabChar, width );
+                if( pos != std::string::npos ) {
+                    tabPos = pos;
+                    if( remainder[width] == '\n' )
+                        width--;
+                    remainder = remainder.substr( 0, tabPos ) + remainder.substr( tabPos+1 );
+                }
+
+                if( width == remainder.size() ) {
+                    spliceLine( indent, remainder, width );
+                }
+                else if( remainder[width] == '\n' ) {
+                    spliceLine( indent, remainder, width );
+                    if( width <= 1 || remainder.size() != 1 )
+                        remainder = remainder.substr( 1 );
+                    indent = _attr.indent;
+                }
+                else {
+                    pos = remainder.find_last_of( wrappableChars, width );
+                    if( pos != std::string::npos && pos > 0 ) {
+                        spliceLine( indent, remainder, pos );
+                        if( remainder[0] == ' ' )
+                            remainder = remainder.substr( 1 );
+                    }
+                    else {
+                        spliceLine( indent, remainder, width-1 );
+                        lines.back() += "-";
+                    }
+                    if( lines.size() == 1 )
+                        indent = _attr.indent;
+                    if( tabPos != std::string::npos )
+                        indent += tabPos;
+                }
+            }
+        }
+
+        void spliceLine( std::size_t _indent, std::string& _remainder, std::size_t _pos ) {
+            lines.push_back( std::string( _indent, ' ' ) + _remainder.substr( 0, _pos ) );
+            _remainder = _remainder.substr( _pos );
+        }
+
+        typedef std::vector<std::string>::const_iterator const_iterator;
+
+        const_iterator begin() const { return lines.begin(); }
+        const_iterator end() const { return lines.end(); }
+        std::string const& last() const { return lines.back(); }
+        std::size_t size() const { return lines.size(); }
+        std::string const& operator[]( std::size_t _index ) const { return lines[_index]; }
+        std::string toString() const {
+            std::ostringstream oss;
+            oss << *this;
+            return oss.str();
+        }
+
+        inline friend std::ostream& operator << ( std::ostream& _stream, Text const& _text ) {
+            for( Text::const_iterator it = _text.begin(), itEnd = _text.end();
+                it != itEnd; ++it ) {
+                if( it != _text.begin() )
+                    _stream << "\n";
+                _stream << *it;
+            }
+            return _stream;
+        }
+
+    private:
+        std::string str;
+        TextAttributes attr;
+        std::vector<std::string> lines;
+    };
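+    // Illustration only: wrapping a long string with the Text class above
+    // (someLongDescription is a hypothetical std::string).
+    //
+    //     Text wrapped( someLongDescription,
+    //                   TextAttributes()
+    //                       .setInitialIndent( 0 )
+    //                       .setIndent( 4 )
+    //                       .setWidth( 60 ) );
+    //     std::cout << wrapped << "\n";   // prints each wrapped, indented line
+    //
+    // listTests() and listTags() later in this header use it (via Catch::Text)
+    // to lay out their column output.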
+
+} // end namespace Tbc
+
+#ifdef CLICHE_TBC_TEXT_FORMAT_OUTER_NAMESPACE
+} // end outer namespace
+#endif
+
+#endif // TWOBLUECUBES_TEXT_FORMAT_H_ALREADY_INCLUDED
+#undef CLICHE_TBC_TEXT_FORMAT_OUTER_NAMESPACE
+
+namespace Catch {
+    using Tbc::Text;
+    using Tbc::TextAttributes;
+}
+
+// #included from: catch_console_colour.hpp
+#define TWOBLUECUBES_CATCH_CONSOLE_COLOUR_HPP_INCLUDED
+
+namespace Catch {
+
+    struct Colour {
+        enum Code {
+            None = 0,
+
+            White,
+            Red,
+            Green,
+            Blue,
+            Cyan,
+            Yellow,
+            Grey,
+
+            Bright = 0x10,
+
+            BrightRed = Bright | Red,
+            BrightGreen = Bright | Green,
+            LightGrey = Bright | Grey,
+            BrightWhite = Bright | White,
+
+            // By intention
+            FileName = LightGrey,
+            Warning = Yellow,
+            ResultError = BrightRed,
+            ResultSuccess = BrightGreen,
+            ResultExpectedFailure = Warning,
+
+            Error = BrightRed,
+            Success = Green,
+
+            OriginalExpression = Cyan,
+            ReconstructedExpression = Yellow,
+
+            SecondaryText = LightGrey,
+            Headers = White
+        };
+
+        // Use constructed object for RAII guard
+        Colour( Code _colourCode );
+        Colour( Colour const& other );
+        ~Colour();
+
+        // Use static method for one-shot changes
+        static void use( Code _colourCode );
+
+    private:
+        bool m_moved;
+    };
+
+    inline std::ostream& operator << ( std::ostream& os, Colour const& ) { return os; }
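+    // Illustration only, based on the comments above and on the colourGuard
+    // usage in listTests() below: Colour acts as a scoped guard, or as a
+    // one-shot change through the static helper.
+    //
+    //     {
+    //         Colour colourGuard( Colour::ResultError );
+    //         Catch::cout() << "assertion failed";
+    //     }   // guard destroyed; the colour is no longer forced
+    //
+    //     Colour::use( Colour::None );   // one-shot reset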
+
+} // end namespace Catch
+
+// #included from: catch_interfaces_reporter.h
+#define TWOBLUECUBES_CATCH_INTERFACES_REPORTER_H_INCLUDED
+
+#include <string>
+#include <ostream>
+#include <map>
+#include <assert.h>
+
+namespace Catch
+{
+    struct ReporterConfig {
+        explicit ReporterConfig( Ptr<IConfig> const& _fullConfig )
+        :   m_stream( &_fullConfig->stream() ), m_fullConfig( _fullConfig ) {}
+
+        ReporterConfig( Ptr<IConfig> const& _fullConfig, std::ostream& _stream )
+        :   m_stream( &_stream ), m_fullConfig( _fullConfig ) {}
+
+        std::ostream& stream() const    { return *m_stream; }
+        Ptr<IConfig> fullConfig() const { return m_fullConfig; }
+
+    private:
+        std::ostream* m_stream;
+        Ptr<IConfig> m_fullConfig;
+    };
+
+    struct ReporterPreferences {
+        ReporterPreferences()
+        : shouldRedirectStdOut( false )
+        {}
+
+        bool shouldRedirectStdOut;
+    };
+
+    template<typename T>
+    struct LazyStat : Option<T> {
+        LazyStat() : used( false ) {}
+        LazyStat& operator=( T const& _value ) {
+            Option<T>::operator=( _value );
+            used = false;
+            return *this;
+        }
+        void reset() {
+            Option<T>::reset();
+            used = false;
+        }
+        bool used;
+    };
+
+    struct TestRunInfo {
+        TestRunInfo( std::string const& _name ) : name( _name ) {}
+        std::string name;
+    };
+    struct GroupInfo {
+        GroupInfo(  std::string const& _name,
+                    std::size_t _groupIndex,
+                    std::size_t _groupsCount )
+        :   name( _name ),
+            groupIndex( _groupIndex ),
+            groupsCounts( _groupsCount )
+        {}
+
+        std::string name;
+        std::size_t groupIndex;
+        std::size_t groupsCounts;
+    };
+
+    struct AssertionStats {
+        AssertionStats( AssertionResult const& _assertionResult,
+                        std::vector<MessageInfo> const& _infoMessages,
+                        Totals const& _totals )
+        :   assertionResult( _assertionResult ),
+            infoMessages( _infoMessages ),
+            totals( _totals )
+        {
+            if( assertionResult.hasMessage() ) {
+                // Copy message into messages list.
+                // !TBD This should have been done earlier, somewhere
+                MessageBuilder builder( assertionResult.getTestMacroName(), assertionResult.getSourceInfo(), assertionResult.getResultType() );
+                builder << assertionResult.getMessage();
+                builder.m_info.message = builder.m_stream.str();
+
+                infoMessages.push_back( builder.m_info );
+            }
+        }
+        virtual ~AssertionStats();
+
+#  ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS
+        AssertionStats( AssertionStats const& )              = default;
+        AssertionStats( AssertionStats && )                  = default;
+        AssertionStats& operator = ( AssertionStats const& ) = default;
+        AssertionStats& operator = ( AssertionStats && )     = default;
+#  endif
+
+        AssertionResult assertionResult;
+        std::vector<MessageInfo> infoMessages;
+        Totals totals;
+    };
+
+    struct SectionStats {
+        SectionStats(   SectionInfo const& _sectionInfo,
+                        Counts const& _assertions,
+                        double _durationInSeconds,
+                        bool _missingAssertions )
+        :   sectionInfo( _sectionInfo ),
+            assertions( _assertions ),
+            durationInSeconds( _durationInSeconds ),
+            missingAssertions( _missingAssertions )
+        {}
+        virtual ~SectionStats();
+#  ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS
+        SectionStats( SectionStats const& )              = default;
+        SectionStats( SectionStats && )                  = default;
+        SectionStats& operator = ( SectionStats const& ) = default;
+        SectionStats& operator = ( SectionStats && )     = default;
+#  endif
+
+        SectionInfo sectionInfo;
+        Counts assertions;
+        double durationInSeconds;
+        bool missingAssertions;
+    };
+
+    struct TestCaseStats {
+        TestCaseStats(  TestCaseInfo const& _testInfo,
+                        Totals const& _totals,
+                        std::string const& _stdOut,
+                        std::string const& _stdErr,
+                        bool _aborting )
+        : testInfo( _testInfo ),
+            totals( _totals ),
+            stdOut( _stdOut ),
+            stdErr( _stdErr ),
+            aborting( _aborting )
+        {}
+        virtual ~TestCaseStats();
+
+#  ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS
+        TestCaseStats( TestCaseStats const& )              = default;
+        TestCaseStats( TestCaseStats && )                  = default;
+        TestCaseStats& operator = ( TestCaseStats const& ) = default;
+        TestCaseStats& operator = ( TestCaseStats && )     = default;
+#  endif
+
+        TestCaseInfo testInfo;
+        Totals totals;
+        std::string stdOut;
+        std::string stdErr;
+        bool aborting;
+    };
+
+    struct TestGroupStats {
+        TestGroupStats( GroupInfo const& _groupInfo,
+                        Totals const& _totals,
+                        bool _aborting )
+        :   groupInfo( _groupInfo ),
+            totals( _totals ),
+            aborting( _aborting )
+        {}
+        TestGroupStats( GroupInfo const& _groupInfo )
+        :   groupInfo( _groupInfo ),
+            aborting( false )
+        {}
+        virtual ~TestGroupStats();
+
+#  ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS
+        TestGroupStats( TestGroupStats const& )              = default;
+        TestGroupStats( TestGroupStats && )                  = default;
+        TestGroupStats& operator = ( TestGroupStats const& ) = default;
+        TestGroupStats& operator = ( TestGroupStats && )     = default;
+#  endif
+
+        GroupInfo groupInfo;
+        Totals totals;
+        bool aborting;
+    };
+
+    struct TestRunStats {
+        TestRunStats(   TestRunInfo const& _runInfo,
+                        Totals const& _totals,
+                        bool _aborting )
+        :   runInfo( _runInfo ),
+            totals( _totals ),
+            aborting( _aborting )
+        {}
+        virtual ~TestRunStats();
+
+#  ifndef CATCH_CONFIG_CPP11_GENERATED_METHODS
+        TestRunStats( TestRunStats const& _other )
+        :   runInfo( _other.runInfo ),
+            totals( _other.totals ),
+            aborting( _other.aborting )
+        {}
+#  else
+        TestRunStats( TestRunStats const& )              = default;
+        TestRunStats( TestRunStats && )                  = default;
+        TestRunStats& operator = ( TestRunStats const& ) = default;
+        TestRunStats& operator = ( TestRunStats && )     = default;
+#  endif
+
+        TestRunInfo runInfo;
+        Totals totals;
+        bool aborting;
+    };
+
+    struct IStreamingReporter : IShared {
+        virtual ~IStreamingReporter();
+
+        // Implementing class must also provide the following static method:
+        // static std::string getDescription();
+
+        virtual ReporterPreferences getPreferences() const = 0;
+
+        virtual void noMatchingTestCases( std::string const& spec ) = 0;
+
+        virtual void testRunStarting( TestRunInfo const& testRunInfo ) = 0;
+        virtual void testGroupStarting( GroupInfo const& groupInfo ) = 0;
+
+        virtual void testCaseStarting( TestCaseInfo const& testInfo ) = 0;
+        virtual void sectionStarting( SectionInfo const& sectionInfo ) = 0;
+
+        virtual void assertionStarting( AssertionInfo const& assertionInfo ) = 0;
+
+        // The return value indicates if the messages buffer should be cleared:
+        virtual bool assertionEnded( AssertionStats const& assertionStats ) = 0;
+        virtual void sectionEnded( SectionStats const& sectionStats ) = 0;
+        virtual void testCaseEnded( TestCaseStats const& testCaseStats ) = 0;
+        virtual void testGroupEnded( TestGroupStats const& testGroupStats ) = 0;
+        virtual void testRunEnded( TestRunStats const& testRunStats ) = 0;
+
+        virtual void skipTest( TestCaseInfo const& testInfo ) = 0;
+    };
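+    // Illustration only: the rough nesting in which a streaming reporter
+    // receives these callbacks during a run:
+    //
+    //     testRunStarting
+    //       testGroupStarting
+    //         testCaseStarting
+    //           sectionStarting
+    //             assertionStarting / assertionEnded   (per assertion)
+    //           sectionEnded
+    //         testCaseEnded
+    //       testGroupEnded
+    //     testRunEnded
+    //
+    // noMatchingTestCases() and skipTest() are informational callbacks that
+    // sit outside this nesting.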
+
+    struct IReporterFactory {
+        virtual ~IReporterFactory();
+        virtual IStreamingReporter* create( ReporterConfig const& config ) const = 0;
+        virtual std::string getDescription() const = 0;
+    };
+
+    struct IReporterRegistry {
+        typedef std::map<std::string, IReporterFactory*> FactoryMap;
+
+        virtual ~IReporterRegistry();
+        virtual IStreamingReporter* create( std::string const& name, Ptr<IConfig> const& config ) const = 0;
+        virtual FactoryMap const& getFactories() const = 0;
+    };
+
+}
+
+#include <limits>
+#include <algorithm>
+
+namespace Catch {
+
+    inline std::size_t listTests( Config const& config ) {
+
+        TestSpec testSpec = config.testSpec();
+        if( config.testSpec().hasFilters() )
+            Catch::cout() << "Matching test cases:\n";
+        else {
+            Catch::cout() << "All available test cases:\n";
+            testSpec = TestSpecParser( ITagAliasRegistry::get() ).parse( "*" ).testSpec();
+        }
+
+        std::size_t matchedTests = 0;
+        TextAttributes nameAttr, tagsAttr;
+        nameAttr.setInitialIndent( 2 ).setIndent( 4 );
+        tagsAttr.setIndent( 6 );
+
+        std::vector<TestCase> matchedTestCases;
+        getRegistryHub().getTestCaseRegistry().getFilteredTests( testSpec, config, matchedTestCases );
+        for( std::vector<TestCase>::const_iterator it = matchedTestCases.begin(), itEnd = matchedTestCases.end();
+                it != itEnd;
+                ++it ) {
+            matchedTests++;
+            TestCaseInfo const& testCaseInfo = it->getTestCaseInfo();
+            Colour::Code colour = testCaseInfo.isHidden()
+                ? Colour::SecondaryText
+                : Colour::None;
+            Colour colourGuard( colour );
+
+            Catch::cout() << Text( testCaseInfo.name, nameAttr ) << std::endl;
+            if( !testCaseInfo.tags.empty() )
+                Catch::cout() << Text( testCaseInfo.tagsAsString, tagsAttr ) << std::endl;
+        }
+
+        if( !config.testSpec().hasFilters() )
+            Catch::cout() << pluralise( matchedTests, "test case" ) << "\n" << std::endl;
+        else
+            Catch::cout() << pluralise( matchedTests, "matching test case" ) << "\n" << std::endl;
+        return matchedTests;
+    }
+
+    inline std::size_t listTestsNamesOnly( Config const& config ) {
+        TestSpec testSpec = config.testSpec();
+        if( !config.testSpec().hasFilters() )
+            testSpec = TestSpecParser( ITagAliasRegistry::get() ).parse( "*" ).testSpec();
+        std::size_t matchedTests = 0;
+        std::vector<TestCase> matchedTestCases;
+        getRegistryHub().getTestCaseRegistry().getFilteredTests( testSpec, config, matchedTestCases );
+        for( std::vector<TestCase>::const_iterator it = matchedTestCases.begin(), itEnd = matchedTestCases.end();
+                it != itEnd;
+                ++it ) {
+            matchedTests++;
+            TestCaseInfo const& testCaseInfo = it->getTestCaseInfo();
+            Catch::cout() << testCaseInfo.name << std::endl;
+        }
+        return matchedTests;
+    }
+
+    struct TagInfo {
+        TagInfo() : count ( 0 ) {}
+        void add( std::string const& spelling ) {
+            ++count;
+            spellings.insert( spelling );
+        }
+        std::string all() const {
+            std::string out;
+            for( std::set<std::string>::const_iterator it = spellings.begin(), itEnd = spellings.end();
+                        it != itEnd;
+                        ++it )
+                out += "[" + *it + "]";
+            return out;
+        }
+        std::set<std::string> spellings;
+        std::size_t count;
+    };
+
+    inline std::size_t listTags( Config const& config ) {
+        TestSpec testSpec = config.testSpec();
+        if( config.testSpec().hasFilters() )
+            Catch::cout() << "Tags for matching test cases:\n";
+        else {
+            Catch::cout() << "All available tags:\n";
+            testSpec = TestSpecParser( ITagAliasRegistry::get() ).parse( "*" ).testSpec();
+        }
+
+        std::map<std::string, TagInfo> tagCounts;
+
+        std::vector<TestCase> matchedTestCases;
+        getRegistryHub().getTestCaseRegistry().getFilteredTests( testSpec, config, matchedTestCases );
+        for( std::vector<TestCase>::const_iterator it = matchedTestCases.begin(), itEnd = matchedTestCases.end();
+                it != itEnd;
+                ++it ) {
+            for( std::set<std::string>::const_iterator  tagIt = it->getTestCaseInfo().tags.begin(),
+                                                        tagItEnd = it->getTestCaseInfo().tags.end();
+                    tagIt != tagItEnd;
+                    ++tagIt ) {
+                std::string tagName = *tagIt;
+                std::string lcaseTagName = toLower( tagName );
+                std::map<std::string, TagInfo>::iterator countIt = tagCounts.find( lcaseTagName );
+                if( countIt == tagCounts.end() )
+                    countIt = tagCounts.insert( std::make_pair( lcaseTagName, TagInfo() ) ).first;
+                countIt->second.add( tagName );
+            }
+        }
+
+        for( std::map<std::string, TagInfo>::const_iterator countIt = tagCounts.begin(),
+                                                            countItEnd = tagCounts.end();
+                countIt != countItEnd;
+                ++countIt ) {
+            std::ostringstream oss;
+            oss << "  " << std::setw(2) << countIt->second.count << "  ";
+            Text wrapper( countIt->second.all(), TextAttributes()
+                                                    .setInitialIndent( 0 )
+                                                    .setIndent( oss.str().size() )
+                                                    .setWidth( CATCH_CONFIG_CONSOLE_WIDTH-10 ) );
+            Catch::cout() << oss.str() << wrapper << "\n";
+        }
+        Catch::cout() << pluralise( tagCounts.size(), "tag" ) << "\n" << std::endl;
+        return tagCounts.size();
+    }
+
+    inline std::size_t listReporters( Config const& /*config*/ ) {
+        Catch::cout() << "Available reporters:\n";
+        IReporterRegistry::FactoryMap const& factories = getRegistryHub().getReporterRegistry().getFactories();
+        IReporterRegistry::FactoryMap::const_iterator itBegin = factories.begin(), itEnd = factories.end(), it;
+        std::size_t maxNameLen = 0;
+        for(it = itBegin; it != itEnd; ++it )
+            maxNameLen = (std::max)( maxNameLen, it->first.size() );
+
+        for(it = itBegin; it != itEnd; ++it ) {
+            Text wrapper( it->second->getDescription(), TextAttributes()
+                                                        .setInitialIndent( 0 )
+                                                        .setIndent( 7+maxNameLen )
+                                                        .setWidth( CATCH_CONFIG_CONSOLE_WIDTH - maxNameLen-8 ) );
+            Catch::cout() << "  "
+                    << it->first
+                    << ":"
+                    << std::string( maxNameLen - it->first.size() + 2, ' ' )
+                    << wrapper << "\n";
+        }
+        Catch::cout() << std::endl;
+        return factories.size();
+    }
+
+    inline Option<std::size_t> list( Config const& config ) {
+        Option<std::size_t> listedCount;
+        if( config.listTests() )
+            listedCount = listedCount.valueOr(0) + listTests( config );
+        if( config.listTestNamesOnly() )
+            listedCount = listedCount.valueOr(0) + listTestsNamesOnly( config );
+        if( config.listTags() )
+            listedCount = listedCount.valueOr(0) + listTags( config );
+        if( config.listReporters() )
+            listedCount = listedCount.valueOr(0) + listReporters( config );
+        return listedCount;
+    }
+
+} // end namespace Catch
+
+// #included from: internal/catch_runner_impl.hpp
+#define TWOBLUECUBES_CATCH_RUNNER_IMPL_HPP_INCLUDED
+
+// #included from: catch_test_case_tracker.hpp
+#define TWOBLUECUBES_CATCH_TEST_CASE_TRACKER_HPP_INCLUDED
+
+#include <map>
+#include <string>
+#include <assert.h>
+
+namespace Catch {
+namespace SectionTracking {
+
+    class TrackedSection {
+
+        typedef std::map<std::string, TrackedSection> TrackedSections;
+
+    public:
+        enum RunState {
+            NotStarted,
+            Executing,
+            ExecutingChildren,
+            Completed
+        };
+
+        TrackedSection( std::string const& name, TrackedSection* parent )
+        :   m_name( name ), m_runState( NotStarted ), m_parent( parent )
+        {}
+
+        RunState runState() const { return m_runState; }
+
+        TrackedSection* findChild( std::string const& childName );
+        TrackedSection* acquireChild( std::string const& childName );
+
+        void enter() {
+            if( m_runState == NotStarted )
+                m_runState = Executing;
+        }
+        void leave();
+
+        TrackedSection* getParent() {
+            return m_parent;
+        }
+        bool hasChildren() const {
+            return !m_children.empty();
+        }
+
+    private:
+        std::string m_name;
+        RunState m_runState;
+        TrackedSections m_children;
+        TrackedSection* m_parent;
+    };
+
+    inline TrackedSection* TrackedSection::findChild( std::string const& childName ) {
+        TrackedSections::iterator it = m_children.find( childName );
+        return it != m_children.end()
+            ? &it->second
+            : NULL;
+    }
+    inline TrackedSection* TrackedSection::acquireChild( std::string const& childName ) {
+        if( TrackedSection* child = findChild( childName ) )
+            return child;
+        m_children.insert( std::make_pair( childName, TrackedSection( childName, this ) ) );
+        return findChild( childName );
+    }
+    inline void TrackedSection::leave() {
+        for( TrackedSections::const_iterator it = m_children.begin(), itEnd = m_children.end();
+                it != itEnd;
+                ++it )
+            if( it->second.runState() != Completed ) {
+                m_runState = ExecutingChildren;
+                return;
+            }
+        m_runState = Completed;
+    }
+
+    class TestCaseTracker {
+    public:
+        TestCaseTracker( std::string const& testCaseName )
+        :   m_testCase( testCaseName, NULL ),
+            m_currentSection( &m_testCase ),
+            m_completedASectionThisRun( false )
+        {}
+
+        bool enterSection( std::string const& name ) {
+            TrackedSection* child = m_currentSection->acquireChild( name );
+            if( m_completedASectionThisRun || child->runState() == TrackedSection::Completed )
+                return false;
+
+            m_currentSection = child;
+            m_currentSection->enter();
+            return true;
+        }
+        void leaveSection() {
+            m_currentSection->leave();
+            m_currentSection = m_currentSection->getParent();
+            assert( m_currentSection != NULL );
+            m_completedASectionThisRun = true;
+        }
+
+        bool currentSectionHasChildren() const {
+            return m_currentSection->hasChildren();
+        }
+        bool isCompleted() const {
+            return m_testCase.runState() == TrackedSection::Completed;
+        }
+
+        class Guard {
+        public:
+            Guard( TestCaseTracker& tracker ) : m_tracker( tracker ) {
+                m_tracker.enterTestCase();
+            }
+            ~Guard() {
+                m_tracker.leaveTestCase();
+            }
+        private:
+            Guard( Guard const& );
+            void operator = ( Guard const& );
+            TestCaseTracker& m_tracker;
+        };
+
+    private:
+        void enterTestCase() {
+            m_currentSection = &m_testCase;
+            m_completedASectionThisRun = false;
+            m_testCase.enter();
+        }
+        void leaveTestCase() {
+            m_testCase.leave();
+        }
+
+        TrackedSection m_testCase;
+        TrackedSection* m_currentSection;
+        bool m_completedASectionThisRun;
+    };
+
+} // namespace SectionTracking
+
+using SectionTracking::TestCaseTracker;
+
+} // namespace Catch
+
+// #included from: catch_fatal_condition.hpp
+#define TWOBLUECUBES_CATCH_FATAL_CONDITION_H_INCLUDED
+
+namespace Catch {
+
+    // Report the error condition then exit the process
+    inline void fatal( std::string const& message, int exitCode ) {
+        IContext& context = Catch::getCurrentContext();
+        IResultCapture* resultCapture = context.getResultCapture();
+        resultCapture->handleFatalErrorCondition( message );
+
+        if( Catch::alwaysTrue() ) // avoids "no return" warnings
+            exit( exitCode );
+    }
+
+} // namespace Catch
+
+#if defined ( CATCH_PLATFORM_WINDOWS ) /////////////////////////////////////////
+
+namespace Catch {
+
+    struct FatalConditionHandler {
+        void reset() {}
+    };
+
+} // namespace Catch
+
+#else // Not Windows - assumed to be POSIX compatible //////////////////////////
+
+#include <signal.h>
+
+namespace Catch {
+
+    struct SignalDefs { int id; const char* name; };
+    extern SignalDefs signalDefs[];
+    SignalDefs signalDefs[] = {
+            { SIGINT,  "SIGINT - Terminal interrupt signal" },
+            { SIGILL,  "SIGILL - Illegal instruction signal" },
+            { SIGFPE,  "SIGFPE - Floating point error signal" },
+            { SIGSEGV, "SIGSEGV - Segmentation violation signal" },
+            { SIGTERM, "SIGTERM - Termination request signal" },
+            { SIGABRT, "SIGABRT - Abort (abnormal termination) signal" }
+        };
+
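+    // Installs handlers for the signals listed above and routes them to
+    // fatal(), so a crash is still reported; reset() restores SIG_DFL.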
+    struct FatalConditionHandler {
+
+        static void handleSignal( int sig ) {
+            for( std::size_t i = 0; i < sizeof(signalDefs)/sizeof(SignalDefs); ++i )
+                if( sig == signalDefs[i].id )
+                    fatal( signalDefs[i].name, -sig );
+            fatal( "<unknown signal>", -sig );
+        }
+
+        FatalConditionHandler() : m_isSet( true ) {
+            for( std::size_t i = 0; i < sizeof(signalDefs)/sizeof(SignalDefs); ++i )
+                signal( signalDefs[i].id, handleSignal );
+        }
+        ~FatalConditionHandler() {
+            reset();
+        }
+        void reset() {
+            if( m_isSet ) {
+                for( std::size_t i = 0; i < sizeof(signalDefs)/sizeof(SignalDefs); ++i )
+                    signal( signalDefs[i].id, SIG_DFL );
+                m_isSet = false;
+            }
+        }
+
+        bool m_isSet;
+    };
+
+} // namespace Catch
+
+#endif // not Windows
+
+#include <set>
+#include <string>
+
+namespace Catch {
+
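+    // RAII redirection of an ostream into a local buffer; on destruction the
+    // captured output is appended to targetString and the original streambuf
+    // is restored.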
+    class StreamRedirect {
+
+    public:
+        StreamRedirect( std::ostream& stream, std::string& targetString )
+        :   m_stream( stream ),
+            m_prevBuf( stream.rdbuf() ),
+            m_targetString( targetString )
+        {
+            stream.rdbuf( m_oss.rdbuf() );
+        }
+
+        ~StreamRedirect() {
+            m_targetString += m_oss.str();
+            m_stream.rdbuf( m_prevBuf );
+        }
+
+    private:
+        std::ostream& m_stream;
+        std::streambuf* m_prevBuf;
+        std::ostringstream m_oss;
+        std::string& m_targetString;
+    };
+
+    ///////////////////////////////////////////////////////////////////////////
+
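+    // The core test runner: implements IResultCapture and IRunner, installs
+    // itself in the current context, repeatedly invokes a test case until all
+    // of its sections (and generators) have been exercised, and forwards
+    // progress events to the reporter.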
+    class RunContext : public IResultCapture, public IRunner {
+
+        RunContext( RunContext const& );
+        void operator =( RunContext const& );
+
+    public:
+
+        explicit RunContext( Ptr<IConfig const> const& config, Ptr<IStreamingReporter> const& reporter )
+        :   m_runInfo( config->name() ),
+            m_context( getCurrentMutableContext() ),
+            m_activeTestCase( NULL ),
+            m_config( config ),
+            m_reporter( reporter ),
+            m_prevRunner( m_context.getRunner() ),
+            m_prevResultCapture( m_context.getResultCapture() ),
+            m_prevConfig( m_context.getConfig() )
+        {
+            m_context.setRunner( this );
+            m_context.setConfig( m_config );
+            m_context.setResultCapture( this );
+            m_reporter->testRunStarting( m_runInfo );
+        }
+
+        virtual ~RunContext() {
+            m_reporter->testRunEnded( TestRunStats( m_runInfo, m_totals, aborting() ) );
+            m_context.setRunner( m_prevRunner );
+            m_context.setConfig( NULL );
+            m_context.setResultCapture( m_prevResultCapture );
+            m_context.setConfig( m_prevConfig );
+        }
+
+        void testGroupStarting( std::string const& testSpec, std::size_t groupIndex, std::size_t groupsCount ) {
+            m_reporter->testGroupStarting( GroupInfo( testSpec, groupIndex, groupsCount ) );
+        }
+        void testGroupEnded( std::string const& testSpec, Totals const& totals, std::size_t groupIndex, std::size_t groupsCount ) {
+            m_reporter->testGroupEnded( TestGroupStats( GroupInfo( testSpec, groupIndex, groupsCount ), totals, aborting() ) );
+        }
+
+        Totals runTest( TestCase const& testCase ) {
+            Totals prevTotals = m_totals;
+
+            std::string redirectedCout;
+            std::string redirectedCerr;
+
+            TestCaseInfo testInfo = testCase.getTestCaseInfo();
+
+            m_reporter->testCaseStarting( testInfo );
+
+            m_activeTestCase = &testCase;
+            m_testCaseTracker = TestCaseTracker( testInfo.name );
+
+            do {
+                do {
+                    runCurrentTest( redirectedCout, redirectedCerr );
+                }
+                while( !m_testCaseTracker->isCompleted() && !aborting() );
+            }
+            while( getCurrentContext().advanceGeneratorsForCurrentTest() && !aborting() );
+
+            Totals deltaTotals = m_totals.delta( prevTotals );
+            m_totals.testCases += deltaTotals.testCases;
+            m_reporter->testCaseEnded( TestCaseStats(   testInfo,
+                                                        deltaTotals,
+                                                        redirectedCout,
+                                                        redirectedCerr,
+                                                        aborting() ) );
+
+            m_activeTestCase = NULL;
+            m_testCaseTracker.reset();
+
+            return deltaTotals;
+        }
+
+        Ptr<IConfig const> config() const {
+            return m_config;
+        }
+
+    private: // IResultCapture
+
+        virtual void assertionEnded( AssertionResult const& result ) {
+            if( result.getResultType() == ResultWas::Ok ) {
+                m_totals.assertions.passed++;
+            }
+            else if( !result.isOk() ) {
+                m_totals.assertions.failed++;
+            }
+
+            if( m_reporter->assertionEnded( AssertionStats( result, m_messages, m_totals ) ) )
+                m_messages.clear();
+
+            // Reset working state
+            m_lastAssertionInfo = AssertionInfo( "", m_lastAssertionInfo.lineInfo, "{Unknown expression after the reported line}", m_lastAssertionInfo.resultDisposition );
+            m_lastResult = result;
+        }
+
+        virtual bool sectionStarted (
+            SectionInfo const& sectionInfo,
+            Counts& assertions
+        )
+        {
+            std::ostringstream oss;
+            oss << sectionInfo.name << "@" << sectionInfo.lineInfo;
+
+            if( !m_testCaseTracker->enterSection( oss.str() ) )
+                return false;
+
+            m_lastAssertionInfo.lineInfo = sectionInfo.lineInfo;
+
+            m_reporter->sectionStarting( sectionInfo );
+
+            assertions = m_totals.assertions;
+
+            return true;
+        }
+        bool testForMissingAssertions( Counts& assertions ) {
+            if( assertions.total() != 0 ||
+                    !m_config->warnAboutMissingAssertions() ||
+                    m_testCaseTracker->currentSectionHasChildren() )
+                return false;
+            m_totals.assertions.failed++;
+            assertions.failed++;
+            return true;
+        }
+
+        virtual void sectionEnded( SectionInfo const& info, Counts const& prevAssertions, double _durationInSeconds ) {
+            if( std::uncaught_exception() ) {
+                m_unfinishedSections.push_back( UnfinishedSections( info, prevAssertions, _durationInSeconds ) );
+                return;
+            }
+
+            Counts assertions = m_totals.assertions - prevAssertions;
+            bool missingAssertions = testForMissingAssertions( assertions );
+
+            m_testCaseTracker->leaveSection();
+
+            m_reporter->sectionEnded( SectionStats( info, assertions, _durationInSeconds, missingAssertions ) );
+            m_messages.clear();
+        }
+
+        virtual void pushScopedMessage( MessageInfo const& message ) {
+            m_messages.push_back( message );
+        }
+
+        virtual void popScopedMessage( MessageInfo const& message ) {
+            m_messages.erase( std::remove( m_messages.begin(), m_messages.end(), message ), m_messages.end() );
+        }
+
+        virtual std::string getCurrentTestName() const {
+            return m_activeTestCase
+                ? m_activeTestCase->getTestCaseInfo().name
+                : "";
+        }
+
+        virtual const AssertionResult* getLastResult() const {
+            return &m_lastResult;
+        }
+
+        virtual void handleFatalErrorCondition( std::string const& message ) {
+            ResultBuilder resultBuilder = makeUnexpectedResultBuilder();
+            resultBuilder.setResultType( ResultWas::FatalErrorCondition );
+            resultBuilder << message;
+            resultBuilder.captureExpression();
+
+            handleUnfinishedSections();
+
+            // Recreate section for test case (as we will lose the one that was in scope)
+            TestCaseInfo const& testCaseInfo = m_activeTestCase->getTestCaseInfo();
+            SectionInfo testCaseSection( testCaseInfo.lineInfo, testCaseInfo.name, testCaseInfo.description );
+
+            Counts assertions;
+            assertions.failed = 1;
+            SectionStats testCaseSectionStats( testCaseSection, assertions, 0, false );
+            m_reporter->sectionEnded( testCaseSectionStats );
+
+            TestCaseInfo testInfo = m_activeTestCase->getTestCaseInfo();
+
+            Totals deltaTotals;
+            deltaTotals.testCases.failed = 1;
+            m_reporter->testCaseEnded( TestCaseStats(   testInfo,
+                                                        deltaTotals,
+                                                        "",
+                                                        "",
+                                                        false ) );
+            m_totals.testCases.failed++;
+            testGroupEnded( "", m_totals, 1, 1 );
+            m_reporter->testRunEnded( TestRunStats( m_runInfo, m_totals, false ) );
+        }
+
+    public:
+        // !TBD We need to do this another way!
+        bool aborting() const {
+            return m_totals.assertions.failed == static_cast<std::size_t>( m_config->abortAfter() );
+        }
+
+    private:
+
+        void runCurrentTest( std::string& redirectedCout, std::string& redirectedCerr ) {
+            TestCaseInfo const& testCaseInfo = m_activeTestCase->getTestCaseInfo();
+            SectionInfo testCaseSection( testCaseInfo.lineInfo, testCaseInfo.name, testCaseInfo.description );
+            m_reporter->sectionStarting( testCaseSection );
+            Counts prevAssertions = m_totals.assertions;
+            double duration = 0;
+            try {
+                m_lastAssertionInfo = AssertionInfo( "TEST_CASE", testCaseInfo.lineInfo, "", ResultDisposition::Normal );
+                TestCaseTracker::Guard guard( *m_testCaseTracker );
+
+                Timer timer;
+                timer.start();
+                if( m_reporter->getPreferences().shouldRedirectStdOut ) {
+                    StreamRedirect coutRedir( Catch::cout(), redirectedCout );
+                    StreamRedirect cerrRedir( Catch::cerr(), redirectedCerr );
+                    invokeActiveTestCase();
+                }
+                else {
+                    invokeActiveTestCase();
+                }
+                duration = timer.getElapsedSeconds();
+            }
+            catch( TestFailureException& ) {
+                // This just means the test was aborted due to failure
+            }
+            catch(...) {
+                makeUnexpectedResultBuilder().useActiveException();
+            }
+            handleUnfinishedSections();
+            m_messages.clear();
+
+            Counts assertions = m_totals.assertions - prevAssertions;
+            bool missingAssertions = testForMissingAssertions( assertions );
+
+            if( testCaseInfo.okToFail() ) {
+                std::swap( assertions.failedButOk, assertions.failed );
+                m_totals.assertions.failed -= assertions.failedButOk;
+                m_totals.assertions.failedButOk += assertions.failedButOk;
+            }
+
+            SectionStats testCaseSectionStats( testCaseSection, assertions, duration, missingAssertions );
+            m_reporter->sectionEnded( testCaseSectionStats );
+        }
+
+        void invokeActiveTestCase() {
+            FatalConditionHandler fatalConditionHandler; // Handle signals
+            m_activeTestCase->invoke();
+            fatalConditionHandler.reset();
+        }
+
+    private:
+
+        ResultBuilder makeUnexpectedResultBuilder() const {
+            return ResultBuilder(   m_lastAssertionInfo.macroName.c_str(),
+                                    m_lastAssertionInfo.lineInfo,
+                                    m_lastAssertionInfo.capturedExpression.c_str(),
+                                    m_lastAssertionInfo.resultDisposition );
+        }
+
+        void handleUnfinishedSections() {
+            // If sections ended prematurely due to an exception, their info was
+            // stored here so that we can tear them down outside the unwind process.
+            for( std::vector<UnfinishedSections>::const_reverse_iterator it = m_unfinishedSections.rbegin(),
+                        itEnd = m_unfinishedSections.rend();
+                    it != itEnd;
+                    ++it )
+                sectionEnded( it->info, it->prevAssertions, it->durationInSeconds );
+            m_unfinishedSections.clear();
+        }
+
+        struct UnfinishedSections {
+            UnfinishedSections( SectionInfo const& _info, Counts const& _prevAssertions, double _durationInSeconds )
+            : info( _info ), prevAssertions( _prevAssertions ), durationInSeconds( _durationInSeconds )
+            {}
+
+            SectionInfo info;
+            Counts prevAssertions;
+            double durationInSeconds;
+        };
+
+        TestRunInfo m_runInfo;
+        IMutableContext& m_context;
+        TestCase const* m_activeTestCase;
+        Option<TestCaseTracker> m_testCaseTracker;
+        AssertionResult m_lastResult;
+
+        Ptr<IConfig const> m_config;
+        Totals m_totals;
+        Ptr<IStreamingReporter> m_reporter;
+        std::vector<MessageInfo> m_messages;
+        IRunner* m_prevRunner;
+        IResultCapture* m_prevResultCapture;
+        Ptr<IConfig const> m_prevConfig;
+        AssertionInfo m_lastAssertionInfo;
+        std::vector<UnfinishedSections> m_unfinishedSections;
+    };
+
+    IResultCapture& getResultCapture() {
+        if( IResultCapture* capture = getCurrentContext().getResultCapture() )
+            return *capture;
+        else
+            throw std::logic_error( "No result capture instance" );
+    }
+
+} // end namespace Catch
+
+// #included from: internal/catch_version.h
+#define TWOBLUECUBES_CATCH_VERSION_H_INCLUDED
+
+namespace Catch {
+
+    // Versioning information
+    struct Version {
+        Version(    unsigned int _majorVersion,
+                    unsigned int _minorVersion,
+                    unsigned int _patchNumber,
+                    std::string const& _branchName,
+                    unsigned int _buildNumber );
+
+        unsigned int const majorVersion;
+        unsigned int const minorVersion;
+        unsigned int const patchNumber;
+
+        // buildNumber is only used if branchName is not empty
+        std::string const branchName;
+        unsigned int const buildNumber;
+
+        friend std::ostream& operator << ( std::ostream& os, Version const& version );
+
+    private:
+        void operator=( Version const& );
+    };
+
+    extern Version libraryVersion;
+}
+
+#include <fstream>
+#include <stdlib.h>
+#include <limits>
+
+namespace Catch {
+
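+    // Resolves the configured test spec, runs every matching test case once
+    // through a RunContext and reports the test cases that were skipped.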
+    class Runner {
+
+    public:
+        Runner( Ptr<Config> const& config )
+        :   m_config( config )
+        {
+            openStream();
+            makeReporter();
+        }
+
+        Totals runTests() {
+
+            RunContext context( m_config.get(), m_reporter );
+
+            Totals totals;
+
+            context.testGroupStarting( "all tests", 1, 1 ); // deprecated?
+
+            TestSpec testSpec = m_config->testSpec();
+            if( !testSpec.hasFilters() )
+                testSpec = TestSpecParser( ITagAliasRegistry::get() ).parse( "~[.]" ).testSpec(); // All not hidden tests
+
+            std::vector<TestCase> testCases;
+            getRegistryHub().getTestCaseRegistry().getFilteredTests( testSpec, *m_config, testCases );
+
+            int testsRunForGroup = 0;
+            for( std::vector<TestCase>::const_iterator it = testCases.begin(), itEnd = testCases.end();
+                    it != itEnd;
+                    ++it ) {
+                testsRunForGroup++;
+                if( m_testsAlreadyRun.find( *it ) == m_testsAlreadyRun.end() ) {
+
+                    if( context.aborting() )
+                        break;
+
+                    totals += context.runTest( *it );
+                    m_testsAlreadyRun.insert( *it );
+                }
+            }
+            std::vector<TestCase> skippedTestCases;
+            getRegistryHub().getTestCaseRegistry().getFilteredTests( testSpec, *m_config, skippedTestCases, true );
+
+            for( std::vector<TestCase>::const_iterator it = skippedTestCases.begin(), itEnd = skippedTestCases.end();
+                    it != itEnd;
+                    ++it )
+                m_reporter->skipTest( *it );
+
+            context.testGroupEnded( "all tests", totals, 1, 1 );
+            return totals;
+        }
+
+    private:
+        void openStream() {
+            // Open output file, if specified
+            if( !m_config->getFilename().empty() ) {
+                m_ofs.open( m_config->getFilename().c_str() );
+                if( m_ofs.fail() ) {
+                    std::ostringstream oss;
+                    oss << "Unable to open file: '" << m_config->getFilename() << "'";
+                    throw std::domain_error( oss.str() );
+                }
+                m_config->setStreamBuf( m_ofs.rdbuf() );
+            }
+        }
+        void makeReporter() {
+            std::string reporterName = m_config->getReporterName().empty()
+                ? "console"
+                : m_config->getReporterName();
+
+            m_reporter = getRegistryHub().getReporterRegistry().create( reporterName, m_config.get() );
+            if( !m_reporter ) {
+                std::ostringstream oss;
+                oss << "No reporter registered with name: '" << reporterName << "'";
+                throw std::domain_error( oss.str() );
+            }
+        }
+
+    private:
+        Ptr<Config> m_config;
+        std::ofstream m_ofs;
+        Ptr<IStreamingReporter> m_reporter;
+        std::set<TestCase> m_testsAlreadyRun;
+    };
+
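+    // Top-level entry point for driving Catch from a custom main() (e.g. in
+    // builds that define CATCH_CONFIG_RUNNER). Typical usage:
+    //
+    //     int main( int argc, char* argv[] ) {
+    //         return Catch::Session().run( argc, argv );
+    //     }
+    //
+    // Only one Session may be instantiated per process.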
+    class Session : NonCopyable {
+        static bool alreadyInstantiated;
+
+    public:
+
+        struct OnUnusedOptions { enum DoWhat { Ignore, Fail }; };
+
+        Session()
+        : m_cli( makeCommandLineParser() ) {
+            if( alreadyInstantiated ) {
+                std::string msg = "Only one instance of Catch::Session can ever be used";
+                Catch::cerr() << msg << std::endl;
+                throw std::logic_error( msg );
+            }
+            alreadyInstantiated = true;
+        }
+        ~Session() {
+            Catch::cleanUp();
+        }
+
+        void showHelp( std::string const& processName ) {
+            Catch::cout() << "\nCatch v" << libraryVersion << "\n";
+
+            m_cli.usage( Catch::cout(), processName );
+            Catch::cout() << "For more detail usage please see the project docs\n" << std::endl;
+        }
+
+        int applyCommandLine( int argc, char* const argv[], OnUnusedOptions::DoWhat unusedOptionBehaviour = OnUnusedOptions::Fail ) {
+            try {
+                m_cli.setThrowOnUnrecognisedTokens( unusedOptionBehaviour == OnUnusedOptions::Fail );
+                m_unusedTokens = m_cli.parseInto( argc, argv, m_configData );
+                if( m_configData.showHelp )
+                    showHelp( m_configData.processName );
+                m_config.reset();
+            }
+            catch( std::exception& ex ) {
+                {
+                    Colour colourGuard( Colour::Red );
+                    Catch::cerr()
+                        << "\nError(s) in input:\n"
+                        << Text( ex.what(), TextAttributes().setIndent(2) )
+                        << "\n\n";
+                }
+                m_cli.usage( Catch::cout(), m_configData.processName );
+                return (std::numeric_limits<int>::max)();
+            }
+            return 0;
+        }
+
+        void useConfigData( ConfigData const& _configData ) {
+            m_configData = _configData;
+            m_config.reset();
+        }
+
+        int run( int argc, char* const argv[] ) {
+
+            int returnCode = applyCommandLine( argc, argv );
+            if( returnCode == 0 )
+                returnCode = run();
+            return returnCode;
+        }
+
+        int run() {
+            if( m_configData.showHelp )
+                return 0;
+
+            try
+            {
+                config(); // Force config to be constructed
+
+                std::srand( m_configData.rngSeed );
+
+                Runner runner( m_config );
+
+                // Handle list request
+                if( Option<std::size_t> listed = list( config() ) )
+                    return static_cast<int>( *listed );
+
+                return static_cast<int>( runner.runTests().assertions.failed );
+            }
+            catch( std::exception& ex ) {
+                Catch::cerr() << ex.what() << std::endl;
+                return (std::numeric_limits<int>::max)();
+            }
+        }
+
+        Clara::CommandLine<ConfigData> const& cli() const {
+            return m_cli;
+        }
+        std::vector<Clara::Parser::Token> const& unusedTokens() const {
+            return m_unusedTokens;
+        }
+        ConfigData& configData() {
+            return m_configData;
+        }
+        Config& config() {
+            if( !m_config )
+                m_config = new Config( m_configData );
+            return *m_config;
+        }
+
+    private:
+        Clara::CommandLine<ConfigData> m_cli;
+        std::vector<Clara::Parser::Token> m_unusedTokens;
+        ConfigData m_configData;
+        Ptr<Config> m_config;
+    };
+
+    bool Session::alreadyInstantiated = false;
+
+} // end namespace Catch
+
+// #included from: catch_registry_hub.hpp
+#define TWOBLUECUBES_CATCH_REGISTRY_HUB_HPP_INCLUDED
+
+// #included from: catch_test_case_registry_impl.hpp
+#define TWOBLUECUBES_CATCH_TEST_CASE_REGISTRY_IMPL_HPP_INCLUDED
+
+#include <vector>
+#include <set>
+#include <sstream>
+#include <iostream>
+#include <algorithm>
+
+namespace Catch {
+
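+    // Registry of all TEST_CASEs: rejects duplicate names, keeps declaration
+    // order, and hands back filtered (and optionally sorted or shuffled)
+    // subsets for a run.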
+    class TestRegistry : public ITestCaseRegistry {
+        struct LexSort {
+            bool operator() (TestCase i,TestCase j) const { return (i<j);}
+        };
+        struct RandomNumberGenerator {
+            int operator()( int n ) const { return std::rand() % n; }
+        };
+
+    public:
+        TestRegistry() : m_unnamedCount( 0 ) {}
+        virtual ~TestRegistry();
+
+        virtual void registerTest( TestCase const& testCase ) {
+            std::string name = testCase.getTestCaseInfo().name;
+            if( name == "" ) {
+                std::ostringstream oss;
+                oss << "Anonymous test case " << ++m_unnamedCount;
+                return registerTest( testCase.withName( oss.str() ) );
+            }
+
+            if( m_functions.find( testCase ) == m_functions.end() ) {
+                m_functions.insert( testCase );
+                m_functionsInOrder.push_back( testCase );
+                if( !testCase.isHidden() )
+                    m_nonHiddenFunctions.push_back( testCase );
+            }
+            else {
+                TestCase const& prev = *m_functions.find( testCase );
+                {
+                    Colour colourGuard( Colour::Red );
+                    Catch::cerr()   << "error: TEST_CASE( \"" << name << "\" ) already defined.\n"
+                                << "\tFirst seen at " << prev.getTestCaseInfo().lineInfo << "\n"
+                                << "\tRedefined at " << testCase.getTestCaseInfo().lineInfo << std::endl;
+                }
+                exit(1);
+            }
+        }
+
+        virtual std::vector<TestCase> const& getAllTests() const {
+            return m_functionsInOrder;
+        }
+
+        virtual std::vector<TestCase> const& getAllNonHiddenTests() const {
+            return m_nonHiddenFunctions;
+        }
+
+        virtual void getFilteredTests( TestSpec const& testSpec, IConfig const& config, std::vector<TestCase>& matchingTestCases, bool negated = false ) const {
+
+            for( std::vector<TestCase>::const_iterator  it = m_functionsInOrder.begin(),
+                                                        itEnd = m_functionsInOrder.end();
+                    it != itEnd;
+                    ++it ) {
+                bool includeTest = testSpec.matches( *it ) && ( config.allowThrows() || !it->throws() );
+                if( includeTest != negated )
+                    matchingTestCases.push_back( *it );
+            }
+            sortTests( config, matchingTestCases );
+        }
+
+    private:
+
+        static void sortTests( IConfig const& config, std::vector<TestCase>& matchingTestCases ) {
+
+            switch( config.runOrder() ) {
+                case RunTests::InLexicographicalOrder:
+                    std::sort( matchingTestCases.begin(), matchingTestCases.end(), LexSort() );
+                    break;
+                case RunTests::InRandomOrder:
+                {
+                    RandomNumberGenerator rng;
+                    std::random_shuffle( matchingTestCases.begin(), matchingTestCases.end(), rng );
+                }
+                    break;
+                case RunTests::InDeclarationOrder:
+                    // already in declaration order
+                    break;
+            }
+        }
+        std::set<TestCase> m_functions;
+        std::vector<TestCase> m_functionsInOrder;
+        std::vector<TestCase> m_nonHiddenFunctions;
+        size_t m_unnamedCount;
+    };
+
+    ///////////////////////////////////////////////////////////////////////////
+
+    class FreeFunctionTestCase : public SharedImpl<ITestCase> {
+    public:
+
+        FreeFunctionTestCase( TestFunction fun ) : m_fun( fun ) {}
+
+        virtual void invoke() const {
+            m_fun();
+        }
+
+    private:
+        virtual ~FreeFunctionTestCase();
+
+        TestFunction m_fun;
+    };
+
+    inline std::string extractClassName( std::string const& classOrQualifiedMethodName ) {
+        std::string className = classOrQualifiedMethodName;
+        if( startsWith( className, "&" ) )
+        {
+            std::size_t lastColons = className.rfind( "::" );
+            std::size_t penultimateColons = className.rfind( "::", lastColons-1 );
+            if( penultimateColons == std::string::npos )
+                penultimateColons = 1;
+            className = className.substr( penultimateColons, lastColons-penultimateColons );
+        }
+        return className;
+    }
+
+    ///////////////////////////////////////////////////////////////////////////
+
+    AutoReg::AutoReg(   TestFunction function,
+                        SourceLineInfo const& lineInfo,
+                        NameAndDesc const& nameAndDesc ) {
+        registerTestCase( new FreeFunctionTestCase( function ), "", nameAndDesc, lineInfo );
+    }
+
+    AutoReg::~AutoReg() {}
+
+    void AutoReg::registerTestCase( ITestCase* testCase,
+                                    char const* classOrQualifiedMethodName,
+                                    NameAndDesc const& nameAndDesc,
+                                    SourceLineInfo const& lineInfo ) {
+
+        getMutableRegistryHub().registerTest
+            ( makeTestCase( testCase,
+                            extractClassName( classOrQualifiedMethodName ),
+                            nameAndDesc.name,
+                            nameAndDesc.description,
+                            lineInfo ) );
+    }
+
+} // end namespace Catch
+
+// #included from: catch_reporter_registry.hpp
+#define TWOBLUECUBES_CATCH_REPORTER_REGISTRY_HPP_INCLUDED
+
+#include <map>
+
+namespace Catch {
+
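+    // Maps reporter names to their factories; create() returns NULL when no
+    // reporter is registered under the requested name.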
+    class ReporterRegistry : public IReporterRegistry {
+
+    public:
+
+        virtual ~ReporterRegistry() {
+            deleteAllValues( m_factories );
+        }
+
+        virtual IStreamingReporter* create( std::string const& name, Ptr<IConfig> const& config ) const {
+            FactoryMap::const_iterator it =  m_factories.find( name );
+            if( it == m_factories.end() )
+                return NULL;
+            return it->second->create( ReporterConfig( config ) );
+        }
+
+        void registerReporter( std::string const& name, IReporterFactory* factory ) {
+            m_factories.insert( std::make_pair( name, factory ) );
+        }
+
+        FactoryMap const& getFactories() const {
+            return m_factories;
+        }
+
+    private:
+        FactoryMap m_factories;
+    };
+}
+
+// #included from: catch_exception_translator_registry.hpp
+#define TWOBLUECUBES_CATCH_EXCEPTION_TRANSLATOR_REGISTRY_HPP_INCLUDED
+
+#ifdef __OBJC__
+#import "Foundation/Foundation.h"
+#endif
+
+namespace Catch {
+
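+    // Converts the currently active exception into a message string, trying
+    // the common standard types first and then any user-registered
+    // translators, in registration order.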
+    class ExceptionTranslatorRegistry : public IExceptionTranslatorRegistry {
+    public:
+        ~ExceptionTranslatorRegistry() {
+            deleteAll( m_translators );
+        }
+
+        virtual void registerTranslator( const IExceptionTranslator* translator ) {
+            m_translators.push_back( translator );
+        }
+
+        virtual std::string translateActiveException() const {
+            try {
+#ifdef __OBJC__
+                // In Objective-C try objective-c exceptions first
+                @try {
+                    throw;
+                }
+                @catch (NSException *exception) {
+                    return Catch::toString( [exception description] );
+                }
+#else
+                throw;
+#endif
+            }
+            catch( TestFailureException& ) {
+                throw;
+            }
+            catch( std::exception& ex ) {
+                return ex.what();
+            }
+            catch( std::string& msg ) {
+                return msg;
+            }
+            catch( const char* msg ) {
+                return msg;
+            }
+            catch(...) {
+                return tryTranslators( m_translators.begin() );
+            }
+        }
+
+        std::string tryTranslators( std::vector<const IExceptionTranslator*>::const_iterator it ) const {
+            if( it == m_translators.end() )
+                return "Unknown exception";
+
+            try {
+                return (*it)->translate();
+            }
+            catch(...) {
+                return tryTranslators( it+1 );
+            }
+        }
+
+    private:
+        std::vector<const IExceptionTranslator*> m_translators;
+    };
+}
+
+namespace Catch {
+
+    namespace {
+
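+        // Aggregates the test case, reporter and exception-translator
+        // registries behind the IRegistryHub / IMutableRegistryHub interfaces;
+        // a single lazily created instance is shared via getTheRegistryHub().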
+        class RegistryHub : public IRegistryHub, public IMutableRegistryHub {
+
+            RegistryHub( RegistryHub const& );
+            void operator=( RegistryHub const& );
+
+        public: // IRegistryHub
+            RegistryHub() {
+            }
+            virtual IReporterRegistry const& getReporterRegistry() const {
+                return m_reporterRegistry;
+            }
+            virtual ITestCaseRegistry const& getTestCaseRegistry() const {
+                return m_testCaseRegistry;
+            }
+            virtual IExceptionTranslatorRegistry& getExceptionTranslatorRegistry() {
+                return m_exceptionTranslatorRegistry;
+            }
+
+        public: // IMutableRegistryHub
+            virtual void registerReporter( std::string const& name, IReporterFactory* factory ) {
+                m_reporterRegistry.registerReporter( name, factory );
+            }
+            virtual void registerTest( TestCase const& testInfo ) {
+                m_testCaseRegistry.registerTest( testInfo );
+            }
+            virtual void registerTranslator( const IExceptionTranslator* translator ) {
+                m_exceptionTranslatorRegistry.registerTranslator( translator );
+            }
+
+        private:
+            TestRegistry m_testCaseRegistry;
+            ReporterRegistry m_reporterRegistry;
+            ExceptionTranslatorRegistry m_exceptionTranslatorRegistry;
+        };
+
+        // Single, global instance
+        inline RegistryHub*& getTheRegistryHub() {
+            static RegistryHub* theRegistryHub = NULL;
+            if( !theRegistryHub )
+                theRegistryHub = new RegistryHub();
+            return theRegistryHub;
+        }
+    }
+
+    IRegistryHub& getRegistryHub() {
+        return *getTheRegistryHub();
+    }
+    IMutableRegistryHub& getMutableRegistryHub() {
+        return *getTheRegistryHub();
+    }
+    void cleanUp() {
+        delete getTheRegistryHub();
+        getTheRegistryHub() = NULL;
+        cleanUpContext();
+    }
+    std::string translateActiveException() {
+        return getRegistryHub().getExceptionTranslatorRegistry().translateActiveException();
+    }
+
+} // end namespace Catch
+
+// #included from: catch_notimplemented_exception.hpp
+#define TWOBLUECUBES_CATCH_NOTIMPLEMENTED_EXCEPTION_HPP_INCLUDED
+
+#include <ostream>
+
+namespace Catch {
+
+    NotImplementedException::NotImplementedException( SourceLineInfo const& lineInfo )
+    :   m_lineInfo( lineInfo ) {
+        std::ostringstream oss;
+        oss << lineInfo << ": function ";
+        oss << "not implemented";
+        m_what = oss.str();
+    }
+
+    const char* NotImplementedException::what() const CATCH_NOEXCEPT {
+        return m_what.c_str();
+    }
+
+} // end namespace Catch
+
+// #included from: catch_context_impl.hpp
+#define TWOBLUECUBES_CATCH_CONTEXT_IMPL_HPP_INCLUDED
+
+// #included from: catch_stream.hpp
+#define TWOBLUECUBES_CATCH_STREAM_HPP_INCLUDED
+
+// #included from: catch_streambuf.h
+#define TWOBLUECUBES_CATCH_STREAMBUF_H_INCLUDED
+
+#include <streambuf>
+
+namespace Catch {
+
+    class StreamBufBase : public std::streambuf {
+    public:
+        virtual ~StreamBufBase() CATCH_NOEXCEPT;
+    };
+}
+
+#include <stdexcept>
+#include <cstdio>
+#include <iostream>
+
+namespace Catch {
+
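+    // A small fixed-size streambuf that flushes its buffered characters to the
+    // WriterF functor on sync/overflow; used below to build the "debug" stream.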
+    template<typename WriterF, size_t bufferSize=256>
+    class StreamBufImpl : public StreamBufBase {
+        char data[bufferSize];
+        WriterF m_writer;
+
+    public:
+        StreamBufImpl() {
+            setp( data, data + sizeof(data) );
+        }
+
+        ~StreamBufImpl() CATCH_NOEXCEPT {
+            sync();
+        }
+
+    private:
+        int overflow( int c ) {
+            sync();
+
+            if( c != EOF ) {
+                if( pbase() == epptr() )
+                    m_writer( std::string( 1, static_cast<char>( c ) ) );
+                else
+                    sputc( static_cast<char>( c ) );
+            }
+            return 0;
+        }
+
+        int sync() {
+            if( pbase() != pptr() ) {
+                m_writer( std::string( pbase(), static_cast<std::string::size_type>( pptr() - pbase() ) ) );
+                setp( pbase(), epptr() );
+            }
+            return 0;
+        }
+    };
+
+    ///////////////////////////////////////////////////////////////////////////
+
+    struct OutputDebugWriter {
+
+        void operator()( std::string const& str ) {
+            writeToDebugConsole( str );
+        }
+    };
+
+    Stream::Stream()
+    : streamBuf( NULL ), isOwned( false )
+    {}
+
+    Stream::Stream( std::streambuf* _streamBuf, bool _isOwned )
+    : streamBuf( _streamBuf ), isOwned( _isOwned )
+    {}
+
+    void Stream::release() {
+        if( isOwned ) {
+            delete streamBuf;
+            streamBuf = NULL;
+            isOwned = false;
+        }
+    }
+
+#ifndef CATCH_CONFIG_NOSTDOUT // If you #define this you must implement these functions
+    std::ostream& cout() {
+        return std::cout;
+    }
+    std::ostream& cerr() {
+        return std::cerr;
+    }
+#endif
+}
+
+namespace Catch {
+
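+    // Process-wide mutable context holding the active config, runner and
+    // result capture, plus per-test generator state; accessed through
+    // getCurrentMutableContext() / getCurrentContext() below.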
+    class Context : public IMutableContext {
+
+        Context() : m_config( NULL ), m_runner( NULL ), m_resultCapture( NULL ) {}
+        Context( Context const& );
+        void operator=( Context const& );
+
+    public: // IContext
+        virtual IResultCapture* getResultCapture() {
+            return m_resultCapture;
+        }
+        virtual IRunner* getRunner() {
+            return m_runner;
+        }
+        virtual size_t getGeneratorIndex( std::string const& fileInfo, size_t totalSize ) {
+            return getGeneratorsForCurrentTest()
+            .getGeneratorInfo( fileInfo, totalSize )
+            .getCurrentIndex();
+        }
+        virtual bool advanceGeneratorsForCurrentTest() {
+            IGeneratorsForTest* generators = findGeneratorsForCurrentTest();
+            return generators && generators->moveNext();
+        }
+
+        virtual Ptr<IConfig const> getConfig() const {
+            return m_config;
+        }
+
+    public: // IMutableContext
+        virtual void setResultCapture( IResultCapture* resultCapture ) {
+            m_resultCapture = resultCapture;
+        }
+        virtual void setRunner( IRunner* runner ) {
+            m_runner = runner;
+        }
+        virtual void setConfig( Ptr<IConfig const> const& config ) {
+            m_config = config;
+        }
+
+        friend IMutableContext& getCurrentMutableContext();
+
+    private:
+        IGeneratorsForTest* findGeneratorsForCurrentTest() {
+            std::string testName = getResultCapture()->getCurrentTestName();
+
+            std::map<std::string, IGeneratorsForTest*>::const_iterator it =
+                m_generatorsByTestName.find( testName );
+            return it != m_generatorsByTestName.end()
+                ? it->second
+                : NULL;
+        }
+
+        IGeneratorsForTest& getGeneratorsForCurrentTest() {
+            IGeneratorsForTest* generators = findGeneratorsForCurrentTest();
+            if( !generators ) {
+                std::string testName = getResultCapture()->getCurrentTestName();
+                generators = createGeneratorsForTest();
+                m_generatorsByTestName.insert( std::make_pair( testName, generators ) );
+            }
+            return *generators;
+        }
+
+    private:
+        Ptr<IConfig const> m_config;
+        IRunner* m_runner;
+        IResultCapture* m_resultCapture;
+        std::map<std::string, IGeneratorsForTest*> m_generatorsByTestName;
+    };
+
+    namespace {
+        Context* currentContext = NULL;
+    }
+    IMutableContext& getCurrentMutableContext() {
+        if( !currentContext )
+            currentContext = new Context();
+        return *currentContext;
+    }
+    IContext& getCurrentContext() {
+        return getCurrentMutableContext();
+    }
+
+    Stream createStream( std::string const& streamName ) {
+        if( streamName == "stdout" ) return Stream( Catch::cout().rdbuf(), false );
+        if( streamName == "stderr" ) return Stream( Catch::cerr().rdbuf(), false );
+        if( streamName == "debug" ) return Stream( new StreamBufImpl<OutputDebugWriter>, true );
+
+        throw std::domain_error( "Unknown stream: " + streamName );
+    }
+
+    void cleanUpContext() {
+        delete currentContext;
+        currentContext = NULL;
+    }
+}
+
+// #included from: catch_console_colour_impl.hpp
+#define TWOBLUECUBES_CATCH_CONSOLE_COLOUR_IMPL_HPP_INCLUDED
+
+namespace Catch {
+    namespace {
+
+        struct IColourImpl {
+            virtual ~IColourImpl() {}
+            virtual void use( Colour::Code _colourCode ) = 0;
+        };
+
+        struct NoColourImpl : IColourImpl {
+            void use( Colour::Code ) {}
+
+            static IColourImpl* instance() {
+                static NoColourImpl s_instance;
+                return &s_instance;
+            }
+        };
+
+    } // anon namespace
+} // namespace Catch
+
+#if !defined( CATCH_CONFIG_COLOUR_NONE ) && !defined( CATCH_CONFIG_COLOUR_WINDOWS ) && !defined( CATCH_CONFIG_COLOUR_ANSI )
+#   ifdef CATCH_PLATFORM_WINDOWS
+#       define CATCH_CONFIG_COLOUR_WINDOWS
+#   else
+#       define CATCH_CONFIG_COLOUR_ANSI
+#   endif
+#endif
+
+#if defined ( CATCH_CONFIG_COLOUR_WINDOWS ) /////////////////////////////////////////
+
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+
+#ifdef __AFXDLL
+#include <AfxWin.h>
+#else
+#include <windows.h>
+#endif
+
+namespace Catch {
+namespace {
+
+    class Win32ColourImpl : public IColourImpl {
+    public:
+        Win32ColourImpl() : stdoutHandle( GetStdHandle(STD_OUTPUT_HANDLE) )
+        {
+            CONSOLE_SCREEN_BUFFER_INFO csbiInfo;
+            GetConsoleScreenBufferInfo( stdoutHandle, &csbiInfo );
+            originalAttributes = csbiInfo.wAttributes;
+        }
+
+        virtual void use( Colour::Code _colourCode ) {
+            switch( _colourCode ) {
+                case Colour::None:      return setTextAttribute( originalAttributes );
+                case Colour::White:     return setTextAttribute( FOREGROUND_GREEN | FOREGROUND_RED | FOREGROUND_BLUE );
+                case Colour::Red:       return setTextAttribute( FOREGROUND_RED );
+                case Colour::Green:     return setTextAttribute( FOREGROUND_GREEN );
+                case Colour::Blue:      return setTextAttribute( FOREGROUND_BLUE );
+                case Colour::Cyan:      return setTextAttribute( FOREGROUND_BLUE | FOREGROUND_GREEN );
+                case Colour::Yellow:    return setTextAttribute( FOREGROUND_RED | FOREGROUND_GREEN );
+                case Colour::Grey:      return setTextAttribute( 0 );
+
+                case Colour::LightGrey:     return setTextAttribute( FOREGROUND_INTENSITY );
+                case Colour::BrightRed:     return setTextAttribute( FOREGROUND_INTENSITY | FOREGROUND_RED );
+                case Colour::BrightGreen:   return setTextAttribute( FOREGROUND_INTENSITY | FOREGROUND_GREEN );
+                case Colour::BrightWhite:   return setTextAttribute( FOREGROUND_INTENSITY | FOREGROUND_GREEN | FOREGROUND_RED | FOREGROUND_BLUE );
+
+                case Colour::Bright: throw std::logic_error( "not a colour" );
+            }
+        }
+
+    private:
+        void setTextAttribute( WORD _textAttribute ) {
+            SetConsoleTextAttribute( stdoutHandle, _textAttribute );
+        }
+        HANDLE stdoutHandle;
+        WORD originalAttributes;
+    };
+
+    IColourImpl* platformColourInstance() {
+        static Win32ColourImpl s_instance;
+        return &s_instance;
+    }
+
+} // end anon namespace
+} // end namespace Catch
+
+#elif defined( CATCH_CONFIG_COLOUR_ANSI ) //////////////////////////////////////
+
+#include <unistd.h>
+
+namespace Catch {
+namespace {
+
+    // use POSIX/ ANSI console terminal codes
+    // Thanks to Adam Strzelecki for original contribution
+    // (http://github.com/nanoant)
+    // https://github.com/philsquared/Catch/pull/131
+    class PosixColourImpl : public IColourImpl {
+    public:
+        virtual void use( Colour::Code _colourCode ) {
+            switch( _colourCode ) {
+                case Colour::None:
+                case Colour::White:     return setColour( "[0m" );
+                case Colour::Red:       return setColour( "[0;31m" );
+                case Colour::Green:     return setColour( "[0;32m" );
+                case Colour::Blue:      return setColour( "[0;34m" );
+                case Colour::Cyan:      return setColour( "[0;36m" );
+                case Colour::Yellow:    return setColour( "[0;33m" );
+                case Colour::Grey:      return setColour( "[1;30m" );
+
+                case Colour::LightGrey:     return setColour( "[0;37m" );
+                case Colour::BrightRed:     return setColour( "[1;31m" );
+                case Colour::BrightGreen:   return setColour( "[1;32m" );
+                case Colour::BrightWhite:   return setColour( "[1;37m" );
+
+                case Colour::Bright: throw std::logic_error( "not a colour" );
+            }
+        }
+        static IColourImpl* instance() {
+            static PosixColourImpl s_instance;
+            return &s_instance;
+        }
+
+    private:
+        void setColour( const char* _escapeCode ) {
+            Catch::cout() << '\033' << _escapeCode;
+        }
+    };
+
+    IColourImpl* platformColourInstance() {
+        Ptr<IConfig const> config = getCurrentContext().getConfig();
+        return (config && config->forceColour()) || isatty(STDOUT_FILENO)
+            ? PosixColourImpl::instance()
+            : NoColourImpl::instance();
+    }
+
+} // end anon namespace
+} // end namespace Catch
+
+#else  // not Windows or ANSI ///////////////////////////////////////////////
+
+namespace Catch {
+
+    static IColourImpl* platformColourInstance() { return NoColourImpl::instance(); }
+
+} // end namespace Catch
+
+#endif // Windows/ ANSI/ None
+
+namespace Catch {
+
+    Colour::Colour( Code _colourCode ) : m_moved( false ) { use( _colourCode ); }
+    Colour::Colour( Colour const& _other ) : m_moved( false ) { const_cast<Colour&>( _other ).m_moved = true; }
+    Colour::~Colour(){ if( !m_moved ) use( None ); }
+
+    void Colour::use( Code _colourCode ) {
+        static IColourImpl* impl = isDebuggerActive()
+            ? NoColourImpl::instance()
+            : platformColourInstance();
+        impl->use( _colourCode );
+    }
+
+} // end namespace Catch
+
+// #included from: catch_generators_impl.hpp
+#define TWOBLUECUBES_CATCH_GENERATORS_IMPL_HPP_INCLUDED
+
+#include <vector>
+#include <string>
+#include <map>
+
+namespace Catch {
+
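+    // GeneratorInfo tracks a single generator's current index, wrapping back
+    // to zero when it reaches its size; GeneratorsForTest advances a test's
+    // generators odometer-style via moveNext().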
+    struct GeneratorInfo : IGeneratorInfo {
+
+        GeneratorInfo( std::size_t size )
+        :   m_size( size ),
+            m_currentIndex( 0 )
+        {}
+
+        bool moveNext() {
+            if( ++m_currentIndex == m_size ) {
+                m_currentIndex = 0;
+                return false;
+            }
+            return true;
+        }
+
+        std::size_t getCurrentIndex() const {
+            return m_currentIndex;
+        }
+
+        std::size_t m_size;
+        std::size_t m_currentIndex;
+    };
+
+    ///////////////////////////////////////////////////////////////////////////
+
+    class GeneratorsForTest : public IGeneratorsForTest {
+
+    public:
+        ~GeneratorsForTest() {
+            deleteAll( m_generatorsInOrder );
+        }
+
+        IGeneratorInfo& getGeneratorInfo( std::string const& fileInfo, std::size_t size ) {
+            std::map<std::string, IGeneratorInfo*>::const_iterator it = m_generatorsByName.find( fileInfo );
+            if( it == m_generatorsByName.end() ) {
+                IGeneratorInfo* info = new GeneratorInfo( size );
+                m_generatorsByName.insert( std::make_pair( fileInfo, info ) );
+                m_generatorsInOrder.push_back( info );
+                return *info;
+            }
+            return *it->second;
+        }
+
+        bool moveNext() {
+            std::vector<IGeneratorInfo*>::const_iterator it = m_generatorsInOrder.begin();
+            std::vector<IGeneratorInfo*>::const_iterator itEnd = m_generatorsInOrder.end();
+            for(; it != itEnd; ++it ) {
+                if( (*it)->moveNext() )
+                    return true;
+            }
+            return false;
+        }
+
+    private:
+        std::map<std::string, IGeneratorInfo*> m_generatorsByName;
+        std::vector<IGeneratorInfo*> m_generatorsInOrder;
+    };
+
+    IGeneratorsForTest* createGeneratorsForTest()
+    {
+        return new GeneratorsForTest();
+    }
+
+} // end namespace Catch
+
+// #included from: catch_assertionresult.hpp
+#define TWOBLUECUBES_CATCH_ASSERTIONRESULT_HPP_INCLUDED
+
+namespace Catch {
+
+    AssertionInfo::AssertionInfo(   std::string const& _macroName,
+                                    SourceLineInfo const& _lineInfo,
+                                    std::string const& _capturedExpression,
+                                    ResultDisposition::Flags _resultDisposition )
+    :   macroName( _macroName ),
+        lineInfo( _lineInfo ),
+        capturedExpression( _capturedExpression ),
+        resultDisposition( _resultDisposition )
+    {}
+
+    AssertionResult::AssertionResult() {}
+
+    AssertionResult::AssertionResult( AssertionInfo const& info, AssertionResultData const& data )
+    :   m_info( info ),
+        m_resultData( data )
+    {}
+
+    AssertionResult::~AssertionResult() {}
+
+    // Result was a success
+    bool AssertionResult::succeeded() const {
+        return Catch::isOk( m_resultData.resultType );
+    }
+
+    // Result was a success, or failure is suppressed
+    bool AssertionResult::isOk() const {
+        return Catch::isOk( m_resultData.resultType ) || shouldSuppressFailure( m_info.resultDisposition );
+    }
+
+    ResultWas::OfType AssertionResult::getResultType() const {
+        return m_resultData.resultType;
+    }
+
+    bool AssertionResult::hasExpression() const {
+        return !m_info.capturedExpression.empty();
+    }
+
+    bool AssertionResult::hasMessage() const {
+        return !m_resultData.message.empty();
+    }
+
+    std::string AssertionResult::getExpression() const {
+        if( isFalseTest( m_info.resultDisposition ) )
+            return "!" + m_info.capturedExpression;
+        else
+            return m_info.capturedExpression;
+    }
+    std::string AssertionResult::getExpressionInMacro() const {
+        if( m_info.macroName.empty() )
+            return m_info.capturedExpression;
+        else
+            return m_info.macroName + "( " + m_info.capturedExpression + " )";
+    }
+
+    bool AssertionResult::hasExpandedExpression() const {
+        return hasExpression() && getExpandedExpression() != getExpression();
+    }
+
+    std::string AssertionResult::getExpandedExpression() const {
+        return m_resultData.reconstructedExpression;
+    }
+
+    std::string AssertionResult::getMessage() const {
+        return m_resultData.message;
+    }
+    SourceLineInfo AssertionResult::getSourceInfo() const {
+        return m_info.lineInfo;
+    }
+
+    std::string AssertionResult::getTestMacroName() const {
+        return m_info.macroName;
+    }
+
+} // end namespace Catch
+
+// #included from: catch_test_case_info.hpp
+#define TWOBLUECUBES_CATCH_TEST_CASE_INFO_HPP_INCLUDED
+
+namespace Catch {
+
+    inline TestCaseInfo::SpecialProperties parseSpecialTag( std::string const& tag ) {
+        if( startsWith( tag, "." ) ||
+            tag == "hide" ||
+            tag == "!hide" )
+            return TestCaseInfo::IsHidden;
+        else if( tag == "!throws" )
+            return TestCaseInfo::Throws;
+        else if( tag == "!shouldfail" )
+            return TestCaseInfo::ShouldFail;
+        else if( tag == "!mayfail" )
+            return TestCaseInfo::MayFail;
+        else
+            return TestCaseInfo::None;
+    }
+    inline bool isReservedTag( std::string const& tag ) {
+        return parseSpecialTag( tag ) == TestCaseInfo::None && tag.size() > 0 && !isalnum( tag[0] );
+    }
+    inline void enforceNotReservedTag( std::string const& tag, SourceLineInfo const& _lineInfo ) {
+        if( isReservedTag( tag ) ) {
+            {
+                Colour colourGuard( Colour::Red );
+                Catch::cerr()
+                    << "Tag name [" << tag << "] not allowed.\n"
+                    << "Tag names starting with non alpha-numeric characters are reserved\n";
+            }
+            {
+                Colour colourGuard( Colour::FileName );
+                Catch::cerr() << _lineInfo << std::endl;
+            }
+            exit(1);
+        }
+    }
+
+    TestCase makeTestCase(  ITestCase* _testCase,
+                            std::string const& _className,
+                            std::string const& _name,
+                            std::string const& _descOrTags,
+                            SourceLineInfo const& _lineInfo )
+    {
+        bool isHidden( startsWith( _name, "./" ) ); // Legacy support
+
+        // Parse out tags
+        std::set<std::string> tags;
+        std::string desc, tag;
+        bool inTag = false;
+        for( std::size_t i = 0; i < _descOrTags.size(); ++i ) {
+            char c = _descOrTags[i];
+            if( !inTag ) {
+                if( c == '[' )
+                    inTag = true;
+                else
+                    desc += c;
+            }
+            else {
+                if( c == ']' ) {
+                    TestCaseInfo::SpecialProperties prop = parseSpecialTag( tag );
+                    if( prop == TestCaseInfo::IsHidden )
+                        isHidden = true;
+                    else if( prop == TestCaseInfo::None )
+                        enforceNotReservedTag( tag, _lineInfo );
+
+                    tags.insert( tag );
+                    tag.clear();
+                    inTag = false;
+                }
+                else
+                    tag += c;
+            }
+        }
+        if( isHidden ) {
+            tags.insert( "hide" );
+            tags.insert( "." );
+        }
+
+        TestCaseInfo info( _name, _className, desc, tags, _lineInfo );
+        return TestCase( _testCase, info );
+    }
+
+    TestCaseInfo::TestCaseInfo( std::string const& _name,
+                                std::string const& _className,
+                                std::string const& _description,
+                                std::set<std::string> const& _tags,
+                                SourceLineInfo const& _lineInfo )
+    :   name( _name ),
+        className( _className ),
+        description( _description ),
+        tags( _tags ),
+        lineInfo( _lineInfo ),
+        properties( None )
+    {
+        std::ostringstream oss;
+        for( std::set<std::string>::const_iterator it = _tags.begin(), itEnd = _tags.end(); it != itEnd; ++it ) {
+            oss << "[" << *it << "]";
+            std::string lcaseTag = toLower( *it );
+            properties = static_cast<SpecialProperties>( properties | parseSpecialTag( lcaseTag ) );
+            lcaseTags.insert( lcaseTag );
+        }
+        tagsAsString = oss.str();
+    }
+
+    TestCaseInfo::TestCaseInfo( TestCaseInfo const& other )
+    :   name( other.name ),
+        className( other.className ),
+        description( other.description ),
+        tags( other.tags ),
+        lcaseTags( other.lcaseTags ),
+        tagsAsString( other.tagsAsString ),
+        lineInfo( other.lineInfo ),
+        properties( other.properties )
+    {}
+
+    bool TestCaseInfo::isHidden() const {
+        return ( properties & IsHidden ) != 0;
+    }
+    bool TestCaseInfo::throws() const {
+        return ( properties & Throws ) != 0;
+    }
+    bool TestCaseInfo::okToFail() const {
+        return ( properties & (ShouldFail | MayFail ) ) != 0;
+    }
+    bool TestCaseInfo::expectedToFail() const {
+        return ( properties & (ShouldFail ) ) != 0;
+    }
+
+    TestCase::TestCase( ITestCase* testCase, TestCaseInfo const& info ) : TestCaseInfo( info ), test( testCase ) {}
+
+    TestCase::TestCase( TestCase const& other )
+    :   TestCaseInfo( other ),
+        test( other.test )
+    {}
+
+    TestCase TestCase::withName( std::string const& _newName ) const {
+        TestCase other( *this );
+        other.name = _newName;
+        return other;
+    }
+
+    void TestCase::swap( TestCase& other ) {
+        test.swap( other.test );
+        name.swap( other.name );
+        className.swap( other.className );
+        description.swap( other.description );
+        tags.swap( other.tags );
+        lcaseTags.swap( other.lcaseTags );
+        tagsAsString.swap( other.tagsAsString );
+        std::swap( TestCaseInfo::properties, static_cast<TestCaseInfo&>( other ).properties );
+        std::swap( lineInfo, other.lineInfo );
+    }
+
+    void TestCase::invoke() const {
+        test->invoke();
+    }
+
+    bool TestCase::operator == ( TestCase const& other ) const {
+        return  test.get() == other.test.get() &&
+                name == other.name &&
+                className == other.className;
+    }
+
+    bool TestCase::operator < ( TestCase const& other ) const {
+        return name < other.name;
+    }
+    TestCase& TestCase::operator = ( TestCase const& other ) {
+        TestCase temp( other );
+        swap( temp );
+        return *this;
+    }
+
+    TestCaseInfo const& TestCase::getTestCaseInfo() const
+    {
+        return *this;
+    }
+
+} // end namespace Catch
+
+// #included from: catch_version.hpp
+#define TWOBLUECUBES_CATCH_VERSION_HPP_INCLUDED
+
+namespace Catch {
+
+    Version::Version
+        (   unsigned int _majorVersion,
+            unsigned int _minorVersion,
+            unsigned int _patchNumber,
+            std::string const& _branchName,
+            unsigned int _buildNumber )
+    :   majorVersion( _majorVersion ),
+        minorVersion( _minorVersion ),
+        patchNumber( _patchNumber ),
+        branchName( _branchName ),
+        buildNumber( _buildNumber )
+    {}
+
+    std::ostream& operator << ( std::ostream& os, Version const& version ) {
+        os  << version.majorVersion << "."
+            << version.minorVersion << "."
+            << version.patchNumber;
+
+        if( !version.branchName.empty() ) {
+            os  << "-" << version.branchName
+                << "." << version.buildNumber;
+        }
+        return os;
+    }
+
+    Version libraryVersion( 1, 2, 1, "", 0 );
+
+}
+
+// #included from: catch_message.hpp
+#define TWOBLUECUBES_CATCH_MESSAGE_HPP_INCLUDED
+
+namespace Catch {
+
+    MessageInfo::MessageInfo(   std::string const& _macroName,
+                                SourceLineInfo const& _lineInfo,
+                                ResultWas::OfType _type )
+    :   macroName( _macroName ),
+        lineInfo( _lineInfo ),
+        type( _type ),
+        sequence( ++globalCount )
+    {}
+
+    // This may need protecting if threading support is added
+    unsigned int MessageInfo::globalCount = 0;
+
+    ////////////////////////////////////////////////////////////////////////////
+
+    ScopedMessage::ScopedMessage( MessageBuilder const& builder )
+    : m_info( builder.m_info )
+    {
+        m_info.message = builder.m_stream.str();
+        getResultCapture().pushScopedMessage( m_info );
+    }
+    ScopedMessage::ScopedMessage( ScopedMessage const& other )
+    : m_info( other.m_info )
+    {}
+
+    ScopedMessage::~ScopedMessage() {
+        getResultCapture().popScopedMessage( m_info );
+    }
+
+} // end namespace Catch
+
+// #included from: catch_legacy_reporter_adapter.hpp
+#define TWOBLUECUBES_CATCH_LEGACY_REPORTER_ADAPTER_HPP_INCLUDED
+
+// #included from: catch_legacy_reporter_adapter.h
+#define TWOBLUECUBES_CATCH_LEGACY_REPORTER_ADAPTER_H_INCLUDED
+
+namespace Catch
+{
+    // Deprecated
+    struct IReporter : IShared {
+        virtual ~IReporter();
+
+        virtual bool shouldRedirectStdout() const = 0;
+
+        virtual void StartTesting() = 0;
+        virtual void EndTesting( Totals const& totals ) = 0;
+        virtual void StartGroup( std::string const& groupName ) = 0;
+        virtual void EndGroup( std::string const& groupName, Totals const& totals ) = 0;
+        virtual void StartTestCase( TestCaseInfo const& testInfo ) = 0;
+        virtual void EndTestCase( TestCaseInfo const& testInfo, Totals const& totals, std::string const& stdOut, std::string const& stdErr ) = 0;
+        virtual void StartSection( std::string const& sectionName, std::string const& description ) = 0;
+        virtual void EndSection( std::string const& sectionName, Counts const& assertions ) = 0;
+        virtual void NoAssertionsInSection( std::string const& sectionName ) = 0;
+        virtual void NoAssertionsInTestCase( std::string const& testName ) = 0;
+        virtual void Aborted() = 0;
+        virtual void Result( AssertionResult const& result ) = 0;
+    };
+
+    class LegacyReporterAdapter : public SharedImpl<IStreamingReporter>
+    {
+    public:
+        LegacyReporterAdapter( Ptr<IReporter> const& legacyReporter );
+        virtual ~LegacyReporterAdapter();
+
+        virtual ReporterPreferences getPreferences() const;
+        virtual void noMatchingTestCases( std::string const& );
+        virtual void testRunStarting( TestRunInfo const& );
+        virtual void testGroupStarting( GroupInfo const& groupInfo );
+        virtual void testCaseStarting( TestCaseInfo const& testInfo );
+        virtual void sectionStarting( SectionInfo const& sectionInfo );
+        virtual void assertionStarting( AssertionInfo const& );
+        virtual bool assertionEnded( AssertionStats const& assertionStats );
+        virtual void sectionEnded( SectionStats const& sectionStats );
+        virtual void testCaseEnded( TestCaseStats const& testCaseStats );
+        virtual void testGroupEnded( TestGroupStats const& testGroupStats );
+        virtual void testRunEnded( TestRunStats const& testRunStats );
+        virtual void skipTest( TestCaseInfo const& );
+
+    private:
+        Ptr<IReporter> m_legacyReporter;
+    };
+}
+
+namespace Catch
+{
+    LegacyReporterAdapter::LegacyReporterAdapter( Ptr<IReporter> const& legacyReporter )
+    :   m_legacyReporter( legacyReporter )
+    {}
+    LegacyReporterAdapter::~LegacyReporterAdapter() {}
+
+    ReporterPreferences LegacyReporterAdapter::getPreferences() const {
+        ReporterPreferences prefs;
+        prefs.shouldRedirectStdOut = m_legacyReporter->shouldRedirectStdout();
+        return prefs;
+    }
+
+    void LegacyReporterAdapter::noMatchingTestCases( std::string const& ) {}
+    void LegacyReporterAdapter::testRunStarting( TestRunInfo const& ) {
+        m_legacyReporter->StartTesting();
+    }
+    void LegacyReporterAdapter::testGroupStarting( GroupInfo const& groupInfo ) {
+        m_legacyReporter->StartGroup( groupInfo.name );
+    }
+    void LegacyReporterAdapter::testCaseStarting( TestCaseInfo const& testInfo ) {
+        m_legacyReporter->StartTestCase( testInfo );
+    }
+    void LegacyReporterAdapter::sectionStarting( SectionInfo const& sectionInfo ) {
+        m_legacyReporter->StartSection( sectionInfo.name, sectionInfo.description );
+    }
+    void LegacyReporterAdapter::assertionStarting( AssertionInfo const& ) {
+        // Not on legacy interface
+    }
+
+    bool LegacyReporterAdapter::assertionEnded( AssertionStats const& assertionStats ) {
+        if( assertionStats.assertionResult.getResultType() != ResultWas::Ok ) {
+            for( std::vector<MessageInfo>::const_iterator it = assertionStats.infoMessages.begin(), itEnd = assertionStats.infoMessages.end();
+                    it != itEnd;
+                    ++it ) {
+                if( it->type == ResultWas::Info ) {
+                    ResultBuilder rb( it->macroName.c_str(), it->lineInfo, "", ResultDisposition::Normal );
+                    rb << it->message;
+                    rb.setResultType( ResultWas::Info );
+                    AssertionResult result = rb.build();
+                    m_legacyReporter->Result( result );
+                }
+            }
+        }
+        m_legacyReporter->Result( assertionStats.assertionResult );
+        return true;
+    }
+    void LegacyReporterAdapter::sectionEnded( SectionStats const& sectionStats ) {
+        if( sectionStats.missingAssertions )
+            m_legacyReporter->NoAssertionsInSection( sectionStats.sectionInfo.name );
+        m_legacyReporter->EndSection( sectionStats.sectionInfo.name, sectionStats.assertions );
+    }
+    void LegacyReporterAdapter::testCaseEnded( TestCaseStats const& testCaseStats ) {
+        m_legacyReporter->EndTestCase
+            (   testCaseStats.testInfo,
+                testCaseStats.totals,
+                testCaseStats.stdOut,
+                testCaseStats.stdErr );
+    }
+    void LegacyReporterAdapter::testGroupEnded( TestGroupStats const& testGroupStats ) {
+        if( testGroupStats.aborting )
+            m_legacyReporter->Aborted();
+        m_legacyReporter->EndGroup( testGroupStats.groupInfo.name, testGroupStats.totals );
+    }
+    void LegacyReporterAdapter::testRunEnded( TestRunStats const& testRunStats ) {
+        m_legacyReporter->EndTesting( testRunStats.totals );
+    }
+    void LegacyReporterAdapter::skipTest( TestCaseInfo const& ) {
+    }
+}
+
+// #included from: catch_timer.hpp
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wc++11-long-long"
+#endif
+
+#ifdef CATCH_PLATFORM_WINDOWS
+#include <windows.h>
+#else
+#include <sys/time.h>
+#endif
+
+namespace Catch {
+
+    namespace {
+#ifdef CATCH_PLATFORM_WINDOWS
+        uint64_t getCurrentTicks() {
+            static uint64_t hz=0, hzo=0;
+            if (!hz) {
+                QueryPerformanceFrequency( reinterpret_cast<LARGE_INTEGER*>( &hz ) );
+                QueryPerformanceCounter( reinterpret_cast<LARGE_INTEGER*>( &hzo ) );
+            }
+            uint64_t t;
+            QueryPerformanceCounter( reinterpret_cast<LARGE_INTEGER*>( &t ) );
+            return ((t-hzo)*1000000)/hz;
+        }
+#else
+        uint64_t getCurrentTicks() {
+            timeval t;
+            gettimeofday(&t,NULL);
+            return static_cast<uint64_t>( t.tv_sec ) * 1000000ull + static_cast<uint64_t>( t.tv_usec );
+        }
+#endif
+    }
+
+    void Timer::start() {
+        m_ticks = getCurrentTicks();
+    }
+    unsigned int Timer::getElapsedMicroseconds() const {
+        return static_cast<unsigned int>(getCurrentTicks() - m_ticks);
+    }
+    unsigned int Timer::getElapsedMilliseconds() const {
+        return static_cast<unsigned int>(getElapsedMicroseconds()/1000);
+    }
+    double Timer::getElapsedSeconds() const {
+        return getElapsedMicroseconds()/1000000.0;
+    }
+
+} // namespace Catch
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
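
On non-Windows platforms the timer above is a thin wrapper around gettimeofday(): start() stores a microsecond tick count and the getElapsed*() accessors report the difference. A standalone POSIX-only sketch of the same measurement (illustrative, not Catch's own Timer):

    #include <sys/time.h>
    #include <stdint.h>
    #include <iostream>

    // Mirrors the POSIX getCurrentTicks() above: microseconds since the epoch.
    static uint64_t currentTicks() {
        timeval t;
        gettimeofday( &t, NULL );
        return static_cast<uint64_t>( t.tv_sec ) * 1000000ull + static_cast<uint64_t>( t.tv_usec );
    }

    int main() {
        uint64_t start = currentTicks();                 // what Timer::start() records
        volatile long sink = 0;
        for( long i = 0; i < 1000000; ++i ) sink += i;   // work being timed
        uint64_t micros = currentTicks() - start;        // Timer::getElapsedMicroseconds()
        std::cout << micros / 1000 << " ms, " << micros / 1000000.0 << " s\n";
        return 0;
    }
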
+// #included from: catch_common.hpp
+#define TWOBLUECUBES_CATCH_COMMON_HPP_INCLUDED
+
+namespace Catch {
+
+    bool startsWith( std::string const& s, std::string const& prefix ) {
+        return s.size() >= prefix.size() && s.substr( 0, prefix.size() ) == prefix;
+    }
+    bool endsWith( std::string const& s, std::string const& suffix ) {
+        return s.size() >= suffix.size() && s.substr( s.size()-suffix.size(), suffix.size() ) == suffix;
+    }
+    bool contains( std::string const& s, std::string const& infix ) {
+        return s.find( infix ) != std::string::npos;
+    }
+    void toLowerInPlace( std::string& s ) {
+        std::transform( s.begin(), s.end(), s.begin(), ::tolower );
+    }
+    std::string toLower( std::string const& s ) {
+        std::string lc = s;
+        toLowerInPlace( lc );
+        return lc;
+    }
+    std::string trim( std::string const& str ) {
+        static char const* whitespaceChars = "\n\r\t ";
+        std::string::size_type start = str.find_first_not_of( whitespaceChars );
+        std::string::size_type end = str.find_last_not_of( whitespaceChars );
+
+        return start != std::string::npos ? str.substr( start, 1+end-start ) : "";
+    }
+
+    bool replaceInPlace( std::string& str, std::string const& replaceThis, std::string const& withThis ) {
+        bool replaced = false;
+        std::size_t i = str.find( replaceThis );
+        while( i != std::string::npos ) {
+            replaced = true;
+            str = str.substr( 0, i ) + withThis + str.substr( i+replaceThis.size() );
+            if( i < str.size()-withThis.size() )
+                i = str.find( replaceThis, i+withThis.size() );
+            else
+                i = std::string::npos;
+        }
+        return replaced;
+    }
+
+    pluralise::pluralise( std::size_t count, std::string const& label )
+    :   m_count( count ),
+        m_label( label )
+    {}
+
+    std::ostream& operator << ( std::ostream& os, pluralise const& pluraliser ) {
+        os << pluraliser.m_count << " " << pluraliser.m_label;
+        if( pluraliser.m_count != 1 )
+            os << "s";
+        return os;
+    }
+
+    SourceLineInfo::SourceLineInfo() : line( 0 ){}
+    SourceLineInfo::SourceLineInfo( char const* _file, std::size_t _line )
+    :   file( _file ),
+        line( _line )
+    {}
+    SourceLineInfo::SourceLineInfo( SourceLineInfo const& other )
+    :   file( other.file ),
+        line( other.line )
+    {}
+    bool SourceLineInfo::empty() const {
+        return file.empty();
+    }
+    bool SourceLineInfo::operator == ( SourceLineInfo const& other ) const {
+        return line == other.line && file == other.file;
+    }
+    bool SourceLineInfo::operator < ( SourceLineInfo const& other ) const {
+        return line < other.line || ( line == other.line  && file < other.file );
+    }
+
+    std::ostream& operator << ( std::ostream& os, SourceLineInfo const& info ) {
+#ifndef __GNUG__
+        os << info.file << "(" << info.line << ")";
+#else
+        os << info.file << ":" << info.line;
+#endif
+        return os;
+    }
+
+    void throwLogicError( std::string const& message, SourceLineInfo const& locationInfo ) {
+        std::ostringstream oss;
+        oss << locationInfo << ": Internal Catch error: '" << message << "'";
+        if( alwaysTrue() )
+            throw std::logic_error( oss.str() );
+    }
+}
+
+// #included from: catch_section.hpp
+#define TWOBLUECUBES_CATCH_SECTION_HPP_INCLUDED
+
+namespace Catch {
+
+    SectionInfo::SectionInfo
+        (   SourceLineInfo const& _lineInfo,
+            std::string const& _name,
+            std::string const& _description )
+    :   name( _name ),
+        description( _description ),
+        lineInfo( _lineInfo )
+    {}
+
+    Section::Section( SectionInfo const& info )
+    :   m_info( info ),
+        m_sectionIncluded( getResultCapture().sectionStarted( m_info, m_assertions ) )
+    {
+        m_timer.start();
+    }
+
+    Section::~Section() {
+        if( m_sectionIncluded )
+            getResultCapture().sectionEnded( m_info, m_assertions, m_timer.getElapsedSeconds() );
+    }
+
+    // This indicates whether the section should be executed or not
+    Section::operator bool() const {
+        return m_sectionIncluded;
+    }
+
+} // end namespace Catch
+
+// #included from: catch_debugger.hpp
+#define TWOBLUECUBES_CATCH_DEBUGGER_HPP_INCLUDED
+
+#include <iostream>
+
+#ifdef CATCH_PLATFORM_MAC
+
+    #include <assert.h>
+    #include <stdbool.h>
+    #include <sys/types.h>
+    #include <unistd.h>
+    #include <sys/sysctl.h>
+
+    namespace Catch{
+
+        // The following function is taken directly from the following technical note:
+        // http://developer.apple.com/library/mac/#qa/qa2004/qa1361.html
+
+        // Returns true if the current process is being debugged (either
+        // running under the debugger or has a debugger attached post facto).
+        bool isDebuggerActive(){
+
+            int                 mib[4];
+            struct kinfo_proc   info;
+            size_t              size;
+
+            // Initialize the flags so that, if sysctl fails for some bizarre
+            // reason, we get a predictable result.
+
+            info.kp_proc.p_flag = 0;
+
+            // Initialize mib, which tells sysctl the info we want, in this case
+            // we're looking for information about a specific process ID.
+
+            mib[0] = CTL_KERN;
+            mib[1] = KERN_PROC;
+            mib[2] = KERN_PROC_PID;
+            mib[3] = getpid();
+
+            // Call sysctl.
+
+            size = sizeof(info);
+            if( sysctl(mib, sizeof(mib) / sizeof(*mib), &info, &size, NULL, 0) != 0 ) {
+                Catch::cerr() << "\n** Call to sysctl failed - unable to determine if debugger is active **\n" << std::endl;
+                return false;
+            }
+
+            // We're being debugged if the P_TRACED flag is set.
+
+            return ( (info.kp_proc.p_flag & P_TRACED) != 0 );
+        }
+    } // namespace Catch
+
+#elif defined(_MSC_VER)
+    extern "C" __declspec(dllimport) int __stdcall IsDebuggerPresent();
+    namespace Catch {
+        bool isDebuggerActive() {
+            return IsDebuggerPresent() != 0;
+        }
+    }
+#elif defined(__MINGW32__)
+    extern "C" __declspec(dllimport) int __stdcall IsDebuggerPresent();
+    namespace Catch {
+        bool isDebuggerActive() {
+            return IsDebuggerPresent() != 0;
+        }
+    }
+#else
+    namespace Catch {
+       inline bool isDebuggerActive() { return false; }
+    }
+#endif // Platform
+
+#ifdef CATCH_PLATFORM_WINDOWS
+    extern "C" __declspec(dllimport) void __stdcall OutputDebugStringA( const char* );
+    namespace Catch {
+        void writeToDebugConsole( std::string const& text ) {
+            ::OutputDebugStringA( text.c_str() );
+        }
+    }
+#else
+    namespace Catch {
+        void writeToDebugConsole( std::string const& text ) {
+            // !TBD: Need a version for Mac/ XCode and other IDEs
+            Catch::cout() << text;
+        }
+    }
+#endif // Platform
+
+// #included from: catch_tostring.hpp
+#define TWOBLUECUBES_CATCH_TOSTRING_HPP_INCLUDED
+
+namespace Catch {
+
+namespace Detail {
+
+    std::string unprintableString = "{?}";
+
+    namespace {
+        struct Endianness {
+            enum Arch { Big, Little };
+
+            static Arch which() {
+                union _{
+                    int asInt;
+                    char asChar[sizeof (int)];
+                } u;
+
+                u.asInt = 1;
+                return ( u.asChar[sizeof(int)-1] == 1 ) ? Big : Little;
+            }
+        };
+    }
+
+    std::string rawMemoryToString( const void *object, std::size_t size )
+    {
+        // Reverse order for little endian architectures
+        int i = 0, end = static_cast<int>( size ), inc = 1;
+        if( Endianness::which() == Endianness::Little ) {
+            i = end-1;
+            end = inc = -1;
+        }
+
+        unsigned char const *bytes = static_cast<unsigned char const *>(object);
+        std::ostringstream os;
+        os << "0x" << std::setfill('0') << std::hex;
+        for( ; i != end; i += inc )
+            os << std::setw(2) << static_cast<unsigned>(bytes[i]);
+        return os.str();
+    }
+}
+
+std::string toString( std::string const& value ) {
+    std::string s = value;
+    if( getCurrentContext().getConfig()->showInvisibles() ) {
+        for(size_t i = 0; i < s.size(); ++i ) {
+            std::string subs;
+            switch( s[i] ) {
+            case '\n': subs = "\\n"; break;
+            case '\t': subs = "\\t"; break;
+            default: break;
+            }
+            if( !subs.empty() ) {
+                s = s.substr( 0, i ) + subs + s.substr( i+1 );
+                ++i;
+            }
+        }
+    }
+    return "\"" + s + "\"";
+}
+std::string toString( std::wstring const& value ) {
+
+    std::string s;
+    s.reserve( value.size() );
+    for(size_t i = 0; i < value.size(); ++i )
+        s += value[i] <= 0xff ? static_cast<char>( value[i] ) : '?';
+    return Catch::toString( s );
+}
+
+std::string toString( const char* const value ) {
+    return value ? Catch::toString( std::string( value ) ) : std::string( "{null string}" );
+}
+
+std::string toString( char* const value ) {
+    return Catch::toString( static_cast<const char*>( value ) );
+}
+
+std::string toString( const wchar_t* const value ) {
+    return value ? Catch::toString( std::wstring(value) ) : std::string( "{null string}" );
+}
+
+std::string toString( wchar_t* const value ) {
+    return Catch::toString( static_cast<const wchar_t*>( value ) );
+}
+
+std::string toString( int value ) {
+    std::ostringstream oss;
+    oss << value;
+    if( value >= 255 )
+        oss << " (0x" << std::hex << value << ")";
+    return oss.str();
+}
+
+std::string toString( unsigned long value ) {
+    std::ostringstream oss;
+    oss << value;
+    if( value >= 255 )
+        oss << " (0x" << std::hex << value << ")";
+    return oss.str();
+}
+
+std::string toString( unsigned int value ) {
+    return Catch::toString( static_cast<unsigned long>( value ) );
+}
+
+template<typename T>
+std::string fpToString( T value, int precision ) {
+    std::ostringstream oss;
+    oss << std::setprecision( precision )
+        << std::fixed
+        << value;
+    std::string d = oss.str();
+    std::size_t i = d.find_last_not_of( '0' );
+    if( i != std::string::npos && i != d.size()-1 ) {
+        if( d[i] == '.' )
+            i++;
+        d = d.substr( 0, i+1 );
+    }
+    return d;
+}
+
+std::string toString( const double value ) {
+    return fpToString( value, 10 );
+}
+std::string toString( const float value ) {
+    return fpToString( value, 5 ) + "f";
+}
+
+std::string toString( bool value ) {
+    return value ? "true" : "false";
+}
+
+std::string toString( char value ) {
+    return value < ' '
+        ? toString( static_cast<unsigned int>( value ) )
+        : Detail::makeString( value );
+}
+
+std::string toString( signed char value ) {
+    return toString( static_cast<char>( value ) );
+}
+
+std::string toString( unsigned char value ) {
+    return toString( static_cast<char>( value ) );
+}
+
+#ifdef CATCH_CONFIG_CPP11_NULLPTR
+std::string toString( std::nullptr_t ) {
+    return "nullptr";
+}
+#endif
+
+#ifdef __OBJC__
+    std::string toString( NSString const * const& nsstring ) {
+        if( !nsstring )
+            return "nil";
+        return "@" + toString([nsstring UTF8String]);
+    }
+    std::string toString( NSString * CATCH_ARC_STRONG const& nsstring ) {
+        if( !nsstring )
+            return "nil";
+        return "@" + toString([nsstring UTF8String]);
+    }
+    std::string toString( NSObject* const& nsObject ) {
+        return toString( [nsObject description] );
+    }
+#endif
+
+} // end namespace Catch
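
These overloads determine how assertion operands are rendered in reports: integers of 255 or more gain a hex suffix, floating-point values are printed with trailing zeros trimmed (floats also get an "f" suffix), and strings are quoted. A hedged sketch of the expected strings, written as a Catch test; CATCH_CONFIG_MAIN, TEST_CASE and REQUIRE come from parts of the header outside this hunk, and the "catch.hpp" include path is an assumption:

    #define CATCH_CONFIG_MAIN   // let Catch supply main()
    #include "catch.hpp"

    TEST_CASE( "toString formatting sketch", "[tostring]" ) {
        REQUIRE( Catch::toString( 42 )   == "42" );
        REQUIRE( Catch::toString( 300 )  == "300 (0x12c)" );   // >= 255 adds the hex rendering
        REQUIRE( Catch::toString( 2.5f ) == "2.5f" );          // fpToString trims zeros, adds 'f'
        REQUIRE( Catch::toString( true ) == "true" );
        REQUIRE( Catch::toString( std::string( "hi" ) ) == "\"hi\"" );
    }
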
+
+// #included from: catch_result_builder.hpp
+#define TWOBLUECUBES_CATCH_RESULT_BUILDER_HPP_INCLUDED
+
+namespace Catch {
+
+    ResultBuilder::ResultBuilder(   char const* macroName,
+                                    SourceLineInfo const& lineInfo,
+                                    char const* capturedExpression,
+                                    ResultDisposition::Flags resultDisposition )
+    :   m_assertionInfo( macroName, lineInfo, capturedExpression, resultDisposition ),
+        m_shouldDebugBreak( false ),
+        m_shouldThrow( false )
+    {}
+
+    ResultBuilder& ResultBuilder::setResultType( ResultWas::OfType result ) {
+        m_data.resultType = result;
+        return *this;
+    }
+    ResultBuilder& ResultBuilder::setResultType( bool result ) {
+        m_data.resultType = result ? ResultWas::Ok : ResultWas::ExpressionFailed;
+        return *this;
+    }
+    ResultBuilder& ResultBuilder::setLhs( std::string const& lhs ) {
+        m_exprComponents.lhs = lhs;
+        return *this;
+    }
+    ResultBuilder& ResultBuilder::setRhs( std::string const& rhs ) {
+        m_exprComponents.rhs = rhs;
+        return *this;
+    }
+    ResultBuilder& ResultBuilder::setOp( std::string const& op ) {
+        m_exprComponents.op = op;
+        return *this;
+    }
+
+    void ResultBuilder::endExpression() {
+        m_exprComponents.testFalse = isFalseTest( m_assertionInfo.resultDisposition );
+        captureExpression();
+    }
+
+    void ResultBuilder::useActiveException( ResultDisposition::Flags resultDisposition ) {
+        m_assertionInfo.resultDisposition = resultDisposition;
+        m_stream.oss << Catch::translateActiveException();
+        captureResult( ResultWas::ThrewException );
+    }
+
+    void ResultBuilder::captureResult( ResultWas::OfType resultType ) {
+        setResultType( resultType );
+        captureExpression();
+    }
+
+    void ResultBuilder::captureExpression() {
+        AssertionResult result = build();
+        getResultCapture().assertionEnded( result );
+
+        if( !result.isOk() ) {
+            if( getCurrentContext().getConfig()->shouldDebugBreak() )
+                m_shouldDebugBreak = true;
+            if( getCurrentContext().getRunner()->aborting() || (m_assertionInfo.resultDisposition & ResultDisposition::Normal) )
+                m_shouldThrow = true;
+        }
+    }
+    void ResultBuilder::react() {
+        if( m_shouldThrow )
+            throw Catch::TestFailureException();
+    }
+
+    bool ResultBuilder::shouldDebugBreak() const { return m_shouldDebugBreak; }
+    bool ResultBuilder::allowThrows() const { return getCurrentContext().getConfig()->allowThrows(); }
+
+    AssertionResult ResultBuilder::build() const
+    {
+        assert( m_data.resultType != ResultWas::Unknown );
+
+        AssertionResultData data = m_data;
+
+        // Flip bool results if testFalse is set
+        if( m_exprComponents.testFalse ) {
+            if( data.resultType == ResultWas::Ok )
+                data.resultType = ResultWas::ExpressionFailed;
+            else if( data.resultType == ResultWas::ExpressionFailed )
+                data.resultType = ResultWas::Ok;
+        }
+
+        data.message = m_stream.oss.str();
+        data.reconstructedExpression = reconstructExpression();
+        if( m_exprComponents.testFalse ) {
+            if( m_exprComponents.op == "" )
+                data.reconstructedExpression = "!" + data.reconstructedExpression;
+            else
+                data.reconstructedExpression = "!(" + data.reconstructedExpression + ")";
+        }
+        return AssertionResult( m_assertionInfo, data );
+    }
+    std::string ResultBuilder::reconstructExpression() const {
+        if( m_exprComponents.op == "" )
+            return m_exprComponents.lhs.empty() ? m_assertionInfo.capturedExpression : m_exprComponents.op + m_exprComponents.lhs;
+        else if( m_exprComponents.op == "matches" )
+            return m_exprComponents.lhs + " " + m_exprComponents.rhs;
+        else if( m_exprComponents.op != "!" ) {
+            if( m_exprComponents.lhs.size() + m_exprComponents.rhs.size() < 40 &&
+                m_exprComponents.lhs.find("\n") == std::string::npos &&
+                m_exprComponents.rhs.find("\n") == std::string::npos )
+                return m_exprComponents.lhs + " " + m_exprComponents.op + " " + m_exprComponents.rhs;
+            else
+                return m_exprComponents.lhs + "\n" + m_exprComponents.op + "\n" + m_exprComponents.rhs;
+        }
+        else
+            return "{can't expand - use " + m_assertionInfo.macroName + "_FALSE( " + m_assertionInfo.capturedExpression.substr(1) + " ) instead of " + m_assertionInfo.macroName + "( " + m_assertionInfo.capturedExpression + " ) for better diagnostics}";
+    }
+
+} // end namespace Catch
+
+// #included from: catch_tag_alias_registry.hpp
+#define TWOBLUECUBES_CATCH_TAG_ALIAS_REGISTRY_HPP_INCLUDED
+
+// #included from: catch_tag_alias_registry.h
+#define TWOBLUECUBES_CATCH_TAG_ALIAS_REGISTRY_H_INCLUDED
+
+#include <map>
+
+namespace Catch {
+
+    class TagAliasRegistry : public ITagAliasRegistry {
+    public:
+        virtual ~TagAliasRegistry();
+        virtual Option<TagAlias> find( std::string const& alias ) const;
+        virtual std::string expandAliases( std::string const& unexpandedTestSpec ) const;
+        void add( char const* alias, char const* tag, SourceLineInfo const& lineInfo );
+        static TagAliasRegistry& get();
+
+    private:
+        std::map<std::string, TagAlias> m_registry;
+    };
+
+} // end namespace Catch
+
+#include <map>
+#include <iostream>
+
+namespace Catch {
+
+    TagAliasRegistry::~TagAliasRegistry() {}
+
+    Option<TagAlias> TagAliasRegistry::find( std::string const& alias ) const {
+        std::map<std::string, TagAlias>::const_iterator it = m_registry.find( alias );
+        if( it != m_registry.end() )
+            return it->second;
+        else
+            return Option<TagAlias>();
+    }
+
+    std::string TagAliasRegistry::expandAliases( std::string const& unexpandedTestSpec ) const {
+        std::string expandedTestSpec = unexpandedTestSpec;
+        for( std::map<std::string, TagAlias>::const_iterator it = m_registry.begin(), itEnd = m_registry.end();
+                it != itEnd;
+                ++it ) {
+            std::size_t pos = expandedTestSpec.find( it->first );
+            if( pos != std::string::npos ) {
+                expandedTestSpec =  expandedTestSpec.substr( 0, pos ) +
+                                    it->second.tag +
+                                    expandedTestSpec.substr( pos + it->first.size() );
+            }
+        }
+        return expandedTestSpec;
+    }
+
+    void TagAliasRegistry::add( char const* alias, char const* tag, SourceLineInfo const& lineInfo ) {
+
+        if( !startsWith( alias, "[@" ) || !endsWith( alias, "]" ) ) {
+            std::ostringstream oss;
+            oss << "error: tag alias, \"" << alias << "\" is not of the form [@alias name].\n" << lineInfo;
+            throw std::domain_error( oss.str().c_str() );
+        }
+        if( !m_registry.insert( std::make_pair( alias, TagAlias( tag, lineInfo ) ) ).second ) {
+            std::ostringstream oss;
+            oss << "error: tag alias, \"" << alias << "\" already registered.\n"
+                << "\tFirst seen at " << find(alias)->lineInfo << "\n"
+                << "\tRedefined at " << lineInfo;
+            throw std::domain_error( oss.str().c_str() );
+        }
+    }
+
+    TagAliasRegistry& TagAliasRegistry::get() {
+        static TagAliasRegistry instance;
+        return instance;
+
+    }
+
+    ITagAliasRegistry::~ITagAliasRegistry() {}
+    ITagAliasRegistry const& ITagAliasRegistry::get() { return TagAliasRegistry::get(); }
+
+    RegistrarForTagAliases::RegistrarForTagAliases( char const* alias, char const* tag, SourceLineInfo const& lineInfo ) {
+        try {
+            TagAliasRegistry::get().add( alias, tag, lineInfo );
+        }
+        catch( std::exception& ex ) {
+            Colour colourGuard( Colour::Red );
+            Catch::cerr() << ex.what() << std::endl;
+            exit(1);
+        }
+    }
+
+} // end namespace Catch
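
A tag alias of the form "[@name]" registered above is substituted textually into test specs by expandAliases(). A minimal sketch using only the classes in this hunk; the CATCH_CONFIG_RUNNER switch (keep your own main and still compile the implementation section) and the "catch.hpp" include path are assumptions about the wider header:

    #define CATCH_CONFIG_RUNNER
    #include "catch.hpp"
    #include <iostream>

    int main() {
        Catch::TagAliasRegistry& registry = Catch::TagAliasRegistry::get();
        registry.add( "[@slow]", "[slow][.]", Catch::SourceLineInfo( __FILE__, __LINE__ ) );

        // "[@slow]" is replaced by the registered tag string; everything else is untouched.
        std::cout << registry.expandAliases( "[@slow] and [fast]" ) << "\n";
        // prints: [slow][.] and [fast]
        return 0;
    }

Registering a malformed alias (not of the "[@...]" form) or the same alias twice throws std::domain_error, which RegistrarForTagAliases turns into the red error message and exit(1) shown above.
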
+
+// #included from: ../reporters/catch_reporter_xml.hpp
+#define TWOBLUECUBES_CATCH_REPORTER_XML_HPP_INCLUDED
+
+// #included from: catch_reporter_bases.hpp
+#define TWOBLUECUBES_CATCH_REPORTER_BASES_HPP_INCLUDED
+
+#include <cstring>
+
+namespace Catch {
+
+    struct StreamingReporterBase : SharedImpl<IStreamingReporter> {
+
+        StreamingReporterBase( ReporterConfig const& _config )
+        :   m_config( _config.fullConfig() ),
+            stream( _config.stream() )
+        {}
+
+        virtual ~StreamingReporterBase();
+
+        virtual void noMatchingTestCases( std::string const& ) {}
+
+        virtual void testRunStarting( TestRunInfo const& _testRunInfo ) {
+            currentTestRunInfo = _testRunInfo;
+        }
+        virtual void testGroupStarting( GroupInfo const& _groupInfo ) {
+            currentGroupInfo = _groupInfo;
+        }
+
+        virtual void testCaseStarting( TestCaseInfo const& _testInfo ) {
+            currentTestCaseInfo = _testInfo;
+        }
+        virtual void sectionStarting( SectionInfo const& _sectionInfo ) {
+            m_sectionStack.push_back( _sectionInfo );
+        }
+
+        virtual void sectionEnded( SectionStats const& /* _sectionStats */ ) {
+            m_sectionStack.pop_back();
+        }
+        virtual void testCaseEnded( TestCaseStats const& /* _testCaseStats */ ) {
+            currentTestCaseInfo.reset();
+        }
+        virtual void testGroupEnded( TestGroupStats const& /* _testGroupStats */ ) {
+            currentGroupInfo.reset();
+        }
+        virtual void testRunEnded( TestRunStats const& /* _testRunStats */ ) {
+            currentTestCaseInfo.reset();
+            currentGroupInfo.reset();
+            currentTestRunInfo.reset();
+        }
+
+        virtual void skipTest( TestCaseInfo const& ) {
+            // Don't do anything with this by default.
+            // It can optionally be overridden in the derived class.
+        }
+
+        Ptr<IConfig> m_config;
+        std::ostream& stream;
+
+        LazyStat<TestRunInfo> currentTestRunInfo;
+        LazyStat<GroupInfo> currentGroupInfo;
+        LazyStat<TestCaseInfo> currentTestCaseInfo;
+
+        std::vector<SectionInfo> m_sectionStack;
+    };
+
+    struct CumulativeReporterBase : SharedImpl<IStreamingReporter> {
+        template<typename T, typename ChildNodeT>
+        struct Node : SharedImpl<> {
+            explicit Node( T const& _value ) : value( _value ) {}
+            virtual ~Node() {}
+
+            typedef std::vector<Ptr<ChildNodeT> > ChildNodes;
+            T value;
+            ChildNodes children;
+        };
+        struct SectionNode : SharedImpl<> {
+            explicit SectionNode( SectionStats const& _stats ) : stats( _stats ) {}
+            virtual ~SectionNode();
+
+            bool operator == ( SectionNode const& other ) const {
+                return stats.sectionInfo.lineInfo == other.stats.sectionInfo.lineInfo;
+            }
+            bool operator == ( Ptr<SectionNode> const& other ) const {
+                return operator==( *other );
+            }
+
+            SectionStats stats;
+            typedef std::vector<Ptr<SectionNode> > ChildSections;
+            typedef std::vector<AssertionStats> Assertions;
+            ChildSections childSections;
+            Assertions assertions;
+            std::string stdOut;
+            std::string stdErr;
+        };
+
+        struct BySectionInfo {
+            BySectionInfo( SectionInfo const& other ) : m_other( other ) {}
+            BySectionInfo( BySectionInfo const& other ) : m_other( other.m_other ) {}
+            bool operator() ( Ptr<SectionNode> const& node ) const {
+                return node->stats.sectionInfo.lineInfo == m_other.lineInfo;
+            }
+        private:
+            void operator=( BySectionInfo const& );
+            SectionInfo const& m_other;
+        };
+
+        typedef Node<TestCaseStats, SectionNode> TestCaseNode;
+        typedef Node<TestGroupStats, TestCaseNode> TestGroupNode;
+        typedef Node<TestRunStats, TestGroupNode> TestRunNode;
+
+        CumulativeReporterBase( ReporterConfig const& _config )
+        :   m_config( _config.fullConfig() ),
+            stream( _config.stream() )
+        {}
+        ~CumulativeReporterBase();
+
+        virtual void testRunStarting( TestRunInfo const& ) {}
+        virtual void testGroupStarting( GroupInfo const& ) {}
+
+        virtual void testCaseStarting( TestCaseInfo const& ) {}
+
+        virtual void sectionStarting( SectionInfo const& sectionInfo ) {
+            SectionStats incompleteStats( sectionInfo, Counts(), 0, false );
+            Ptr<SectionNode> node;
+            if( m_sectionStack.empty() ) {
+                if( !m_rootSection )
+                    m_rootSection = new SectionNode( incompleteStats );
+                node = m_rootSection;
+            }
+            else {
+                SectionNode& parentNode = *m_sectionStack.back();
+                SectionNode::ChildSections::const_iterator it =
+                    std::find_if(   parentNode.childSections.begin(),
+                                    parentNode.childSections.end(),
+                                    BySectionInfo( sectionInfo ) );
+                if( it == parentNode.childSections.end() ) {
+                    node = new SectionNode( incompleteStats );
+                    parentNode.childSections.push_back( node );
+                }
+                else
+                    node = *it;
+            }
+            m_sectionStack.push_back( node );
+            m_deepestSection = node;
+        }
+
+        virtual void assertionStarting( AssertionInfo const& ) {}
+
+        virtual bool assertionEnded( AssertionStats const& assertionStats ) {
+            assert( !m_sectionStack.empty() );
+            SectionNode& sectionNode = *m_sectionStack.back();
+            sectionNode.assertions.push_back( assertionStats );
+            return true;
+        }
+        virtual void sectionEnded( SectionStats const& sectionStats ) {
+            assert( !m_sectionStack.empty() );
+            SectionNode& node = *m_sectionStack.back();
+            node.stats = sectionStats;
+            m_sectionStack.pop_back();
+        }
+        virtual void testCaseEnded( TestCaseStats const& testCaseStats ) {
+            Ptr<TestCaseNode> node = new TestCaseNode( testCaseStats );
+            assert( m_sectionStack.size() == 0 );
+            node->children.push_back( m_rootSection );
+            m_testCases.push_back( node );
+            m_rootSection.reset();
+
+            assert( m_deepestSection );
+            m_deepestSection->stdOut = testCaseStats.stdOut;
+            m_deepestSection->stdErr = testCaseStats.stdErr;
+        }
+        virtual void testGroupEnded( TestGroupStats const& testGroupStats ) {
+            Ptr<TestGroupNode> node = new TestGroupNode( testGroupStats );
+            node->children.swap( m_testCases );
+            m_testGroups.push_back( node );
+        }
+        virtual void testRunEnded( TestRunStats const& testRunStats ) {
+            Ptr<TestRunNode> node = new TestRunNode( testRunStats );
+            node->children.swap( m_testGroups );
+            m_testRuns.push_back( node );
+            testRunEndedCumulative();
+        }
+        virtual void testRunEndedCumulative() = 0;
+
+        virtual void skipTest( TestCaseInfo const& ) {}
+
+        Ptr<IConfig> m_config;
+        std::ostream& stream;
+        std::vector<AssertionStats> m_assertions;
+        std::vector<std::vector<Ptr<SectionNode> > > m_sections;
+        std::vector<Ptr<TestCaseNode> > m_testCases;
+        std::vector<Ptr<TestGroupNode> > m_testGroups;
+
+        std::vector<Ptr<TestRunNode> > m_testRuns;
+
+        Ptr<SectionNode> m_rootSection;
+        Ptr<SectionNode> m_deepestSection;
+        std::vector<Ptr<SectionNode> > m_sectionStack;
+
+    };
+
+    template<char C>
+    char const* getLineOfChars() {
+        static char line[CATCH_CONFIG_CONSOLE_WIDTH] = {0};
+        if( !*line ) {
+            memset( line, C, CATCH_CONFIG_CONSOLE_WIDTH-1 );
+            line[CATCH_CONFIG_CONSOLE_WIDTH-1] = 0;
+        }
+        return line;
+    }
+
+} // end namespace Catch
+
+// #included from: ../internal/catch_reporter_registrars.hpp
+#define TWOBLUECUBES_CATCH_REPORTER_REGISTRARS_HPP_INCLUDED
+
+namespace Catch {
+
+    template<typename T>
+    class LegacyReporterRegistrar {
+
+        class ReporterFactory : public IReporterFactory {
+            virtual IStreamingReporter* create( ReporterConfig const& config ) const {
+                return new LegacyReporterAdapter( new T( config ) );
+            }
+
+            virtual std::string getDescription() const {
+                return T::getDescription();
+            }
+        };
+
+    public:
+
+        LegacyReporterRegistrar( std::string const& name ) {
+            getMutableRegistryHub().registerReporter( name, new ReporterFactory() );
+        }
+    };
+
+    template<typename T>
+    class ReporterRegistrar {
+
+        class ReporterFactory : public IReporterFactory {
+
+            // *** Please Note ***:
+            // - If you end up here looking at a compiler error because it's trying to register
+            // your custom reporter class, be aware that the native reporter interface has changed
+            // to IStreamingReporter. The "legacy" interface, IReporter, is still supported via
+            // an adapter. Just use REGISTER_LEGACY_REPORTER to take advantage of the adapter.
+            // However, please consider updating to the new interface as the old one is now
+            // deprecated and will probably be removed quite soon!
+            // Please contact me via github if you have any questions at all about this.
+            // In fact, ideally, please contact me anyway to let me know you've hit this - as I have
+            // no idea who is actually using custom reporters at all (possibly no-one!).
+            // The new interface is designed to minimise exposure to interface changes in the future.
+            virtual IStreamingReporter* create( ReporterConfig const& config ) const {
+                return new T( config );
+            }
+
+            virtual std::string getDescription() const {
+                return T::getDescription();
+            }
+        };
+
+    public:
+
+        ReporterRegistrar( std::string const& name ) {
+            getMutableRegistryHub().registerReporter( name, new ReporterFactory() );
+        }
+    };
+}
+
+#define INTERNAL_CATCH_REGISTER_LEGACY_REPORTER( name, reporterType ) \
+    namespace{ Catch::LegacyReporterRegistrar<reporterType> catch_internal_RegistrarFor##reporterType( name ); }
+#define INTERNAL_CATCH_REGISTER_REPORTER( name, reporterType ) \
+    namespace{ Catch::ReporterRegistrar<reporterType> catch_internal_RegistrarFor##reporterType( name ); }
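
A custom reporter plugs in through these macros. The sketch below derives from StreamingReporterBase (defined earlier) and supplies the members that XmlReporter, registered later in this same header, also has to provide (a ReporterConfig constructor, a static getDescription, getPreferences, assertionStarting and assertionEnded). The reporter name "dots", the DotReporter type and the include path are hypothetical, and a real build may need further overrides:

    #define CATCH_CONFIG_MAIN   // StreamingReporterBase lives in the implementation
    #include "catch.hpp"        // section, so this sits in the same TU as Catch's main

    struct DotReporter : Catch::StreamingReporterBase {
        DotReporter( Catch::ReporterConfig const& config )
        :   StreamingReporterBase( config )
        {}
        virtual ~DotReporter() {}

        static std::string getDescription() {
            return "Prints one character per assertion";
        }
        virtual Catch::ReporterPreferences getPreferences() const {
            return Catch::ReporterPreferences();   // default preferences
        }
        virtual void assertionStarting( Catch::AssertionInfo const& ) {}
        virtual bool assertionEnded( Catch::AssertionStats const& stats ) {
            stream << ( stats.assertionResult.getResultType() == Catch::ResultWas::Ok ? '.' : 'F' );
            return true;
        }
    };

    INTERNAL_CATCH_REGISTER_REPORTER( "dots", DotReporter )

Once registered, the reporter is selectable by its name, the same way the built-in "xml" and "junit" reporters registered further down are.
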
+
+// #included from: ../internal/catch_xmlwriter.hpp
+#define TWOBLUECUBES_CATCH_XMLWRITER_HPP_INCLUDED
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace Catch {
+
+    class XmlWriter {
+    public:
+
+        class ScopedElement {
+        public:
+            ScopedElement( XmlWriter* writer )
+            :   m_writer( writer )
+            {}
+
+            ScopedElement( ScopedElement const& other )
+            :   m_writer( other.m_writer ){
+                other.m_writer = NULL;
+            }
+
+            ~ScopedElement() {
+                if( m_writer )
+                    m_writer->endElement();
+            }
+
+            ScopedElement& writeText( std::string const& text, bool indent = true ) {
+                m_writer->writeText( text, indent );
+                return *this;
+            }
+
+            template<typename T>
+            ScopedElement& writeAttribute( std::string const& name, T const& attribute ) {
+                m_writer->writeAttribute( name, attribute );
+                return *this;
+            }
+
+        private:
+            mutable XmlWriter* m_writer;
+        };
+
+        XmlWriter()
+        :   m_tagIsOpen( false ),
+            m_needsNewline( false ),
+            m_os( &Catch::cout() )
+        {}
+
+        XmlWriter( std::ostream& os )
+        :   m_tagIsOpen( false ),
+            m_needsNewline( false ),
+            m_os( &os )
+        {}
+
+        ~XmlWriter() {
+            while( !m_tags.empty() )
+                endElement();
+        }
+
+        XmlWriter& startElement( std::string const& name ) {
+            ensureTagClosed();
+            newlineIfNecessary();
+            stream() << m_indent << "<" << name;
+            m_tags.push_back( name );
+            m_indent += "  ";
+            m_tagIsOpen = true;
+            return *this;
+        }
+
+        ScopedElement scopedElement( std::string const& name ) {
+            ScopedElement scoped( this );
+            startElement( name );
+            return scoped;
+        }
+
+        XmlWriter& endElement() {
+            newlineIfNecessary();
+            m_indent = m_indent.substr( 0, m_indent.size()-2 );
+            if( m_tagIsOpen ) {
+                stream() << "/>\n";
+                m_tagIsOpen = false;
+            }
+            else {
+                stream() << m_indent << "</" << m_tags.back() << ">\n";
+            }
+            m_tags.pop_back();
+            return *this;
+        }
+
+        XmlWriter& writeAttribute( std::string const& name, std::string const& attribute ) {
+            if( !name.empty() && !attribute.empty() ) {
+                stream() << " " << name << "=\"";
+                writeEncodedText( attribute );
+                stream() << "\"";
+            }
+            return *this;
+        }
+
+        XmlWriter& writeAttribute( std::string const& name, bool attribute ) {
+            stream() << " " << name << "=\"" << ( attribute ? "true" : "false" ) << "\"";
+            return *this;
+        }
+
+        template<typename T>
+        XmlWriter& writeAttribute( std::string const& name, T const& attribute ) {
+            if( !name.empty() )
+                stream() << " " << name << "=\"" << attribute << "\"";
+            return *this;
+        }
+
+        XmlWriter& writeText( std::string const& text, bool indent = true ) {
+            if( !text.empty() ){
+                bool tagWasOpen = m_tagIsOpen;
+                ensureTagClosed();
+                if( tagWasOpen && indent )
+                    stream() << m_indent;
+                writeEncodedText( text );
+                m_needsNewline = true;
+            }
+            return *this;
+        }
+
+        XmlWriter& writeComment( std::string const& text ) {
+            ensureTagClosed();
+            stream() << m_indent << "<!--" << text << "-->";
+            m_needsNewline = true;
+            return *this;
+        }
+
+        XmlWriter& writeBlankLine() {
+            ensureTagClosed();
+            stream() << "\n";
+            return *this;
+        }
+
+        void setStream( std::ostream& os ) {
+            m_os = &os;
+        }
+
+    private:
+        XmlWriter( XmlWriter const& );
+        void operator=( XmlWriter const& );
+
+        std::ostream& stream() {
+            return *m_os;
+        }
+
+        void ensureTagClosed() {
+            if( m_tagIsOpen ) {
+                stream() << ">\n";
+                m_tagIsOpen = false;
+            }
+        }
+
+        void newlineIfNecessary() {
+            if( m_needsNewline ) {
+                stream() << "\n";
+                m_needsNewline = false;
+            }
+        }
+
+        void writeEncodedText( std::string const& text ) {
+            static const char* charsToEncode = "<&\"";
+            std::string mtext = text;
+            std::string::size_type pos = mtext.find_first_of( charsToEncode );
+            while( pos != std::string::npos ) {
+                stream() << mtext.substr( 0, pos );
+
+                switch( mtext[pos] ) {
+                    case '<':
+                        stream() << "&lt;";
+                        break;
+                    case '&':
+                        stream() << "&amp;";
+                        break;
+                    case '\"':
+                        stream() << "&quot;";
+                        break;
+                }
+                mtext = mtext.substr( pos+1 );
+                pos = mtext.find_first_of( charsToEncode );
+            }
+            stream() << mtext;
+        }
+
+        bool m_tagIsOpen;
+        bool m_needsNewline;
+        std::vector<std::string> m_tags;
+        std::string m_indent;
+        std::ostream* m_os;
+    };
+
+}
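
XmlWriter is a small streaming XML emitter: startElement()/writeAttribute()/writeText() manage indentation and escaping, a ScopedElement closes its element when it goes out of scope, and the destructor closes anything still open. A usage sketch (again assuming CATCH_CONFIG_RUNNER and the include path):

    #define CATCH_CONFIG_RUNNER
    #include "catch.hpp"
    #include <iostream>

    int main() {
        Catch::XmlWriter xml( std::cout );
        xml.startElement( "Catch" )
           .writeAttribute( "name", std::string( "demo" ) );
        xml.scopedElement( "TestCase" )            // element is closed when the temporary dies
           .writeAttribute( "success", true )
           .writeText( "1 < 2 is \"true\"" );      // '<' and '"' are entity-encoded on output
        xml.endElement();                          // closes <Catch>; ~XmlWriter would also do it
        return 0;
    }

    // Expected output, roughly:
    // <Catch name="demo">
    //   <TestCase success="true">
    //     1 &lt; 2 is &quot;true&quot;
    //   </TestCase>
    // </Catch>

This is the mechanism the XmlReporter and JunitReporter below use to drive their output.
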
+namespace Catch {
+    class XmlReporter : public StreamingReporterBase {
+    public:
+        XmlReporter( ReporterConfig const& _config )
+        :   StreamingReporterBase( _config ),
+            m_sectionDepth( 0 )
+        {}
+
+        virtual ~XmlReporter();
+
+        static std::string getDescription() {
+            return "Reports test results as an XML document";
+        }
+
+    public: // StreamingReporterBase
+        virtual ReporterPreferences getPreferences() const {
+            ReporterPreferences prefs;
+            prefs.shouldRedirectStdOut = true;
+            return prefs;
+        }
+
+        virtual void noMatchingTestCases( std::string const& s ) {
+            StreamingReporterBase::noMatchingTestCases( s );
+        }
+
+        virtual void testRunStarting( TestRunInfo const& testInfo ) {
+            StreamingReporterBase::testRunStarting( testInfo );
+            m_xml.setStream( stream );
+            m_xml.startElement( "Catch" );
+            if( !m_config->name().empty() )
+                m_xml.writeAttribute( "name", m_config->name() );
+        }
+
+        virtual void testGroupStarting( GroupInfo const& groupInfo ) {
+            StreamingReporterBase::testGroupStarting( groupInfo );
+            m_xml.startElement( "Group" )
+                .writeAttribute( "name", groupInfo.name );
+        }
+
+        virtual void testCaseStarting( TestCaseInfo const& testInfo ) {
+            StreamingReporterBase::testCaseStarting(testInfo);
+            m_xml.startElement( "TestCase" ).writeAttribute( "name", trim( testInfo.name ) );
+
+            if ( m_config->showDurations() == ShowDurations::Always )
+                m_testCaseTimer.start();
+        }
+
+        virtual void sectionStarting( SectionInfo const& sectionInfo ) {
+            StreamingReporterBase::sectionStarting( sectionInfo );
+            if( m_sectionDepth++ > 0 ) {
+                m_xml.startElement( "Section" )
+                    .writeAttribute( "name", trim( sectionInfo.name ) )
+                    .writeAttribute( "description", sectionInfo.description );
+            }
+        }
+
+        virtual void assertionStarting( AssertionInfo const& ) { }
+
+        virtual bool assertionEnded( AssertionStats const& assertionStats ) {
+            const AssertionResult& assertionResult = assertionStats.assertionResult;
+
+            // Print any info messages in <Info> tags.
+            if( assertionStats.assertionResult.getResultType() != ResultWas::Ok ) {
+                for( std::vector<MessageInfo>::const_iterator it = assertionStats.infoMessages.begin(), itEnd = assertionStats.infoMessages.end();
+                        it != itEnd;
+                        ++it ) {
+                    if( it->type == ResultWas::Info ) {
+                        m_xml.scopedElement( "Info" )
+                            .writeText( it->message );
+                    } else if ( it->type == ResultWas::Warning ) {
+                        m_xml.scopedElement( "Warning" )
+                            .writeText( it->message );
+                    }
+                }
+            }
+
+            // Drop out if result was successful but we're not printing them.
+            if( !m_config->includeSuccessfulResults() && isOk(assertionResult.getResultType()) )
+                return true;
+
+            // Print the expression if there is one.
+            if( assertionResult.hasExpression() ) {
+                m_xml.startElement( "Expression" )
+                    .writeAttribute( "success", assertionResult.succeeded() )
+                    .writeAttribute( "type", assertionResult.getTestMacroName() )
+                    .writeAttribute( "filename", assertionResult.getSourceInfo().file )
+                    .writeAttribute( "line", assertionResult.getSourceInfo().line );
+
+                m_xml.scopedElement( "Original" )
+                    .writeText( assertionResult.getExpression() );
+                m_xml.scopedElement( "Expanded" )
+                    .writeText( assertionResult.getExpandedExpression() );
+            }
+
+            // And... Print a result applicable to each result type.
+            switch( assertionResult.getResultType() ) {
+                case ResultWas::ThrewException:
+                    m_xml.scopedElement( "Exception" )
+                        .writeAttribute( "filename", assertionResult.getSourceInfo().file )
+                        .writeAttribute( "line", assertionResult.getSourceInfo().line )
+                        .writeText( assertionResult.getMessage() );
+                    break;
+                case ResultWas::FatalErrorCondition:
+                    m_xml.scopedElement( "Fatal Error Condition" )
+                        .writeAttribute( "filename", assertionResult.getSourceInfo().file )
+                        .writeAttribute( "line", assertionResult.getSourceInfo().line )
+                        .writeText( assertionResult.getMessage() );
+                    break;
+                case ResultWas::Info:
+                    m_xml.scopedElement( "Info" )
+                        .writeText( assertionResult.getMessage() );
+                    break;
+                case ResultWas::Warning:
+                    // Warning will already have been written
+                    break;
+                case ResultWas::ExplicitFailure:
+                    m_xml.scopedElement( "Failure" )
+                        .writeText( assertionResult.getMessage() );
+                    break;
+                default:
+                    break;
+            }
+
+            if( assertionResult.hasExpression() )
+                m_xml.endElement();
+
+            return true;
+        }
+
+        virtual void sectionEnded( SectionStats const& sectionStats ) {
+            StreamingReporterBase::sectionEnded( sectionStats );
+            if( --m_sectionDepth > 0 ) {
+                XmlWriter::ScopedElement e = m_xml.scopedElement( "OverallResults" );
+                e.writeAttribute( "successes", sectionStats.assertions.passed );
+                e.writeAttribute( "failures", sectionStats.assertions.failed );
+                e.writeAttribute( "expectedFailures", sectionStats.assertions.failedButOk );
+
+                if ( m_config->showDurations() == ShowDurations::Always )
+                    e.writeAttribute( "durationInSeconds", sectionStats.durationInSeconds );
+
+                m_xml.endElement();
+            }
+        }
+
+        virtual void testCaseEnded( TestCaseStats const& testCaseStats ) {
+            StreamingReporterBase::testCaseEnded( testCaseStats );
+            XmlWriter::ScopedElement e = m_xml.scopedElement( "OverallResult" );
+            e.writeAttribute( "success", testCaseStats.totals.assertions.allOk() );
+
+            if ( m_config->showDurations() == ShowDurations::Always )
+                e.writeAttribute( "durationInSeconds", m_testCaseTimer.getElapsedSeconds() );
+
+            m_xml.endElement();
+        }
+
+        virtual void testGroupEnded( TestGroupStats const& testGroupStats ) {
+            StreamingReporterBase::testGroupEnded( testGroupStats );
+            // TODO: Check testGroupStats.aborting and act accordingly.
+            m_xml.scopedElement( "OverallResults" )
+                .writeAttribute( "successes", testGroupStats.totals.assertions.passed )
+                .writeAttribute( "failures", testGroupStats.totals.assertions.failed )
+                .writeAttribute( "expectedFailures", testGroupStats.totals.assertions.failedButOk );
+            m_xml.endElement();
+        }
+
+        virtual void testRunEnded( TestRunStats const& testRunStats ) {
+            StreamingReporterBase::testRunEnded( testRunStats );
+            m_xml.scopedElement( "OverallResults" )
+                .writeAttribute( "successes", testRunStats.totals.assertions.passed )
+                .writeAttribute( "failures", testRunStats.totals.assertions.failed )
+                .writeAttribute( "expectedFailures", testRunStats.totals.assertions.failedButOk );
+            m_xml.endElement();
+        }
+
+    private:
+        Timer m_testCaseTimer;
+        XmlWriter m_xml;
+        int m_sectionDepth;
+    };
+
+    INTERNAL_CATCH_REGISTER_REPORTER( "xml", XmlReporter )
+
+} // end namespace Catch
+
+// #included from: ../reporters/catch_reporter_junit.hpp
+#define TWOBLUECUBES_CATCH_REPORTER_JUNIT_HPP_INCLUDED
+
+#include <assert.h>
+
+namespace Catch {
+
+    class JunitReporter : public CumulativeReporterBase {
+    public:
+        JunitReporter( ReporterConfig const& _config )
+        :   CumulativeReporterBase( _config ),
+            xml( _config.stream() )
+        {}
+
+        ~JunitReporter();
+
+        static std::string getDescription() {
+            return "Reports test results in an XML format that looks like Ant's junitreport target";
+        }
+
+        virtual void noMatchingTestCases( std::string const& /*spec*/ ) {}
+
+        virtual ReporterPreferences getPreferences() const {
+            ReporterPreferences prefs;
+            prefs.shouldRedirectStdOut = true;
+            return prefs;
+        }
+
+        virtual void testRunStarting( TestRunInfo const& runInfo ) {
+            CumulativeReporterBase::testRunStarting( runInfo );
+            xml.startElement( "testsuites" );
+        }
+
+        virtual void testGroupStarting( GroupInfo const& groupInfo ) {
+            suiteTimer.start();
+            stdOutForSuite.str("");
+            stdErrForSuite.str("");
+            unexpectedExceptions = 0;
+            CumulativeReporterBase::testGroupStarting( groupInfo );
+        }
+
+        virtual bool assertionEnded( AssertionStats const& assertionStats ) {
+            if( assertionStats.assertionResult.getResultType() == ResultWas::ThrewException )
+                unexpectedExceptions++;
+            return CumulativeReporterBase::assertionEnded( assertionStats );
+        }
+
+        virtual void testCaseEnded( TestCaseStats const& testCaseStats ) {
+            stdOutForSuite << testCaseStats.stdOut;
+            stdErrForSuite << testCaseStats.stdErr;
+            CumulativeReporterBase::testCaseEnded( testCaseStats );
+        }
+
+        virtual void testGroupEnded( TestGroupStats const& testGroupStats ) {
+            double suiteTime = suiteTimer.getElapsedSeconds();
+            CumulativeReporterBase::testGroupEnded( testGroupStats );
+            writeGroup( *m_testGroups.back(), suiteTime );
+        }
+
+        virtual void testRunEndedCumulative() {
+            xml.endElement();
+        }
+
+        void writeGroup( TestGroupNode const& groupNode, double suiteTime ) {
+            XmlWriter::ScopedElement e = xml.scopedElement( "testsuite" );
+            TestGroupStats const& stats = groupNode.value;
+            xml.writeAttribute( "name", stats.groupInfo.name );
+            xml.writeAttribute( "errors", unexpectedExceptions );
+            xml.writeAttribute( "failures", stats.totals.assertions.failed-unexpectedExceptions );
+            xml.writeAttribute( "tests", stats.totals.assertions.total() );
+            xml.writeAttribute( "hostname", "tbd" ); // !TBD
+            if( m_config->showDurations() == ShowDurations::Never )
+                xml.writeAttribute( "time", "" );
+            else
+                xml.writeAttribute( "time", suiteTime );
+            xml.writeAttribute( "timestamp", "tbd" ); // !TBD
+
+            // Write test cases
+            for( TestGroupNode::ChildNodes::const_iterator
+                    it = groupNode.children.begin(), itEnd = groupNode.children.end();
+                    it != itEnd;
+                    ++it )
+                writeTestCase( **it );
+
+            xml.scopedElement( "system-out" ).writeText( trim( stdOutForSuite.str() ), false );
+            xml.scopedElement( "system-err" ).writeText( trim( stdErrForSuite.str() ), false );
+        }
+
+        void writeTestCase( TestCaseNode const& testCaseNode ) {
+            TestCaseStats const& stats = testCaseNode.value;
+
+            // All test cases have exactly one section - which represents the
+            // test case itself. That section may have 0-n nested sections
+            assert( testCaseNode.children.size() == 1 );
+            SectionNode const& rootSection = *testCaseNode.children.front();
+
+            std::string className = stats.testInfo.className;
+
+            if( className.empty() ) {
+                if( rootSection.childSections.empty() )
+                    className = "global";
+            }
+            writeSection( className, "", rootSection );
+        }
+
+        void writeSection(  std::string const& className,
+                            std::string const& rootName,
+                            SectionNode const& sectionNode ) {
+            std::string name = trim( sectionNode.stats.sectionInfo.name );
+            if( !rootName.empty() )
+                name = rootName + "/" + name;
+
+            if( !sectionNode.assertions.empty() ||
+                !sectionNode.stdOut.empty() ||
+                !sectionNode.stdErr.empty() ) {
+                XmlWriter::ScopedElement e = xml.scopedElement( "testcase" );
+                if( className.empty() ) {
+                    xml.writeAttribute( "classname", name );
+                    xml.writeAttribute( "name", "root" );
+                }
+                else {
+                    xml.writeAttribute( "classname", className );
+                    xml.writeAttribute( "name", name );
+                }
+                xml.writeAttribute( "time", Catch::toString( sectionNode.stats.durationInSeconds ) );
+
+                writeAssertions( sectionNode );
+
+                if( !sectionNode.stdOut.empty() )
+                    xml.scopedElement( "system-out" ).writeText( trim( sectionNode.stdOut ), false );
+                if( !sectionNode.stdErr.empty() )
+                    xml.scopedElement( "system-err" ).writeText( trim( sectionNode.stdErr ), false );
+            }
+            for( SectionNode::ChildSections::const_iterator
+                    it = sectionNode.childSections.begin(),
+                    itEnd = sectionNode.childSections.end();
+                    it != itEnd;
+                    ++it )
+                if( className.empty() )
+                    writeSection( name, "", **it );
+                else
+                    writeSection( className, name, **it );
+        }
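+
+        // Illustrative mapping (a sketch traced from the recursion above, not
+        // upstream documentation): assertions made in a section "shrink"
+        // nested inside a section "resize" of test case "Vectors" end up
+        // under <testcase classname="Vectors" name="resize/shrink">, while a
+        // sectionless test case with an empty class name is reported with
+        // classname="global".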
+
+        void writeAssertions( SectionNode const& sectionNode ) {
+            for( SectionNode::Assertions::const_iterator
+                    it = sectionNode.assertions.begin(), itEnd = sectionNode.assertions.end();
+                    it != itEnd;
+                    ++it )
+                writeAssertion( *it );
+        }
+        void writeAssertion( AssertionStats const& stats ) {
+            AssertionResult const& result = stats.assertionResult;
+            if( !result.isOk() ) {
+                std::string elementName;
+                switch( result.getResultType() ) {
+                    case ResultWas::ThrewException:
+                    case ResultWas::FatalErrorCondition:
+                        elementName = "error";
+                        break;
+                    case ResultWas::ExplicitFailure:
+                        elementName = "failure";
+                        break;
+                    case ResultWas::ExpressionFailed:
+                        elementName = "failure";
+                        break;
+                    case ResultWas::DidntThrowException:
+                        elementName = "failure";
+                        break;
+
+                    // We should never see these here:
+                    case ResultWas::Info:
+                    case ResultWas::Warning:
+                    case ResultWas::Ok:
+                    case ResultWas::Unknown:
+                    case ResultWas::FailureBit:
+                    case ResultWas::Exception:
+                        elementName = "internalError";
+                        break;
+                }
+
+                XmlWriter::ScopedElement e = xml.scopedElement( elementName );
+
+                xml.writeAttribute( "message", result.getExpandedExpression() );
+                xml.writeAttribute( "type", result.getTestMacroName() );
+
+                std::ostringstream oss;
+                if( !result.getMessage().empty() )
+                    oss << result.getMessage() << "\n";
+                for( std::vector<MessageInfo>::const_iterator
+                        it = stats.infoMessages.begin(),
+                        itEnd = stats.infoMessages.end();
+                            it != itEnd;
+                            ++it )
+                    if( it->type == ResultWas::Info )
+                        oss << it->message << "\n";
+
+                oss << "at " << result.getSourceInfo();
+                xml.writeText( oss.str(), false );
+            }
+        }
+
+        XmlWriter xml;
+        Timer suiteTimer;
+        std::ostringstream stdOutForSuite;
+        std::ostringstream stdErrForSuite;
+        unsigned int unexpectedExceptions;
+    };
+
+    INTERNAL_CATCH_REGISTER_REPORTER( "junit", JunitReporter )
+
+} // end namespace Catch
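+
+// Illustrative usage (a sketch, not upstream documentation): reporters
+// registered with INTERNAL_CATCH_REGISTER_REPORTER are selected by name at
+// runtime via Catch's -r/--reporter option; the binary and file names below
+// are hypothetical:
+//
+//     ./unit_tests -r console             # default human-readable output
+//     ./unit_tests -r junit -o junit.xml  # Ant/Jenkins-style XML written to a file
+//     ./unit_tests -r xml                 # Catch's native XML reporter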
+
+// #included from: ../reporters/catch_reporter_console.hpp
+#define TWOBLUECUBES_CATCH_REPORTER_CONSOLE_HPP_INCLUDED
+
+namespace Catch {
+
+    struct ConsoleReporter : StreamingReporterBase {
+        ConsoleReporter( ReporterConfig const& _config )
+        :   StreamingReporterBase( _config ),
+            m_headerPrinted( false )
+        {}
+
+        virtual ~ConsoleReporter();
+        static std::string getDescription() {
+            return "Reports test results as plain lines of text";
+        }
+        virtual ReporterPreferences getPreferences() const {
+            ReporterPreferences prefs;
+            prefs.shouldRedirectStdOut = false;
+            return prefs;
+        }
+
+        virtual void noMatchingTestCases( std::string const& spec ) {
+            stream << "No test cases matched '" << spec << "'" << std::endl;
+        }
+
+        virtual void assertionStarting( AssertionInfo const& ) {
+        }
+
+        virtual bool assertionEnded( AssertionStats const& _assertionStats ) {
+            AssertionResult const& result = _assertionStats.assertionResult;
+
+            bool printInfoMessages = true;
+
+            // Drop out if result was successful and we're not printing those
+            if( !m_config->includeSuccessfulResults() && result.isOk() ) {
+                if( result.getResultType() != ResultWas::Warning )
+                    return false;
+                printInfoMessages = false;
+            }
+
+            lazyPrint();
+
+            AssertionPrinter printer( stream, _assertionStats, printInfoMessages );
+            printer.print();
+            stream << std::endl;
+            return true;
+        }
+
+        virtual void sectionStarting( SectionInfo const& _sectionInfo ) {
+            m_headerPrinted = false;
+            StreamingReporterBase::sectionStarting( _sectionInfo );
+        }
+        virtual void sectionEnded( SectionStats const& _sectionStats ) {
+            if( _sectionStats.missingAssertions ) {
+                lazyPrint();
+                Colour colour( Colour::ResultError );
+                if( m_sectionStack.size() > 1 )
+                    stream << "\nNo assertions in section";
+                else
+                    stream << "\nNo assertions in test case";
+                stream << " '" << _sectionStats.sectionInfo.name << "'\n" << std::endl;
+            }
+            if( m_headerPrinted ) {
+                if( m_config->showDurations() == ShowDurations::Always )
+                    stream << "Completed in " << _sectionStats.durationInSeconds << "s" << std::endl;
+                m_headerPrinted = false;
+            }
+            else {
+                if( m_config->showDurations() == ShowDurations::Always )
+                    stream << _sectionStats.sectionInfo.name << " completed in " << _sectionStats.durationInSeconds << "s" << std::endl;
+            }
+            StreamingReporterBase::sectionEnded( _sectionStats );
+        }
+
+        virtual void testCaseEnded( TestCaseStats const& _testCaseStats ) {
+            StreamingReporterBase::testCaseEnded( _testCaseStats );
+            m_headerPrinted = false;
+        }
+        virtual void testGroupEnded( TestGroupStats const& _testGroupStats ) {
+            if( currentGroupInfo.used ) {
+                printSummaryDivider();
+                stream << "Summary for group '" << _testGroupStats.groupInfo.name << "':\n";
+                printTotals( _testGroupStats.totals );
+                stream << "\n" << std::endl;
+            }
+            StreamingReporterBase::testGroupEnded( _testGroupStats );
+        }
+        virtual void testRunEnded( TestRunStats const& _testRunStats ) {
+            printTotalsDivider( _testRunStats.totals );
+            printTotals( _testRunStats.totals );
+            stream << std::endl;
+            StreamingReporterBase::testRunEnded( _testRunStats );
+        }
+
+    private:
+
+        class AssertionPrinter {
+            void operator= ( AssertionPrinter const& );
+        public:
+            AssertionPrinter( std::ostream& _stream, AssertionStats const& _stats, bool _printInfoMessages )
+            :   stream( _stream ),
+                stats( _stats ),
+                result( _stats.assertionResult ),
+                colour( Colour::None ),
+                message( result.getMessage() ),
+                messages( _stats.infoMessages ),
+                printInfoMessages( _printInfoMessages )
+            {
+                switch( result.getResultType() ) {
+                    case ResultWas::Ok:
+                        colour = Colour::Success;
+                        passOrFail = "PASSED";
+                        //if( result.hasMessage() )
+                        if( _stats.infoMessages.size() == 1 )
+                            messageLabel = "with message";
+                        if( _stats.infoMessages.size() > 1 )
+                            messageLabel = "with messages";
+                        break;
+                    case ResultWas::ExpressionFailed:
+                        if( result.isOk() ) {
+                            colour = Colour::Success;
+                            passOrFail = "FAILED - but was ok";
+                        }
+                        else {
+                            colour = Colour::Error;
+                            passOrFail = "FAILED";
+                        }
+                        if( _stats.infoMessages.size() == 1 )
+                            messageLabel = "with message";
+                        if( _stats.infoMessages.size() > 1 )
+                            messageLabel = "with messages";
+                        break;
+                    case ResultWas::ThrewException:
+                        colour = Colour::Error;
+                        passOrFail = "FAILED";
+                        messageLabel = "due to unexpected exception with message";
+                        break;
+                    case ResultWas::FatalErrorCondition:
+                        colour = Colour::Error;
+                        passOrFail = "FAILED";
+                        messageLabel = "due to a fatal error condition";
+                        break;
+                    case ResultWas::DidntThrowException:
+                        colour = Colour::Error;
+                        passOrFail = "FAILED";
+                        messageLabel = "because no exception was thrown where one was expected";
+                        break;
+                    case ResultWas::Info:
+                        messageLabel = "info";
+                        break;
+                    case ResultWas::Warning:
+                        messageLabel = "warning";
+                        break;
+                    case ResultWas::ExplicitFailure:
+                        passOrFail = "FAILED";
+                        colour = Colour::Error;
+                        if( _stats.infoMessages.size() == 1 )
+                            messageLabel = "explicitly with message";
+                        if( _stats.infoMessages.size() > 1 )
+                            messageLabel = "explicitly with messages";
+                        break;
+                    // These cases are here to prevent compiler warnings
+                    case ResultWas::Unknown:
+                    case ResultWas::FailureBit:
+                    case ResultWas::Exception:
+                        passOrFail = "** internal error **";
+                        colour = Colour::Error;
+                        break;
+                }
+            }
+
+            void print() const {
+                printSourceInfo();
+                if( stats.totals.assertions.total() > 0 ) {
+                    if( result.isOk() )
+                        stream << "\n";
+                    printResultType();
+                    printOriginalExpression();
+                    printReconstructedExpression();
+                }
+                else {
+                    stream << "\n";
+                }
+                printMessage();
+            }
+
+        private:
+            void printResultType() const {
+                if( !passOrFail.empty() ) {
+                    Colour colourGuard( colour );
+                    stream << passOrFail << ":\n";
+                }
+            }
+            void printOriginalExpression() const {
+                if( result.hasExpression() ) {
+                    Colour colourGuard( Colour::OriginalExpression );
+                    stream  << "  ";
+                    stream << result.getExpressionInMacro();
+                    stream << "\n";
+                }
+            }
+            void printReconstructedExpression() const {
+                if( result.hasExpandedExpression() ) {
+                    stream << "with expansion:\n";
+                    Colour colourGuard( Colour::ReconstructedExpression );
+                    stream << Text( result.getExpandedExpression(), TextAttributes().setIndent(2) ) << "\n";
+                }
+            }
+            void printMessage() const {
+                if( !messageLabel.empty() )
+                    stream << messageLabel << ":" << "\n";
+                for( std::vector<MessageInfo>::const_iterator it = messages.begin(), itEnd = messages.end();
+                        it != itEnd;
+                        ++it ) {
+                    // If this assertion is a warning ignore any INFO messages
+                    if( printInfoMessages || it->type != ResultWas::Info )
+                        stream << Text( it->message, TextAttributes().setIndent(2) ) << "\n";
+                }
+            }
+            void printSourceInfo() const {
+                Colour colourGuard( Colour::FileName );
+                stream << result.getSourceInfo() << ": ";
+            }
+
+            std::ostream& stream;
+            AssertionStats const& stats;
+            AssertionResult const& result;
+            Colour::Code colour;
+            std::string passOrFail;
+            std::string messageLabel;
+            std::string message;
+            std::vector<MessageInfo> messages;
+            bool printInfoMessages;
+        };
+
+        void lazyPrint() {
+
+            if( !currentTestRunInfo.used )
+                lazyPrintRunInfo();
+            if( !currentGroupInfo.used )
+                lazyPrintGroupInfo();
+
+            if( !m_headerPrinted ) {
+                printTestCaseAndSectionHeader();
+                m_headerPrinted = true;
+            }
+        }
+        void lazyPrintRunInfo() {
+            stream  << "\n" << getLineOfChars<'~'>() << "\n";
+            Colour colour( Colour::SecondaryText );
+            stream  << currentTestRunInfo->name
+                    << " is a Catch v"  << libraryVersion << " host application.\n"
+                    << "Run with -? for options\n\n";
+
+            if( m_config->rngSeed() != 0 )
+                stream << "Randomness seeded to: " << m_config->rngSeed() << "\n\n";
+
+            currentTestRunInfo.used = true;
+        }
+        void lazyPrintGroupInfo() {
+            if( !currentGroupInfo->name.empty() && currentGroupInfo->groupsCounts > 1 ) {
+                printClosedHeader( "Group: " + currentGroupInfo->name );
+                currentGroupInfo.used = true;
+            }
+        }
+        void printTestCaseAndSectionHeader() {
+            assert( !m_sectionStack.empty() );
+            printOpenHeader( currentTestCaseInfo->name );
+
+            if( m_sectionStack.size() > 1 ) {
+                Colour colourGuard( Colour::Headers );
+
+                std::vector<SectionInfo>::const_iterator
+                    it = m_sectionStack.begin()+1, // Skip first section (test case)
+                    itEnd = m_sectionStack.end();
+                for( ; it != itEnd; ++it )
+                    printHeaderString( it->name, 2 );
+            }
+
+            SourceLineInfo lineInfo = m_sectionStack.front().lineInfo;
+
+            if( !lineInfo.empty() ){
+                stream << getLineOfChars<'-'>() << "\n";
+                Colour colourGuard( Colour::FileName );
+                stream << lineInfo << "\n";
+            }
+            stream << getLineOfChars<'.'>() << "\n" << std::endl;
+        }
+
+        void printClosedHeader( std::string const& _name ) {
+            printOpenHeader( _name );
+            stream << getLineOfChars<'.'>() << "\n";
+        }
+        void printOpenHeader( std::string const& _name ) {
+            stream  << getLineOfChars<'-'>() << "\n";
+            {
+                Colour colourGuard( Colour::Headers );
+                printHeaderString( _name );
+            }
+        }
+
+        // If the string contains ": " in its first line, the wrap indent for
+        // subsequent lines is set to continue just after it.
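+        // Worked example (illustrative): printHeaderString( "Group: widgets" )
+        // finds ": " at position 5, so the wrap indent becomes 7 and wrapped
+        // continuation lines line up under the 'w' of "widgets"; a header
+        // with no ": " wraps flush with the given indent.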
+        void printHeaderString( std::string const& _string, std::size_t indent = 0 ) {
+            std::size_t i = _string.find( ": " );
+            if( i != std::string::npos )
+                i+=2;
+            else
+                i = 0;
+            stream << Text( _string, TextAttributes()
+                                        .setIndent( indent+i)
+                                        .setInitialIndent( indent ) ) << "\n";
+        }
+
+        struct SummaryColumn {
+
+            SummaryColumn( std::string const& _label, Colour::Code _colour )
+            :   label( _label ),
+                colour( _colour )
+            {}
+            SummaryColumn addRow( std::size_t count ) {
+                std::ostringstream oss;
+                oss << count;
+                std::string row = oss.str();
+                for( std::vector<std::string>::iterator it = rows.begin(); it != rows.end(); ++it ) {
+                    while( it->size() < row.size() )
+                        *it = " " + *it;
+                    while( it->size() > row.size() )
+                        row = " " + row;
+                }
+                rows.push_back( row );
+                return *this;
+            }
+
+            std::string label;
+            Colour::Code colour;
+            std::vector<std::string> rows;
+
+        };
+
+        void printTotals( Totals const& totals ) {
+            if( totals.testCases.total() == 0 ) {
+                stream << Colour( Colour::Warning ) << "No tests ran\n";
+            }
+            else if( totals.assertions.total() > 0 && totals.assertions.allPassed() ) {
+                stream << Colour( Colour::ResultSuccess ) << "All tests passed";
+                stream << " ("
+                        << pluralise( totals.assertions.passed, "assertion" ) << " in "
+                        << pluralise( totals.testCases.passed, "test case" ) << ")"
+                        << "\n";
+            }
+            else {
+
+                std::vector<SummaryColumn> columns;
+                columns.push_back( SummaryColumn( "", Colour::None )
+                                        .addRow( totals.testCases.total() )
+                                        .addRow( totals.assertions.total() ) );
+                columns.push_back( SummaryColumn( "passed", Colour::Success )
+                                        .addRow( totals.testCases.passed )
+                                        .addRow( totals.assertions.passed ) );
+                columns.push_back( SummaryColumn( "failed", Colour::ResultError )
+                                        .addRow( totals.testCases.failed )
+                                        .addRow( totals.assertions.failed ) );
+                columns.push_back( SummaryColumn( "failed as expected", Colour::ResultExpectedFailure )
+                                        .addRow( totals.testCases.failedButOk )
+                                        .addRow( totals.assertions.failedButOk ) );
+
+                printSummaryRow( "test cases", columns, 0 );
+                printSummaryRow( "assertions", columns, 1 );
+            }
+        }
+        void printSummaryRow( std::string const& label, std::vector<SummaryColumn> const& cols, std::size_t row ) {
+            for( std::vector<SummaryColumn>::const_iterator it = cols.begin(); it != cols.end(); ++it ) {
+                std::string value = it->rows[row];
+                if( it->label.empty() ) {
+                    stream << label << ": ";
+                    if( value != "0" )
+                        stream << value;
+                    else
+                        stream << Colour( Colour::Warning ) << "- none -";
+                }
+                else if( value != "0" ) {
+                    stream  << Colour( Colour::LightGrey ) << " | ";
+                    stream  << Colour( it->colour )
+                            << value << " " << it->label;
+                }
+            }
+            stream << "\n";
+        }
+
+        static std::size_t makeRatio( std::size_t number, std::size_t total ) {
+            std::size_t ratio = total > 0 ? CATCH_CONFIG_CONSOLE_WIDTH * number/ total : 0;
+            return ( ratio == 0 && number > 0 ) ? 1 : ratio;
+        }
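+        // Worked example (illustrative, assuming the default
+        // CATCH_CONFIG_CONSOLE_WIDTH of 80):
+        //   makeRatio( 3, 10 )  -> 80 * 3 / 10  = 24 columns
+        //   makeRatio( 1, 200 ) -> 80 * 1 / 200 = 0, bumped up to 1 so a lone
+        //                          failure is never invisible in the divider
+        //   makeRatio( 0, 10 )  -> 0 columns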
+        static std::size_t& findMax( std::size_t& i, std::size_t& j, std::size_t& k ) {
+            if( i > j && i > k )
+                return i;
+            else if( j > k )
+                return j;
+            else
+                return k;
+        }
+
+        void printTotalsDivider( Totals const& totals ) {
+            if( totals.testCases.total() > 0 ) {
+                std::size_t failedRatio = makeRatio( totals.testCases.failed, totals.testCases.total() );
+                std::size_t failedButOkRatio = makeRatio( totals.testCases.failedButOk, totals.testCases.total() );
+                std::size_t passedRatio = makeRatio( totals.testCases.passed, totals.testCases.total() );
+                while( failedRatio + failedButOkRatio + passedRatio < CATCH_CONFIG_CONSOLE_WIDTH-1 )
+                    findMax( failedRatio, failedButOkRatio, passedRatio )++;
+                while( failedRatio + failedButOkRatio + passedRatio > CATCH_CONFIG_CONSOLE_WIDTH-1 )
+                    findMax( failedRatio, failedButOkRatio, passedRatio )--;
+
+                stream << Colour( Colour::Error ) << std::string( failedRatio, '=' );
+                stream << Colour( Colour::ResultExpectedFailure ) << std::string( failedButOkRatio, '=' );
+                if( totals.testCases.allPassed() )
+                    stream << Colour( Colour::ResultSuccess ) << std::string( passedRatio, '=' );
+                else
+                    stream << Colour( Colour::Success ) << std::string( passedRatio, '=' );
+            }
+            else {
+                stream << Colour( Colour::Warning ) << std::string( CATCH_CONFIG_CONSOLE_WIDTH-1, '=' );
+            }
+            stream << "\n";
+        }
+        void printSummaryDivider() {
+            stream << getLineOfChars<'-'>() << "\n";
+        }
+
+    private:
+        bool m_headerPrinted;
+    };
+
+    INTERNAL_CATCH_REGISTER_REPORTER( "console", ConsoleReporter )
+
+} // end namespace Catch
+
+// #included from: ../reporters/catch_reporter_compact.hpp
+#define TWOBLUECUBES_CATCH_REPORTER_COMPACT_HPP_INCLUDED
+
+namespace Catch {
+
+    struct CompactReporter : StreamingReporterBase {
+
+        CompactReporter( ReporterConfig const& _config )
+        : StreamingReporterBase( _config )
+        {}
+
+        virtual ~CompactReporter();
+
+        static std::string getDescription() {
+            return "Reports test results on a single line, suitable for IDEs";
+        }
+
+        virtual ReporterPreferences getPreferences() const {
+            ReporterPreferences prefs;
+            prefs.shouldRedirectStdOut = false;
+            return prefs;
+        }
+
+        virtual void noMatchingTestCases( std::string const& spec ) {
+            stream << "No test cases matched '" << spec << "'" << std::endl;
+        }
+
+        virtual void assertionStarting( AssertionInfo const& ) {
+        }
+
+        virtual bool assertionEnded( AssertionStats const& _assertionStats ) {
+            AssertionResult const& result = _assertionStats.assertionResult;
+
+            bool printInfoMessages = true;
+
+            // Drop out if result was successful and we're not printing those
+            if( !m_config->includeSuccessfulResults() && result.isOk() ) {
+                if( result.getResultType() != ResultWas::Warning )
+                    return false;
+                printInfoMessages = false;
+            }
+
+            AssertionPrinter printer( stream, _assertionStats, printInfoMessages );
+            printer.print();
+
+            stream << std::endl;
+            return true;
+        }
+
+        virtual void testRunEnded( TestRunStats const& _testRunStats ) {
+            printTotals( _testRunStats.totals );
+            stream << "\n" << std::endl;
+            StreamingReporterBase::testRunEnded( _testRunStats );
+        }
+
+    private:
+        class AssertionPrinter {
+            void operator= ( AssertionPrinter const& );
+        public:
+            AssertionPrinter( std::ostream& _stream, AssertionStats const& _stats, bool _printInfoMessages )
+            : stream( _stream )
+            , stats( _stats )
+            , result( _stats.assertionResult )
+            , messages( _stats.infoMessages )
+            , itMessage( _stats.infoMessages.begin() )
+            , printInfoMessages( _printInfoMessages )
+            {}
+
+            void print() {
+                printSourceInfo();
+
+                itMessage = messages.begin();
+
+                switch( result.getResultType() ) {
+                    case ResultWas::Ok:
+                        printResultType( Colour::ResultSuccess, passedString() );
+                        printOriginalExpression();
+                        printReconstructedExpression();
+                        if ( ! result.hasExpression() )
+                            printRemainingMessages( Colour::None );
+                        else
+                            printRemainingMessages();
+                        break;
+                    case ResultWas::ExpressionFailed:
+                        if( result.isOk() )
+                            printResultType( Colour::ResultSuccess, failedString() + std::string( " - but was ok" ) );
+                        else
+                            printResultType( Colour::Error, failedString() );
+                        printOriginalExpression();
+                        printReconstructedExpression();
+                        printRemainingMessages();
+                        break;
+                    case ResultWas::ThrewException:
+                        printResultType( Colour::Error, failedString() );
+                        printIssue( "unexpected exception with message:" );
+                        printMessage();
+                        printExpressionWas();
+                        printRemainingMessages();
+                        break;
+                    case ResultWas::FatalErrorCondition:
+                        printResultType( Colour::Error, failedString() );
+                        printIssue( "fatal error condition with message:" );
+                        printMessage();
+                        printExpressionWas();
+                        printRemainingMessages();
+                        break;
+                    case ResultWas::DidntThrowException:
+                        printResultType( Colour::Error, failedString() );
+                        printIssue( "expected exception, got none" );
+                        printExpressionWas();
+                        printRemainingMessages();
+                        break;
+                    case ResultWas::Info:
+                        printResultType( Colour::None, "info" );
+                        printMessage();
+                        printRemainingMessages();
+                        break;
+                    case ResultWas::Warning:
+                        printResultType( Colour::None, "warning" );
+                        printMessage();
+                        printRemainingMessages();
+                        break;
+                    case ResultWas::ExplicitFailure:
+                        printResultType( Colour::Error, failedString() );
+                        printIssue( "explicitly" );
+                        printRemainingMessages( Colour::None );
+                        break;
+                    // These cases are here to prevent compiler warnings
+                    case ResultWas::Unknown:
+                    case ResultWas::FailureBit:
+                    case ResultWas::Exception:
+                        printResultType( Colour::Error, "** internal error **" );
+                        break;
+                }
+            }
+
+        private:
+            // Colour::LightGrey
+
+            static Colour::Code dimColour() { return Colour::FileName; }
+
+#ifdef CATCH_PLATFORM_MAC
+            static const char* failedString() { return "FAILED"; }
+            static const char* passedString() { return "PASSED"; }
+#else
+            static const char* failedString() { return "failed"; }
+            static const char* passedString() { return "passed"; }
+#endif
+
+            void printSourceInfo() const {
+                Colour colourGuard( Colour::FileName );
+                stream << result.getSourceInfo() << ":";
+            }
+
+            void printResultType( Colour::Code colour, std::string passOrFail ) const {
+                if( !passOrFail.empty() ) {
+                    {
+                        Colour colourGuard( colour );
+                        stream << " " << passOrFail;
+                    }
+                    stream << ":";
+                }
+            }
+
+            void printIssue( std::string issue ) const {
+                stream << " " << issue;
+            }
+
+            void printExpressionWas() {
+                if( result.hasExpression() ) {
+                    stream << ";";
+                    {
+                        Colour colour( dimColour() );
+                        stream << " expression was:";
+                    }
+                    printOriginalExpression();
+                }
+            }
+
+            void printOriginalExpression() const {
+                if( result.hasExpression() ) {
+                    stream << " " << result.getExpression();
+                }
+            }
+
+            void printReconstructedExpression() const {
+                if( result.hasExpandedExpression() ) {
+                    {
+                        Colour colour( dimColour() );
+                        stream << " for: ";
+                    }
+                    stream << result.getExpandedExpression();
+                }
+            }
+
+            void printMessage() {
+                if ( itMessage != messages.end() ) {
+                    stream << " '" << itMessage->message << "'";
+                    ++itMessage;
+                }
+            }
+
+            void printRemainingMessages( Colour::Code colour = dimColour() ) {
+                if ( itMessage == messages.end() )
+                    return;
+
+                // using messages.end() directly yields compilation error:
+                std::vector<MessageInfo>::const_iterator itEnd = messages.end();
+                const std::size_t N = static_cast<std::size_t>( std::distance( itMessage, itEnd ) );
+
+                {
+                    Colour colourGuard( colour );
+                    stream << " with " << pluralise( N, "message" ) << ":";
+                }
+
+                for(; itMessage != itEnd; ) {
+                    // If this assertion is a warning ignore any INFO messages
+                    if( printInfoMessages || itMessage->type != ResultWas::Info ) {
+                        stream << " '" << itMessage->message << "'";
+                        if ( ++itMessage != itEnd ) {
+                            Colour colourGuard( dimColour() );
+                            stream << " and";
+                        }
+                    }
+                    else {
+                        // Skip suppressed INFO messages; without this the
+                        // iterator never advances and the loop never ends.
+                        ++itMessage;
+                    }
+                }
+            }
+
+        private:
+            std::ostream& stream;
+            AssertionStats const& stats;
+            AssertionResult const& result;
+            std::vector<MessageInfo> messages;
+            std::vector<MessageInfo>::const_iterator itMessage;
+            bool printInfoMessages;
+        };
+
+        // Colour, message variants:
+        // - white: No tests ran.
+        // -   red: Failed [both/all] N test cases, failed [both/all] M assertions.
+        // - white: Passed [both/all] N test cases (no assertions).
+        // -   red: Failed N test cases, failed M assertions.
+        // - green: Passed [both/all] N test cases with M assertions.
+
+        std::string bothOrAll( std::size_t count ) const {
+            return count == 1 ? "" : count == 2 ? "both " : "all " ;
+        }
+
+        void printTotals( const Totals& totals ) const {
+            if( totals.testCases.total() == 0 ) {
+                stream << "No tests ran.";
+            }
+            else if( totals.testCases.failed == totals.testCases.total() ) {
+                Colour colour( Colour::ResultError );
+                const std::string qualify_assertions_failed =
+                    totals.assertions.failed == totals.assertions.total() ?
+                        bothOrAll( totals.assertions.failed ) : "";
+                stream <<
+                    "Failed " << bothOrAll( totals.testCases.failed )
+                              << pluralise( totals.testCases.failed, "test case"  ) << ", "
+                    "failed " << qualify_assertions_failed <<
+                                 pluralise( totals.assertions.failed, "assertion" ) << ".";
+            }
+            else if( totals.assertions.total() == 0 ) {
+                stream <<
+                    "Passed " << bothOrAll( totals.testCases.total() )
+                              << pluralise( totals.testCases.total(), "test case" )
+                              << " (no assertions).";
+            }
+            else if( totals.assertions.failed ) {
+                Colour colour( Colour::ResultError );
+                stream <<
+                    "Failed " << pluralise( totals.testCases.failed, "test case"  ) << ", "
+                    "failed " << pluralise( totals.assertions.failed, "assertion" ) << ".";
+            }
+            else {
+                Colour colour( Colour::ResultSuccess );
+                stream <<
+                    "Passed " << bothOrAll( totals.testCases.passed )
+                              << pluralise( totals.testCases.passed, "test case"  ) <<
+                    " with "  << pluralise( totals.assertions.passed, "assertion" ) << ".";
+            }
+        }
+    };
+
+    INTERNAL_CATCH_REGISTER_REPORTER( "compact", CompactReporter )
+
+} // end namespace Catch
+
+namespace Catch {
+    NonCopyable::~NonCopyable() {}
+    IShared::~IShared() {}
+    StreamBufBase::~StreamBufBase() CATCH_NOEXCEPT {}
+    IContext::~IContext() {}
+    IResultCapture::~IResultCapture() {}
+    ITestCase::~ITestCase() {}
+    ITestCaseRegistry::~ITestCaseRegistry() {}
+    IRegistryHub::~IRegistryHub() {}
+    IMutableRegistryHub::~IMutableRegistryHub() {}
+    IExceptionTranslator::~IExceptionTranslator() {}
+    IExceptionTranslatorRegistry::~IExceptionTranslatorRegistry() {}
+    IReporter::~IReporter() {}
+    IReporterFactory::~IReporterFactory() {}
+    IReporterRegistry::~IReporterRegistry() {}
+    IStreamingReporter::~IStreamingReporter() {}
+    AssertionStats::~AssertionStats() {}
+    SectionStats::~SectionStats() {}
+    TestCaseStats::~TestCaseStats() {}
+    TestGroupStats::~TestGroupStats() {}
+    TestRunStats::~TestRunStats() {}
+    CumulativeReporterBase::SectionNode::~SectionNode() {}
+    CumulativeReporterBase::~CumulativeReporterBase() {}
+
+    StreamingReporterBase::~StreamingReporterBase() {}
+    ConsoleReporter::~ConsoleReporter() {}
+    CompactReporter::~CompactReporter() {}
+    IRunner::~IRunner() {}
+    IMutableContext::~IMutableContext() {}
+    IConfig::~IConfig() {}
+    XmlReporter::~XmlReporter() {}
+    JunitReporter::~JunitReporter() {}
+    TestRegistry::~TestRegistry() {}
+    FreeFunctionTestCase::~FreeFunctionTestCase() {}
+    IGeneratorInfo::~IGeneratorInfo() {}
+    IGeneratorsForTest::~IGeneratorsForTest() {}
+    TestSpec::Pattern::~Pattern() {}
+    TestSpec::NamePattern::~NamePattern() {}
+    TestSpec::TagPattern::~TagPattern() {}
+    TestSpec::ExcludedPattern::~ExcludedPattern() {}
+
+    Matchers::Impl::StdString::Equals::~Equals() {}
+    Matchers::Impl::StdString::Contains::~Contains() {}
+    Matchers::Impl::StdString::StartsWith::~StartsWith() {}
+    Matchers::Impl::StdString::EndsWith::~EndsWith() {}
+
+    void Config::dummy() {}
+}
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+#endif
+
+#ifdef CATCH_CONFIG_MAIN
+// #included from: internal/catch_default_main.hpp
+#define TWOBLUECUBES_CATCH_DEFAULT_MAIN_HPP_INCLUDED
+
+#ifndef __OBJC__
+
+// Standard C/C++ main entry point
+int main (int argc, char * const argv[]) {
+    return Catch::Session().run( argc, argv );
+}
+
+#else // __OBJC__
+
+// Objective-C entry point
+int main (int argc, char * const argv[]) {
+#if !CATCH_ARC_ENABLED
+    NSAutoreleasePool * pool = [[NSAutoreleasePool alloc] init];
+#endif
+
+    Catch::registerTestMethods();
+    int result = Catch::Session().run( argc, (char* const*)argv );
+
+#if !CATCH_ARC_ENABLED
+    [pool drain];
+#endif
+
+    return result;
+}
+
+#endif // __OBJC__
+
+#endif
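+
+// Illustrative usage (a sketch, not part of the upstream header): the default
+// main() above is pulled in by defining CATCH_CONFIG_MAIN in exactly one
+// translation unit before including catch.hpp. The file and test names here
+// are hypothetical:
+//
+//     // test_main.cpp
+//     #define CATCH_CONFIG_MAIN
+//     #include "catch.hpp"
+//
+//     TEST_CASE( "integer arithmetic", "[math]" ) {
+//         REQUIRE( 2 + 2 == 4 );
+//         CHECK( 6 * 7 == 42 );
+//     }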
+
+#ifdef CLARA_CONFIG_MAIN_NOT_DEFINED
+#  undef CLARA_CONFIG_MAIN
+#endif
+
+//////
+
+// If this config identifier is defined then all CATCH macros are prefixed with CATCH_
+#ifdef CATCH_CONFIG_PREFIX_ALL
+
+#define CATCH_REQUIRE( expr ) INTERNAL_CATCH_TEST( expr, Catch::ResultDisposition::Normal, "CATCH_REQUIRE" )
+#define CATCH_REQUIRE_FALSE( expr ) INTERNAL_CATCH_TEST( expr, Catch::ResultDisposition::Normal | Catch::ResultDisposition::FalseTest, "CATCH_REQUIRE_FALSE" )
+
+#define CATCH_REQUIRE_THROWS( expr ) INTERNAL_CATCH_THROWS( expr, Catch::ResultDisposition::Normal, "CATCH_REQUIRE_THROWS" )
+#define CATCH_REQUIRE_THROWS_AS( expr, exceptionType ) INTERNAL_CATCH_THROWS_AS( expr, exceptionType, Catch::ResultDisposition::Normal, "CATCH_REQUIRE_THROWS_AS" )
+#define CATCH_REQUIRE_NOTHROW( expr ) INTERNAL_CATCH_NO_THROW( expr, Catch::ResultDisposition::Normal, "CATCH_REQUIRE_NOTHROW" )
+
+#define CATCH_CHECK( expr ) INTERNAL_CATCH_TEST( expr, Catch::ResultDisposition::ContinueOnFailure, "CATCH_CHECK" )
+#define CATCH_CHECK_FALSE( expr ) INTERNAL_CATCH_TEST( expr, Catch::ResultDisposition::ContinueOnFailure | Catch::ResultDisposition::FalseTest, "CATCH_CHECK_FALSE" )
+#define CATCH_CHECKED_IF( expr ) INTERNAL_CATCH_IF( expr, Catch::ResultDisposition::ContinueOnFailure, "CATCH_CHECKED_IF" )
+#define CATCH_CHECKED_ELSE( expr ) INTERNAL_CATCH_ELSE( expr, Catch::ResultDisposition::ContinueOnFailure, "CATCH_CHECKED_ELSE" )
+#define CATCH_CHECK_NOFAIL( expr ) INTERNAL_CATCH_TEST( expr, Catch::ResultDisposition::ContinueOnFailure | Catch::ResultDisposition::SuppressFail, "CATCH_CHECK_NOFAIL" )
+
+#define CATCH_CHECK_THROWS( expr )  INTERNAL_CATCH_THROWS( expr, Catch::ResultDisposition::ContinueOnFailure, "CATCH_CHECK_THROWS" )
+#define CATCH_CHECK_THROWS_AS( expr, exceptionType ) INTERNAL_CATCH_THROWS_AS( expr, exceptionType, Catch::ResultDisposition::ContinueOnFailure, "CATCH_CHECK_THROWS_AS" )
+#define CATCH_CHECK_NOTHROW( expr ) INTERNAL_CATCH_NO_THROW( expr, Catch::ResultDisposition::ContinueOnFailure, "CATCH_CHECK_NOTHROW" )
+
+#define CATCH_CHECK_THAT( arg, matcher ) INTERNAL_CHECK_THAT( arg, matcher, Catch::ResultDisposition::ContinueOnFailure, "CATCH_CHECK_THAT" )
+#define CATCH_REQUIRE_THAT( arg, matcher ) INTERNAL_CHECK_THAT( arg, matcher, Catch::ResultDisposition::Normal, "CATCH_REQUIRE_THAT" )
+
+#define CATCH_INFO( msg ) INTERNAL_CATCH_INFO( msg, "CATCH_INFO" )
+#define CATCH_WARN( msg ) INTERNAL_CATCH_MSG( Catch::ResultWas::Warning, Catch::ResultDisposition::ContinueOnFailure, "CATCH_WARN", msg )
+#define CATCH_SCOPED_INFO( msg ) INTERNAL_CATCH_INFO( msg, "CATCH_INFO" )
+#define CATCH_CAPTURE( msg ) INTERNAL_CATCH_INFO( #msg " := " << msg, "CATCH_CAPTURE" )
+#define CATCH_SCOPED_CAPTURE( msg ) INTERNAL_CATCH_INFO( #msg " := " << msg, "CATCH_CAPTURE" )
+
+#ifdef CATCH_CONFIG_VARIADIC_MACROS
+    #define CATCH_TEST_CASE( ... ) INTERNAL_CATCH_TESTCASE( __VA_ARGS__ )
+    #define CATCH_TEST_CASE_METHOD( className, ... ) INTERNAL_CATCH_TEST_CASE_METHOD( className, __VA_ARGS__ )
+    #define CATCH_METHOD_AS_TEST_CASE( method, ... ) INTERNAL_CATCH_METHOD_AS_TEST_CASE( method, __VA_ARGS__ )
+    #define CATCH_SECTION( ... ) INTERNAL_CATCH_SECTION( __VA_ARGS__ )
+    #define CATCH_FAIL( ... ) INTERNAL_CATCH_MSG( Catch::ResultWas::ExplicitFailure, Catch::ResultDisposition::Normal, "CATCH_FAIL", __VA_ARGS__ )
+    #define CATCH_SUCCEED( ... ) INTERNAL_CATCH_MSG( Catch::ResultWas::Ok, Catch::ResultDisposition::ContinueOnFailure, "CATCH_SUCCEED", __VA_ARGS__ )
+#else
+    #define CATCH_TEST_CASE( name, description ) INTERNAL_CATCH_TESTCASE( name, description )
+    #define CATCH_TEST_CASE_METHOD( className, name, description ) INTERNAL_CATCH_TEST_CASE_METHOD( className, name, description )
+    #define CATCH_METHOD_AS_TEST_CASE( method, name, description ) INTERNAL_CATCH_METHOD_AS_TEST_CASE( method, name, description )
+    #define CATCH_SECTION( name, description ) INTERNAL_CATCH_SECTION( name, description )
+    #define CATCH_FAIL( msg ) INTERNAL_CATCH_MSG( Catch::ResultWas::ExplicitFailure, Catch::ResultDisposition::Normal, "CATCH_FAIL", msg )
+    #define CATCH_SUCCEED( msg ) INTERNAL_CATCH_MSG( Catch::ResultWas::Ok, Catch::ResultDisposition::ContinueOnFailure, "CATCH_SUCCEED", msg )
+#endif
+#define CATCH_ANON_TEST_CASE() INTERNAL_CATCH_TESTCASE( "", "" )
+
+#define CATCH_REGISTER_REPORTER( name, reporterType ) INTERNAL_CATCH_REGISTER_REPORTER( name, reporterType )
+#define CATCH_REGISTER_LEGACY_REPORTER( name, reporterType ) INTERNAL_CATCH_REGISTER_LEGACY_REPORTER( name, reporterType )
+
+#define CATCH_GENERATE( expr) INTERNAL_CATCH_GENERATE( expr )
+
+// "BDD-style" convenience wrappers
+#ifdef CATCH_CONFIG_VARIADIC_MACROS
+#define CATCH_SCENARIO( ... ) CATCH_TEST_CASE( "Scenario: " __VA_ARGS__ )
+#define CATCH_SCENARIO_METHOD( className, ... ) INTERNAL_CATCH_TEST_CASE_METHOD( className, "Scenario: " __VA_ARGS__ )
+#else
+#define CATCH_SCENARIO( name, tags ) CATCH_TEST_CASE( "Scenario: " name, tags )
+#define CATCH_SCENARIO_METHOD( className, name, tags ) INTERNAL_CATCH_TEST_CASE_METHOD( className, "Scenario: " name, tags )
+#endif
+#define CATCH_GIVEN( desc )    CATCH_SECTION( "Given: " desc, "" )
+#define CATCH_WHEN( desc )     CATCH_SECTION( " When: " desc, "" )
+#define CATCH_AND_WHEN( desc ) CATCH_SECTION( "  And: " desc, "" )
+#define CATCH_THEN( desc )     CATCH_SECTION( " Then: " desc, "" )
+#define CATCH_AND_THEN( desc ) CATCH_SECTION( "  And: " desc, "" )
+
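+// Illustrative usage (a sketch, not upstream documentation): defining
+// CATCH_CONFIG_PREFIX_ALL before including catch.hpp exposes only the
+// CATCH_-prefixed forms above, which avoids clashes with other frameworks'
+// CHECK/REQUIRE macros:
+//
+//     #define CATCH_CONFIG_PREFIX_ALL
+//     #define CATCH_CONFIG_MAIN
+//     #include "catch.hpp"
+//
+//     CATCH_TEST_CASE( "prefixed macros", "[example]" ) {
+//         CATCH_REQUIRE( 1 + 1 == 2 );
+//     }
+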
+// If CATCH_CONFIG_PREFIX_ALL is not defined then the CATCH_ prefix is not required
+#else
+
+#define REQUIRE( expr ) INTERNAL_CATCH_TEST( expr, Catch::ResultDisposition::Normal, "REQUIRE" )
+#define REQUIRE_FALSE( expr ) INTERNAL_CATCH_TEST( expr, Catch::ResultDisposition::Normal | Catch::ResultDisposition::FalseTest, "REQUIRE_FALSE" )
+
+#define REQUIRE_THROWS( expr ) INTERNAL_CATCH_THROWS( expr, Catch::ResultDisposition::Normal, "REQUIRE_THROWS" )
+#define REQUIRE_THROWS_AS( expr, exceptionType ) INTERNAL_CATCH_THROWS_AS( expr, exceptionType, Catch::ResultDisposition::Normal, "REQUIRE_THROWS_AS" )
+#define REQUIRE_NOTHROW( expr ) INTERNAL_CATCH_NO_THROW( expr, Catch::ResultDisposition::Normal, "REQUIRE_NOTHROW" )
+
+#define CHECK( expr ) INTERNAL_CATCH_TEST( expr, Catch::ResultDisposition::ContinueOnFailure, "CHECK" )
+#define CHECK_FALSE( expr ) INTERNAL_CATCH_TEST( expr, Catch::ResultDisposition::ContinueOnFailure | Catch::ResultDisposition::FalseTest, "CHECK_FALSE" )
+#define CHECKED_IF( expr ) INTERNAL_CATCH_IF( expr, Catch::ResultDisposition::ContinueOnFailure, "CHECKED_IF" )
+#define CHECKED_ELSE( expr ) INTERNAL_CATCH_ELSE( expr, Catch::ResultDisposition::ContinueOnFailure, "CHECKED_ELSE" )
+#define CHECK_NOFAIL( expr ) INTERNAL_CATCH_TEST( expr, Catch::ResultDisposition::ContinueOnFailure | Catch::ResultDisposition::SuppressFail, "CHECK_NOFAIL" )
+
+#define CHECK_THROWS( expr )  INTERNAL_CATCH_THROWS( expr, Catch::ResultDisposition::ContinueOnFailure, "CHECK_THROWS" )
+#define CHECK_THROWS_AS( expr, exceptionType ) INTERNAL_CATCH_THROWS_AS( expr, exceptionType, Catch::ResultDisposition::ContinueOnFailure, "CHECK_THROWS_AS" )
+#define CHECK_NOTHROW( expr ) INTERNAL_CATCH_NO_THROW( expr, Catch::ResultDisposition::ContinueOnFailure, "CHECK_NOTHROW" )
+
+#define CHECK_THAT( arg, matcher ) INTERNAL_CHECK_THAT( arg, matcher, Catch::ResultDisposition::ContinueOnFailure, "CHECK_THAT" )
+#define REQUIRE_THAT( arg, matcher ) INTERNAL_CHECK_THAT( arg, matcher, Catch::ResultDisposition::Normal, "REQUIRE_THAT" )
+
+#define INFO( msg ) INTERNAL_CATCH_INFO( msg, "INFO" )
+#define WARN( msg ) INTERNAL_CATCH_MSG( Catch::ResultWas::Warning, Catch::ResultDisposition::ContinueOnFailure, "WARN", msg )
+#define SCOPED_INFO( msg ) INTERNAL_CATCH_INFO( msg, "INFO" )
+#define CAPTURE( msg ) INTERNAL_CATCH_INFO( #msg " := " << msg, "CAPTURE" )
+#define SCOPED_CAPTURE( msg ) INTERNAL_CATCH_INFO( #msg " := " << msg, "CAPTURE" )
+
+#ifdef CATCH_CONFIG_VARIADIC_MACROS
+    #define TEST_CASE( ... ) INTERNAL_CATCH_TESTCASE( __VA_ARGS__ )
+    #define TEST_CASE_METHOD( className, ... ) INTERNAL_CATCH_TEST_CASE_METHOD( className, __VA_ARGS__ )
+    #define METHOD_AS_TEST_CASE( method, ... ) INTERNAL_CATCH_METHOD_AS_TEST_CASE( method, __VA_ARGS__ )
+    #define SECTION( ... ) INTERNAL_CATCH_SECTION( __VA_ARGS__ )
+    #define FAIL( ... ) INTERNAL_CATCH_MSG( Catch::ResultWas::ExplicitFailure, Catch::ResultDisposition::Normal, "FAIL", __VA_ARGS__ )
+    #define SUCCEED( ... ) INTERNAL_CATCH_MSG( Catch::ResultWas::Ok, Catch::ResultDisposition::ContinueOnFailure, "SUCCEED", __VA_ARGS__ )
+#else
+    #define TEST_CASE( name, description ) INTERNAL_CATCH_TESTCASE( name, description )
+    #define TEST_CASE_METHOD( className, name, description ) INTERNAL_CATCH_TEST_CASE_METHOD( className, name, description )
+    #define METHOD_AS_TEST_CASE( method, name, description ) INTERNAL_CATCH_METHOD_AS_TEST_CASE( method, name, description )
+    #define SECTION( name, description ) INTERNAL_CATCH_SECTION( name, description )
+    #define FAIL( msg ) INTERNAL_CATCH_MSG( Catch::ResultWas::ExplicitFailure, Catch::ResultDisposition::Normal, "FAIL", msg )
+    #define SUCCEED( msg ) INTERNAL_CATCH_MSG( Catch::ResultWas::Ok, Catch::ResultDisposition::ContinueOnFailure, "SUCCEED", msg )
+#endif
+#define ANON_TEST_CASE() INTERNAL_CATCH_TESTCASE( "", "" )
+
+#define REGISTER_REPORTER( name, reporterType ) INTERNAL_CATCH_REGISTER_REPORTER( name, reporterType )
+#define REGISTER_LEGACY_REPORTER( name, reporterType ) INTERNAL_CATCH_REGISTER_LEGACY_REPORTER( name, reporterType )
+
+#define GENERATE( expr) INTERNAL_CATCH_GENERATE( expr )
+
+#endif
+
+#define CATCH_TRANSLATE_EXCEPTION( signature ) INTERNAL_CATCH_TRANSLATE_EXCEPTION( signature )
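+
+// Illustrative usage (a sketch; MyError is a hypothetical exception type):
+//
+//     CATCH_TRANSLATE_EXCEPTION( MyError const& ex ) {
+//         return std::string( "MyError: " ) + ex.what();
+//     }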
+
+// "BDD-style" convenience wrappers
+#ifdef CATCH_CONFIG_VARIADIC_MACROS
+#define SCENARIO( ... ) TEST_CASE( "Scenario: " __VA_ARGS__ )
+#define SCENARIO_METHOD( className, ... ) INTERNAL_CATCH_TEST_CASE_METHOD( className, "Scenario: " __VA_ARGS__ )
+#else
+#define SCENARIO( name, tags ) TEST_CASE( "Scenario: " name, tags )
+#define SCENARIO_METHOD( className, name, tags ) INTERNAL_CATCH_TEST_CASE_METHOD( className, "Scenario: " name, tags )
+#endif
+#define GIVEN( desc )    SECTION( "   Given: " desc, "" )
+#define WHEN( desc )     SECTION( "    When: " desc, "" )
+#define AND_WHEN( desc ) SECTION( "And when: " desc, "" )
+#define THEN( desc )     SECTION( "    Then: " desc, "" )
+#define AND_THEN( desc ) SECTION( "     And: " desc, "" )
+
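+// Illustrative usage of the BDD-style wrappers above (a sketch, not upstream
+// documentation); they expand to ordinary test cases and sections:
+//
+//     #include <vector>
+//
+//     SCENARIO( "vectors can be resized", "[vector]" ) {
+//         GIVEN( "an empty vector" ) {
+//             std::vector<int> v;
+//             WHEN( "it is resized to 5 elements" ) {
+//                 v.resize( 5 );
+//                 THEN( "its size is 5" ) {
+//                     REQUIRE( v.size() == 5 );
+//                 }
+//             }
+//         }
+//     }
+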
+using Catch::Detail::Approx;
+
+// #included from: internal/catch_reenable_warnings.h
+
+#define TWOBLUECUBES_CATCH_REENABLE_WARNINGS_H_INCLUDED
+
+#ifdef __clang__
+#    ifdef __ICC // icpc defines the __clang__ macro
+#        pragma warning(pop)
+#    else
+#        pragma clang diagnostic pop
+#    endif
+#elif defined __GNUC__
+#    pragma GCC diagnostic pop
+#endif
+
+#endif // TWOBLUECUBES_SINGLE_INCLUDE_CATCH_HPP_INCLUDED
+

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/salmon.git


