[med-svn] [Git][med-team/salmon][master] 6 commits: New upstream version 1.10.0+ds1

Andreas Tille (@tille) gitlab at salsa.debian.org
Sun Mar 12 14:49:51 GMT 2023



Andreas Tille pushed to branch master at Debian Med / salmon


Commits:
2fde979a by Andreas Tille at 2023-03-08T13:46:24+01:00
New upstream version 1.10.0+ds1
- - - - -
088a42af by Andreas Tille at 2023-03-12T14:01:55+01:00
routine-update: New upstream version

- - - - -
475c60bd by Andreas Tille at 2023-03-12T14:01:56+01:00
New upstream version 1.10.1+ds1
- - - - -
1145119e by Andreas Tille at 2023-03-12T14:02:00+01:00
Update upstream source from tag 'upstream/1.10.1+ds1'

Update to upstream version '1.10.1+ds1'
with Debian dir 101e9dac9d775a219c11dd29e007f11c63e2b0b4
- - - - -
7807c675 by Andreas Tille at 2023-03-12T15:32:01+01:00
Update patches

- - - - -
2c11223d by Andreas Tille at 2023-03-12T15:48:32+01:00
Close bugs

- - - - -


22 changed files:

- CMakeLists.txt
- cmake/Modules/FindCereal.cmake → cmake/Modules/Findcereal.cmake
- cmake/Modules/Findlibstadenio.cmake
- cmake/TestSalmonQuasi.cmake
- current_version.txt
- debian/changelog
- debian/patches/series
- debian/patches/use_debian_packaged_libs.patch
- doc/source/conf.py
- doc/source/salmon.rst
- docker/Dockerfile
- docker/build_test.sh
- include/AlevinOpts.hpp
- include/SalmonConfig.hpp
- include/SalmonMappingUtils.hpp
- include/Transcript.hpp
- scripts/fetchPufferfish.sh
- scripts/v1_10x/run.sh
- src/Alevin.cpp
- src/AlevinUtils.cpp
- src/CMakeLists.txt
- src/SalmonAlevin.cpp


Changes:

=====================================
CMakeLists.txt
=====================================
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.12)
+cmake_minimum_required(VERSION 3.15)
 
 if(DEFINED ENV{CC})
   set(CC $ENV{CC})
@@ -224,10 +224,10 @@ if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
     endif()
 
     set(WARNING_IGNORE_FLAGS "${WARNING_IGNORE_FLAGS} -Wno-unused-local-typedefs")
-    set(BOOST_TOOLSET "${CC}")
+    set(BOOST_TOOLSET "gcc")
     set(BOOST_CONFIGURE_TOOLSET "--with-toolset=gcc")
     set(BCXX_FLAGS "${CXXSTDFLAG} ${SCHAR_FLAG}")
-    set(BOOST_EXTRA_FLAGS toolset=${CC} cxxflags=${BCXX_FLAGS})
+    set(BOOST_EXTRA_FLAGS toolset=gcc cxxflags=${BCXX_FLAGS})
 # Tentatively, we support clang now
 elseif("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
     set(CLANG TRUE)
@@ -580,22 +580,21 @@ set(EXTERNAL_LIBRARY_PATH $CMAKE_CURRENT_SOURCE_DIR/lib)
 #  set(SUFFARRAY_INCLUDE_DIRS ${SUFFARRAY_INCLUDE_DIR})
 #endif()
 
-
-find_package(Cereal)
+find_package(cereal "1.3.2")
 if (NOT CEREAL_FOUND)
-  message("Build system will fetch and build the Cereal serialization library")
+  message("Build system will fetch and build the cereal serialization library")
   message("==================================================================")
   include(ExternalProject)
   externalproject_add(libcereal
     DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
-    DOWNLOAD_COMMAND curl -k -L https://github.com/USCiLab/cereal/archive/v1.3.0.tar.gz -o cereal-v1.3.0.tar.gz &&
-      ${SHASUM} 329ea3e3130b026c03a4acc50e168e7daff4e6e661bc6a7dfec0d77b570851d5 cereal-v1.3.0.tar.gz &&
-      tar -xzvf cereal-v1.3.0.tar.gz
+    DOWNLOAD_COMMAND curl -k -L https://github.com/USCiLab/cereal/archive/refs/tags/v1.3.2.tar.gz -o cereal-v1.3.2.tar.gz &&
+      ${SHASUM} 16a7ad9b31ba5880dac55d62b5d6f243c3ebc8d46a3514149e56b5e7ea81f85f cereal-v1.3.2.tar.gz &&
+      tar -xzvf cereal-v1.3.2.tar.gz
 
-    SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/cereal-1.3.0
+    SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/cereal-1.3.2
     INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
     #UPDATE_COMMAND sh -c "mkdir -p <SOURCE_DIR>/build"
-    BINARY_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/cereal-1.3.0/build
+    BINARY_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/cereal-1.3.2/build
     CONFIGURE_COMMAND ""
     BUILD_COMMAND ""
     INSTALL_COMMAND sh -c "mkdir -p <INSTALL_DIR>/include && cp -r <SOURCE_DIR>/include/cereal <INSTALL_DIR>/include"
@@ -662,7 +661,7 @@ tar -xzvf v2021.5.tar.gz
 SOURCE_DIR ${TBB_SOURCE_DIR}
 INSTALL_DIR ${TBB_INSTALL_DIR}
 PATCH_COMMAND "${TBB_PATCH_STEP}"
-CMAKE_ARGS -DCMAKE_CXX_FLAGS=${TBB_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> -DTBB_TEST=OFF -DTBB_EXAMPLES=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_STANDARD=${CMAKE_CXX_STANDARD}
+CMAKE_ARGS -DCMAKE_CXX_FLAGS=${TBB_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> -DTBB_TEST=OFF -DTBB_EXAMPLES=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_STANDARD=${CMAKE_CXX_STANDARD} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
 BUILD_IN_SOURCE TRUE
 )
 
@@ -786,7 +785,7 @@ find_package(CURL)
 if (FETCH_STADEN)
   set(LIBSTADEN_FOUND FALSE)
 else ()
-  find_package(libstadenio)
+  find_package(libstadenio 1.14.15)
 endif()
 
 if (NOT LIBSTADENIO_FOUND)
@@ -794,12 +793,12 @@ if (NOT LIBSTADENIO_FOUND)
   message("==================================================================")
   externalproject_add(libstadenio
     DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
-    DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/staden-io_lib/archive/v1.14.8.1.tar.gz -o staden-io_lib-v1.14.8.tar.gz &&
-      ${SHASUM} f6f30eefa478cfb708f3109a35fb6ffa0e24951d9d971985df2cef5919dd0bc3 staden-io_lib-v1.14.8.tar.gz &&
-      mkdir -p staden-io_lib-1.14.8 &&
-      tar -xzf staden-io_lib-v1.14.8.tar.gz --strip-components=1 -C staden-io_lib-1.14.8 &&
+    DOWNLOAD_COMMAND curl -k -L https://github.com/jkbonfield/io_lib/releases/download/io_lib-1-14-15/io_lib-1.14.15.tar.gz -o staden-io_lib-v1.14.15.tar.gz &&
+      ${SHASUM} 20814c4365e1e2fe6630fb11d0df370dec4c5688af3871de7f1cb0129671401e staden-io_lib-v1.14.15.tar.gz &&
+      mkdir -p staden-io_lib-1.14.15 &&
+      tar -xzf staden-io_lib-v1.14.15.tar.gz --strip-components=1 -C staden-io_lib-1.14.15 &&
       rm -fr staden-io_lib &&
-      mv -f staden-io_lib-1.14.8 staden-io_lib
+      mv -f staden-io_lib-1.14.15 staden-io_lib
     SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/staden-io_lib
     INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
     CONFIGURE_COMMAND ./configure --enable-shared=no --without-libcurl --prefix=<INSTALL_DIR> LDFLAGS=${LIBSTADEN_LDFLAGS} CFLAGS=${LIBSTADEN_CFLAGS} CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER}
@@ -814,7 +813,7 @@ if (NOT LIBSTADENIO_FOUND)
   endif()
 
   set(FETCHED_STADEN TRUE)
-  set(STADEN_LIBRARIES "${GAT_SOURCE_DIR}/external/install/lib/libstaden-read.a")
+  set(STADEN_LIBRARIES "${GAT_SOURCE_DIR}/external/install/lib/libstaden-read.a;${GAT_SOURCE_DIR}/external/install/lib/libhtscodecs.a")
 endif()
 
 if (ASAN_BUILD)
@@ -906,11 +905,20 @@ set(CPACK_SOURCE_IGNORE_FILES
 
 message("CPACK_SOURCE_IGNORE_FILES = ${CPACK_SOURCE_IGNORE_FILES}")
 
+# we will use this property later
+define_property(TARGET PROPERTY COMPACT_VECTOR_DIR INHERITED 
+   BRIEF_DOCS "the path to the directory containing the compact_vector include tree"
+   FULL_DOCS "the path to the directory containing the compact_vector include tree")
+
 # Recurse into pufferfish source directory 
 # and build the library 
 set(BUILD_PUFF_FOR_SALMON TRUE)
 add_subdirectory(external/pufferfish)
 
+# make sure we know the path to compact_vector
+get_property(COMPACT_VECTOR_INCLUDE_PATH TARGET graphdump PROPERTY COMPACT_VECTOR_DIR)
+message("fetched path for compact_vector as [${COMPACT_VECTOR_INCLUDE_PATH}]")
+
 # and then the main salmon source directory
 add_subdirectory(src)
 


=====================================
cmake/Modules/FindCereal.cmake → cmake/Modules/Findcereal.cmake
=====================================
@@ -11,11 +11,34 @@ find_path(CEREAL_INCLUDE_DIR cereal
 
 set(CEREAL_INCLUDE_DIRS ${CEREAL_INCLUDE_DIR})
 
+if(CEREAL_INCLUDE_DIR)
+  set(CEREAL_FOUND YES)
+  set(CEREAL_VERSION_MAJOR 0)
+  set(CEREAL_VERSION_MINOR 0)
+  set(CEREAL_VERSION_PATCH 0)
+  if(EXISTS "${CEREAL_INCLUDE_DIR}/cereal/version.hpp")
+    # Read and parse cereal version header file for version number
+    file(READ "${CEREAL_INCLUDE_DIR}/cereal/version.hpp"
+        _CEREAL_HEADER_CONTENTS)
+    string(REGEX REPLACE ".*#define CEREAL_VERSION_MAJOR ([0-9]+).*" "\\1"
+        CEREAL_VERSION_MAJOR "${_CEREAL_HEADER_CONTENTS}")
+    string(REGEX REPLACE ".*#define CEREAL_VERSION_MINOR ([0-9]+).*" "\\1"
+        CEREAL_VERSION_MINOR "${_CEREAL_HEADER_CONTENTS}")
+    string(REGEX REPLACE ".*#define CEREAL_VERSION_PATCH ([0-9]+).*" "\\1"
+        CEREAL_VERSION_PATCH "${_CEREAL_HEADER_CONTENTS}")
+    set(CEREAL_VERSION_STRING "${CEREAL_VERSION_MAJOR}.${CEREAL_VERSION_MINOR}.${CEREAL_VERSION_PATCH}")
+  else()
+    set(CEREAL_FOUND NO)
+  endif()
+endif()
+
 include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(Cereal DEFAULT_MSG CEREAL_INCLUDE_DIR)
+find_package_handle_standard_args(cereal 
+  REQUIRED_VARS CEREAL_INCLUDE_DIR
+  VERSION_VAR CEREAL_VERSION_STRING)
 
 mark_as_advanced(CEREAL_INCLUDE_DIR)
 
 if(CEREAL_FOUND)
-  message(STATUS "Cereal found (include: ${CEREAL_INCLUDE_DIRS})")
+  message(STATUS "cereal found (include: ${CEREAL_INCLUDE_DIRS})")
 endif(CEREAL_FOUND)


=====================================
cmake/Modules/Findlibstadenio.cmake
=====================================
@@ -10,7 +10,10 @@ find_path(STADEN_INCLUDE_DIR io_lib
 	HINTS ${STADEN_ROOT} ENV STADEN_ROOT
   PATH_SUFFIXES include)
 
-find_library(STADEN_LIBRARY NAMES staden-read libstaden-read
+find_library(STADEN_LIBRARY NAMES staden-read libstaden-read 
+  HINTS ${STADEN_ROOT} ENV STADEN_ROOT PATH_SUFFIXES lib lib64)
+
+find_library(HTSCODEC_LIBRARY NAMES htscodecs libhtscodecs
   HINTS ${STADEN_ROOT} ENV STADEN_ROOT PATH_SUFFIXES lib lib64)
 
 if(STADEN_INCLUDE_DIR)
@@ -30,7 +33,7 @@ find_package_handle_standard_args(libstadenio DEFAULT_MSG
 
 if (LIBSTADENIO_FOUND)
   message(STATUS "Staden IOLib found (include: ${STADEN_INCLUDE_DIR})")
-  set(STADEN_LIBRARIES ${STADEN_LIBRARY})
+  set(STADEN_LIBRARIES "${STADEN_LIBRARY};${HTSCODEC_LIBRARY}")
 endif()
 
 mark_as_advanced(STADEN_INCLUDE_DIR STADEN_LIBRARY)


=====================================
cmake/TestSalmonQuasi.cmake
=====================================
@@ -14,7 +14,10 @@ execute_process(COMMAND ${SALMON_QUASI_INDEX_CMD}
                 )
 
 if (SALMON_QUASI_INDEX_RESULT)
-    message(FATAL_ERROR "Error running ${SALMON_QUASI_INDEX_COMMAND}")
+    message(FATAL_ERROR
+            "While running: ${SALMON_QUASI_INDEX_CMD}\n"
+            "error: ${SALMON_QUASI_INDEX_RESULT}"
+            )
 endif()
 
 set(SALMON_QUANT_COMMAND ${CMAKE_BINARY_DIR}/salmon quant -i sample_salmon_quasi_index -l IU -1 reads_1.fastq -2 reads_2.fastq -o sample_salmon_quasi_quant)
@@ -23,7 +26,10 @@ execute_process(COMMAND ${SALMON_QUANT_COMMAND}
                 RESULT_VARIABLE SALMON_QUASI_QUANT_RESULT
                 )
 if (SALMON_QUASI_QUANT_RESULT)
-    message(FATAL_ERROR "Error running ${SALMON_QUASI_QUANT_RESULT}")
+    message(FATAL_ERROR
+            "While running: ${SALMON_QUANT_COMMAND}\n"
+            "error: ${SALMON_QUASI_QUANT_RESULT}"
+            )
 endif()
 
 if (EXISTS ${TOPLEVEL_DIR}/sample_data/sample_salmon_quasi_quant/quant.sf)


=====================================
current_version.txt
=====================================
@@ -1,3 +1,3 @@
 VERSION_MAJOR 1
-VERSION_MINOR 9
-VERSION_PATCH 0
+VERSION_MINOR 10
+VERSION_PATCH 1


=====================================
debian/changelog
=====================================
@@ -1,10 +1,12 @@
-salmon (1.9.0+ds1-2) UNRELEASED; urgency=medium
+salmon (1.10.1+ds1-1) UNRELEASED; urgency=medium
 
   * Team upload.
+  * New upstream version
+    Closes: #1028713, #1031580
   * Refresh external files from pufferfish
   * Add autopkgtest
 
- -- Andreas Tille <tille at debian.org>  Tue, 31 Jan 2023 10:12:05 +0100
+ -- Andreas Tille <tille at debian.org>  Sun, 12 Mar 2023 15:32:15 +0100
 
 salmon (1.9.0+ds1-1) unstable; urgency=medium
 


=====================================
debian/patches/series
=====================================
@@ -6,4 +6,3 @@ disable-version-check.patch
 # 0008-Remove-salmon_core-lib.patch
 # add_MEM_F_SELF_OVLP.patch
 sphinx.patch
-verbose-test.patch


=====================================
debian/patches/use_debian_packaged_libs.patch
=====================================
@@ -2,9 +2,9 @@ Author: Andreas Tille <tille at debian.org>
 Last-Update: Tue, 11 Dec 2018 19:29:39 +0100
 Description: Use Debian packaged libraries and make sure these are linked dynamically
 Forwarded: not-needed
---- salmon.orig/src/CMakeLists.txt
-+++ salmon/src/CMakeLists.txt
-@@ -4,7 +4,7 @@
+--- a/src/CMakeLists.txt
++++ b/src/CMakeLists.txt
+@@ -4,7 +4,7 @@ endif()
  
  include_directories(
  ${GAT_SOURCE_DIR}/include
@@ -13,17 +13,18 @@ Forwarded: not-needed
  ${GAT_SOURCE_DIR}/external
  ${GAT_SOURCE_DIR}/external/cereal/include
  ${GAT_SOURCE_DIR}/external/install/include
-@@ -234,21 +234,21 @@
+@@ -239,22 +239,22 @@ target_link_libraries(salmon
      graphdump
      ntcard
      gff
 -    ${Boost_LIBRARIES}
 +    boost_iostreams boost_filesystem boost_system boost_timer boost_chrono boost_program_options boost_regex
      ${ICU_LIBS}
-     ${STADEN_LIBRARIES} ${CURL_LIBRARIES}
+     ${CURL_LIBRARIES}
 -    ${ZLIB_LIBRARY}
 +    z
      m
+     ${STADEN_LIBRARIES} 
 -    ${LIBLZMA_LIBRARIES}
 +    lzma
      ${BZIP2_LIBRARIES}
@@ -41,12 +42,17 @@ Forwarded: not-needed
      ${LIBRT}
      ${CMAKE_DL_LIBS}
  )
-@@ -274,16 +274,16 @@
-     gff
-     UnitTestsMain
-     ${STADEN_LIBRARIES} 
+@@ -275,7 +275,7 @@ if(TBB_RECONFIGURE OR TBB_TARGET_EXISTED
+ target_link_libraries(unitTests
+     Threads::Threads
+ ## PUFF_INTEGRATION
 -    ${Boost_LIBRARIES}
 +    boost_iostreams boost_filesystem boost_system boost_timer boost_chrono boost_program_options boost_regex
+     salmon_core
+     alevin_core
+     gff
+@@ -284,13 +284,13 @@ target_link_libraries(unitTests
+     ${STADEN_LIBRARIES} 
      ${ICU_LIBS}
      ${CURL_LIBRARIES}
 -    ${ZLIB_LIBRARY}


=====================================
doc/source/conf.py
=====================================
@@ -55,9 +55,9 @@ copyright = u'2013-2021, Rob Patro, Geet Duggal, Mike Love, Rafael Irizarry and
 # built documents.
 #
 # The short X.Y version.
-version = '1.9'
+version = '1.10'
 # The full version, including alpha/beta/rc tags.
-release = '1.9.0'
+release = '1.10.1'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.


=====================================
doc/source/salmon.rst
=====================================
@@ -269,8 +269,8 @@ mode, and a description of each, run ``salmon quant --help-alignment``.
     header sections must be identical).
 
 
-Description of important options
---------------------------------
+Description of some important options
+-------------------------------------
 
 Salmon exposes a number of useful optional command-line parameters to the user.
 The particularly important ones are explained here, but you can always run
@@ -296,6 +296,20 @@ by RSEM), but using the default scoring scheme and allowing both mismatches and
 indels in alignments. These setting essentially disallow indels in the resulting
 alignments.
 
+""""""""""""""""""""""""""""""
+``--meta``
+""""""""""""""""""""""""""""""
+
+As with the flags described above, this is a "meta-flag" that simply enables some options
+that may make more sense when quantifying metagenomic data.  Specifically, the ``--meta``
+flag sets the following options: 
+
+* The abundance optimization is initialized from the uniform distribution (compared to the default of using a weighted combination of the uniform intialization and the abundances learned during the online optimization)
+
+* Rich equivalence classes are disabled. Using rich equivalence classes with metagenomic data should not be particularly problematic, but since they have been developed and tested most in the context of bulk RNA-seq quantification, they are currently disabled under this flag.
+
+* The EM algorithm is used for abundance optimization instead of the default VBEM optimization.  Neither is universally better than the other, but the parameters for the VBEM (e.g. the prior size and type) are set based on typical bulk RNA-seq transcriptome samples, and so may be less appropriate in the metagenomic context. Hence the ``--meta`` flags opts for the basic EM algorithm instead.
+
 """"""""""""""""""""""""""""""
 ``--recoverOrphans``
 """"""""""""""""""""""""""""""


=====================================
docker/Dockerfile
=====================================
@@ -6,7 +6,7 @@ MAINTAINER salmon.maintainer at gmail.com
 
 ENV PACKAGES git gcc make g++ libboost-all-dev liblzma-dev libbz2-dev \
     ca-certificates zlib1g-dev libcurl4-openssl-dev curl unzip autoconf apt-transport-https ca-certificates gnupg software-properties-common wget
-ENV SALMON_VERSION 1.9.0
+ENV SALMON_VERSION 1.10.1
 
 # salmon binary will be installed in /home/salmon/bin/salmon
 


=====================================
docker/build_test.sh
=====================================
@@ -1,3 +1,3 @@
 #! /bin/bash
-SALMON_VERSION=1.9.0
-docker build --no-cache -t combinelab/salmon:${SALMON_VERSION} -t combinelab/salmon:latest .
+SALMON_VERSION=1.10.1
+TMPDIR=/mnt/scratch2/DELETE_ME_TEMP docker build --no-cache -t combinelab/salmon:${SALMON_VERSION} -t combinelab/salmon:latest .


=====================================
include/AlevinOpts.hpp
=====================================
@@ -13,6 +13,10 @@
 
 enum class BarcodeEnd { FIVE = 5, THREE = 3 };
 enum class Sequence { BARCODE, UMI };
+// refers to which reads to use for mapping; use both for 5' libraries
+enum class ReadsToMap { USE_BOTH, USE_FIRST, USE_SECOND };
+// refers to which reads map to transcripts and to which transcripts they map to
+enum class PairingStatus { UNPAIRED_LEFT, UNPAIRED_RIGHT, PAIRED_FR, PAIRED_RF };
 
 template <class protocolT>
 struct AlevinOpts {
@@ -82,11 +86,11 @@ struct AlevinOpts {
   // initialize EM with uniform prior
   bool initUniform;
   //number of cells
-  uint32_t numCells;
+  uint64_t numCells;
   // minimum number of CB to use for low confidence region
-  uint32_t lowRegionMinNumBarcodes;
+  uint64_t lowRegionMinNumBarcodes;
   // maximum number of barcodes to use
-  uint32_t maxNumBarcodes;
+  uint64_t maxNumBarcodes;
   // number of bootstraps to perform
   uint32_t numBootstraps;
   // number of gibbs samples to perform
@@ -115,27 +119,27 @@ struct AlevinOpts {
   boost::filesystem::path bfhFile;
 
   //meta-info related tags
-  uint32_t totalReads;
-  uint32_t totalUsedReads;
-  uint32_t readsThrown;
+  uint64_t totalReads;
+  uint64_t totalUsedReads;
+  uint64_t readsThrown;
 
-  uint32_t totalCBs;
-  uint32_t totalUsedCBs;
+  uint64_t totalCBs;
+  uint64_t totalUsedCBs;
 
-  uint32_t kneeCutoff;
-  uint32_t intelligentCutoff;
-  uint32_t totalLowConfidenceCBs;
-  uint32_t numFeatures;
-  uint32_t numNoMapCB;
+  uint64_t kneeCutoff;
+  uint64_t intelligentCutoff;
+  uint64_t totalLowConfidenceCBs;
+  uint64_t numFeatures;
+  uint64_t numNoMapCB;
 
-  uint32_t eqReads;
-  uint32_t noisyUmis;
+  uint64_t eqReads;
+  uint64_t noisyUmis;
   double mappingRate;
   double keepCBFraction;
   double vbemNorm;
 
-  uint32_t totalDedupUMIs;
-  uint32_t totalExpGenes;
+  uint64_t totalDedupUMIs;
+  uint64_t totalExpGenes;
 };
 
 #endif // ALEVIN_OPTS_HPP


=====================================
include/SalmonConfig.hpp
=====================================
@@ -26,9 +26,9 @@
 
 namespace salmon {
 constexpr char majorVersion[] = "1";
-constexpr char minorVersion[] = "9";
-constexpr char patchVersion[] = "0";
-constexpr char version[] = "1.9.0";
+constexpr char minorVersion[] = "10";
+constexpr char patchVersion[] = "1";
+constexpr char version[] = "1.10.1";
 constexpr uint32_t indexVersion = 5;
 constexpr char requiredQuasiIndexVersion[] = "p7";
 } // namespace salmon


=====================================
include/SalmonMappingUtils.hpp
=====================================
@@ -22,7 +22,6 @@
 #ifndef __SALMON_MAPPING_UTILS__
 #define __SALMON_MAPPING_UTILS__
 
-
 #include <algorithm>
 #include <atomic>
 #include <cassert>
@@ -39,107 +38,122 @@
 // Future C++ convenience classes
 #include "core/range.hpp"
 
-#include "SalmonDefaults.hpp"
-#include "SalmonMath.hpp"
-#include "SalmonUtils.hpp"
-#include "Transcript.hpp"
+#include "AlevinOpts.hpp"
 #include "AlignmentGroup.hpp"
 #include "ProgramOptionsGenerator.hpp"
 #include "ReadExperiment.hpp"
+#include "SalmonDefaults.hpp"
+#include "SalmonMath.hpp"
 #include "SalmonOpts.hpp"
+#include "SalmonUtils.hpp"
+#include "Transcript.hpp"
 
-#include "pufferfish/Util.hpp"
-#include "pufferfish/MemCollector.hpp"
+#include "parallel_hashmap/phmap.h"
 #include "pufferfish/MemChainer.hpp"
-#include "pufferfish/SAMWriter.hpp"
+#include "pufferfish/MemCollector.hpp"
 #include "pufferfish/PuffAligner.hpp"
-#include "pufferfish/ksw2pp/KSW2Aligner.hpp"
-#include "pufferfish/metro/metrohash64.h"
+#include "pufferfish/SAMWriter.hpp"
 #include "pufferfish/SelectiveAlignmentUtils.hpp"
+#include "pufferfish/Util.hpp"
 #include "pufferfish/itlib/small_vector.hpp"
-#include "parallel_hashmap/phmap.h"
+#include "pufferfish/ksw2pp/KSW2Aligner.hpp"
+#include "pufferfish/metro/metrohash64.h"
 
 namespace salmon {
-  namespace mapping_utils {
+namespace mapping_utils {
 
-    using MateStatus = pufferfish::util::MateStatus;
-    constexpr const int32_t invalid_score_ = std::numeric_limits<int32_t>::min();
-    constexpr const int32_t invalid_index_ = std::numeric_limits<int32_t>::min();
-    constexpr const size_t static_vec_size = 32;
+using MateStatus = pufferfish::util::MateStatus;
+constexpr const int32_t invalid_score_ = std::numeric_limits<int32_t>::min();
+constexpr const int32_t invalid_index_ = std::numeric_limits<int32_t>::min();
+constexpr const size_t static_vec_size = 32;
 
-    class MappingScoreInfo {
-    public:
-      MappingScoreInfo()
-          : bestScore(invalid_score_), secondBestScore(invalid_score_),
-            bestDecoyScore(invalid_score_), decoyThresh(1.0), collect_decoy_info_(false) {}
+class MappingScoreInfo {
+public:
+  MappingScoreInfo()
+      : bestScore(invalid_score_), secondBestScore(invalid_score_),
+        bestDecoyScore(invalid_score_), decoyThresh(1.0),
+        collect_decoy_info_(false) {}
 
-      MappingScoreInfo(double decoyThreshIn) : MappingScoreInfo() {
-        decoyThresh = decoyThreshIn;
-      }
+  MappingScoreInfo(double decoyThreshIn) : MappingScoreInfo() {
+    decoyThresh = decoyThreshIn;
+  }
 
-      void collect_decoys(bool do_collect) { collect_decoy_info_ = do_collect; }
-      bool collect_decoys() const { return collect_decoy_info_; }
-
-      // clear everything but the decoy threshold 
-      void clear(size_t num_hits) {
-        bestScore = invalid_score_;
-        secondBestScore = invalid_score_;
-        bestDecoyScore = invalid_score_;
-        scores_.clear(); scores_.resize(num_hits, invalid_score_);
-        bestScorePerTranscript_.clear();
-        perm_.clear();
-        // NOTE: we do _not_ reset decoyThresh here
-        if (collect_decoy_info_) {
-          bool revert_to_static = best_decoy_hits.size() > static_vec_size;
-          best_decoy_hits.clear();
-          if (revert_to_static) { best_decoy_hits.revert_to_static(); }
-        }
-      }
+  void collect_decoys(bool do_collect) { collect_decoy_info_ = do_collect; }
+  bool collect_decoys() const { return collect_decoy_info_; }
 
-      bool haveOnlyDecoyMappings() const {
-        // if the best non-decoy mapping has score less than decoyThresh *
-        // bestDecoyScore and if the bestDecoyScore is a valid value, then we
-        // have no valid non-decoy mappings.
-        return (bestScore < static_cast<int32_t>(decoyThresh * bestDecoyScore)) and
-               (bestDecoyScore > std::numeric_limits<decltype(bestDecoyScore)>::min());
+  // clear everything but the decoy threshold
+  void clear(size_t num_hits) {
+    bestScore = invalid_score_;
+    secondBestScore = invalid_score_;
+    bestDecoyScore = invalid_score_;
+    scores_.clear();
+    scores_.resize(num_hits, invalid_score_);
+    bestScorePerTranscript_.clear();
+    perm_.clear();
+    // NOTE: we do _not_ reset decoyThresh here
+    if (collect_decoy_info_) {
+      bool revert_to_static = best_decoy_hits.size() > static_vec_size;
+      best_decoy_hits.clear();
+      if (revert_to_static) {
+        best_decoy_hits.revert_to_static();
       }
-      
-      inline bool update_decoy_mappings(int32_t hitScore, size_t idx, uint32_t tid) {
-        const bool better_score = hitScore > bestDecoyScore;
-        if (hitScore > bestDecoyScore) {
-          bestDecoyScore = hitScore;
-          if (collect_decoy_info_) { 
-            best_decoy_hits.clear();
-            best_decoy_hits.push_back(std::make_pair(static_cast<int32_t>(idx), static_cast<int32_t>(tid)));
-          }
-        } else if (collect_decoy_info_ and (hitScore == bestDecoyScore)){
-          best_decoy_hits.push_back(std::make_pair(static_cast<int32_t>(idx), static_cast<int32_t>(tid)));
-        }
-        return better_score;
+    }
+  }
+
+  bool haveOnlyDecoyMappings() const {
+    // if the best non-decoy mapping has score less than decoyThresh *
+    // bestDecoyScore and if the bestDecoyScore is a valid value, then we
+    // have no valid non-decoy mappings.
+    return (bestScore < static_cast<int32_t>(decoyThresh * bestDecoyScore)) and
+           (bestDecoyScore >
+            std::numeric_limits<decltype(bestDecoyScore)>::min());
+  }
+
+  inline bool update_decoy_mappings(int32_t hitScore, size_t idx,
+                                    uint32_t tid) {
+    const bool better_score = hitScore > bestDecoyScore;
+    if (hitScore > bestDecoyScore) {
+      bestDecoyScore = hitScore;
+      if (collect_decoy_info_) {
+        best_decoy_hits.clear();
+        best_decoy_hits.push_back(std::make_pair(static_cast<int32_t>(idx),
+                                                 static_cast<int32_t>(tid)));
       }
+    } else if (collect_decoy_info_ and (hitScore == bestDecoyScore)) {
+      best_decoy_hits.push_back(
+          std::make_pair(static_cast<int32_t>(idx), static_cast<int32_t>(tid)));
+    }
+    return better_score;
+  }
 
-      int32_t bestScore;
-      int32_t secondBestScore;
-      int32_t bestDecoyScore;
-      double decoyThresh;
-      itlib::small_vector<std::pair<int32_t, int32_t>> best_decoy_hits;
-      bool collect_decoy_info_;
-      std::vector<int32_t> scores_;
-      phmap::flat_hash_map<uint32_t, std::pair<int32_t, int32_t>> bestScorePerTranscript_;
-      std::vector<std::pair<int32_t, int32_t>> perm_;
-    };
+  int32_t bestScore;
+  int32_t secondBestScore;
+  int32_t bestDecoyScore;
+  double decoyThresh;
+  itlib::small_vector<std::pair<int32_t, int32_t>> best_decoy_hits;
+  bool collect_decoy_info_;
+  std::vector<int32_t> scores_;
+  phmap::flat_hash_map<uint32_t, std::pair<int32_t, int32_t>>
+      bestScorePerTranscript_;
+  std::vector<std::pair<int32_t, int32_t>> perm_;
+};
 
 template <typename IndexT>
-inline bool initMapperSettings(SalmonOpts& salmonOpts, MemCollector<IndexT>& memCollector, ksw2pp::KSW2Aligner& aligner,
-                        pufferfish::util::AlignmentConfig& aconf, pufferfish::util::MappingConstraintPolicy& mpol) {
+inline bool
+initMapperSettings(SalmonOpts& salmonOpts, MemCollector<IndexT>& memCollector,
+                   ksw2pp::KSW2Aligner& aligner,
+                   pufferfish::util::AlignmentConfig& aconf,
+                   pufferfish::util::MappingConstraintPolicy& mpol) {
   memCollector.configureMemClusterer(salmonOpts.maxOccsPerHit);
-  double consensusFraction = (salmonOpts.consensusSlack == 0.0) ? 1.0 : (1.0 - salmonOpts.consensusSlack);
+  double consensusFraction = (salmonOpts.consensusSlack == 0.0)
+                                 ? 1.0
+                                 : (1.0 - salmonOpts.consensusSlack);
   memCollector.setConsensusFraction(consensusFraction);
   memCollector.setHitFilterPolicy(salmonOpts.hitFilterPolicy);
   memCollector.setAltSkip(salmonOpts.mismatchSeedSkip);
   memCollector.setChainSubOptThresh(salmonOpts.pre_merge_chain_sub_thresh);
 
-  //Initialize ksw aligner
+  // Initialize ksw aligner
   ksw2pp::KSW2Config config;
   config.dropoff = -1;
   config.gapo = salmonOpts.gapOpenPenalty;
@@ -166,14 +180,17 @@ inline bool initMapperSettings(SalmonOpts& salmonOpts, MemCollector<IndexT>& mem
   aconf.mimicBT2 = salmonOpts.mimicBT2;
   aconf.mimicBT2Strict = salmonOpts.mimicStrictBT2;
   aconf.allowOverhangSoftclip = salmonOpts.softclipOverhangs;
-  aconf.allowSoftclip  = salmonOpts.softclip; 
+  aconf.allowSoftclip = salmonOpts.softclip;
   aconf.useAlignmentCache = !salmonOpts.disableAlignmentCache;
   aconf.alignmentMode = pufferfish::util::PuffAlignmentMode::SCORE_ONLY;
 
-  // we actually care about the softclips in the cigar string 
-  // if we are writing output and softclipping (or softclipping of overhangs) is enabled
-  if ( (!salmonOpts.qmFileName.empty()) and (salmonOpts.softclip or salmonOpts.softclipOverhangs) ) {
-    aconf.alignmentMode = pufferfish::util::PuffAlignmentMode::APPROXIMATE_CIGAR;
+  // we actually care about the softclips in the cigar string
+  // if we are writing output and softclipping (or softclipping of overhangs) is
+  // enabled
+  if ((!salmonOpts.qmFileName.empty()) and
+      (salmonOpts.softclip or salmonOpts.softclipOverhangs)) {
+    aconf.alignmentMode =
+        pufferfish::util::PuffAlignmentMode::APPROXIMATE_CIGAR;
   }
 
   mpol.noOrphans = !salmonOpts.allowOrphans;
@@ -189,25 +206,26 @@ inline bool initMapperSettings(SalmonOpts& salmonOpts, MemCollector<IndexT>& mem
 
   mpol.setPostMergeChainSubThresh(salmonOpts.post_merge_chain_sub_thresh);
   mpol.setOrphanChainSubThresh(salmonOpts.orphan_chain_sub_thresh);
-  
+
   return true;
 }
 
-
-inline void updateRefMappings(uint32_t tid, int32_t hitScore, bool isCompat, size_t idx,
-                  const std::vector<Transcript>& transcripts,
-                  int32_t invalidScore,
-                  salmon::mapping_utils::MappingScoreInfo& msi) {
+inline void updateRefMappings(uint32_t tid, int32_t hitScore, bool isCompat,
+                              size_t idx,
+                              const std::vector<Transcript>& transcripts,
+                              int32_t invalidScore,
+                              salmon::mapping_utils::MappingScoreInfo& msi) {
   auto& scores = msi.scores_;
   scores[idx] = hitScore;
   auto& t = transcripts[tid];
   bool isDecoy = t.isDecoy();
-  double decoyCutoff = static_cast<int32_t>(msi.decoyThresh * msi.bestDecoyScore);
+  double decoyCutoff =
+      static_cast<int32_t>(msi.decoyThresh * msi.bestDecoyScore);
 
-  //if (hitScore < decoyCutoff or (hitScore == invalidScore)) { }
+  // if (hitScore < decoyCutoff or (hitScore == invalidScore)) { }
 
   if (isDecoy) {
-    // NOTE: decide here if we need to process any of this if the 
+    // NOTE: decide here if we need to process any of this if the
     // current score is < the best (non-decoy) score. I think not.
 
     // if this is a decoy and its score is better than the best decoy score
@@ -215,10 +233,10 @@ inline void updateRefMappings(uint32_t tid, int32_t hitScore, bool isCompat, siz
     (void)did_update;
     return;
   } else if (hitScore < decoyCutoff or (hitScore == invalidScore)) {
-    // if the current score is to a valid target but doesn't 
+    // if the current score is to a valid target but doesn't
     // exceed the necessary decoy threshold, then skip it.
     return;
-  } 
+  }
   // otherwise, we have a "high-scoring" hit to a non-decoy
 
   auto& perm = msi.perm_;
@@ -230,7 +248,8 @@ inline void updateRefMappings(uint32_t tid, int32_t hitScore, bool isCompat, siz
     // this is the current best
     bestScorePerTranscript[tid].first = hitScore;
     bestScorePerTranscript[tid].second = idx;
-  } else if ((hitScore > it->second.first) or (hitScore == it->second.first and isCompat)) {
+  } else if ((hitScore > it->second.first) or
+             (hitScore == it->second.first and isCompat)) {
     // otherwise, if we had an alignment for this transcript and it's
     // better than the current best, then set the best score to this
     // alignment's score, and invalidate the previous alignment
@@ -241,7 +260,7 @@ inline void updateRefMappings(uint32_t tid, int32_t hitScore, bool isCompat, siz
     // otherwise, there is already a better mapping for this transcript.
     scores[idx] = invalidScore;
   }
-  
+
   if (hitScore > msi.bestScore) {
     msi.secondBestScore = msi.bestScore;
     msi.bestScore = hitScore;
@@ -249,44 +268,50 @@ inline void updateRefMappings(uint32_t tid, int32_t hitScore, bool isCompat, siz
   perm.push_back(std::make_pair(idx, tid));
 }
 
-
 inline void filterAndCollectAlignments(
-                                       std::vector<pufferfish::util::JointMems>& jointHits,
-                                       uint32_t readLen,
-                                       uint32_t mateLen,
-                                       bool singleEnd,
-                                       bool tryAlign,
-                                       bool hardFilter,
-                                       double scoreExp,
-                                       double minAlnProb,
-                                       salmon::mapping_utils::MappingScoreInfo& msi,
-                                       std::vector<pufferfish::util::QuasiAlignment>& jointAlignments) {
+    std::vector<pufferfish::util::JointMems>& jointHits, uint32_t readLen,
+    uint32_t mateLen, bool singleEnd, bool tryAlign, bool hardFilter,
+    double scoreExp, double minAlnProb,
+    salmon::mapping_utils::MappingScoreInfo& msi,
+    std::vector<pufferfish::util::QuasiAlignment>& jointAlignments) {
 
   auto invalidScore = std::numeric_limits<decltype(msi.bestDecoyScore)>::min();
-  if (msi.bestDecoyScore == invalidScore) { msi.bestDecoyScore = invalidScore + 1 ;}
+  if (msi.bestDecoyScore == invalidScore) {
+    msi.bestDecoyScore = invalidScore + 1;
+  }
 
-  int32_t decoyThreshold = static_cast<decltype(msi.bestDecoyScore)>(msi.decoyThresh * msi.bestDecoyScore);
+  int32_t decoyThreshold = static_cast<decltype(msi.bestDecoyScore)>(
+      msi.decoyThresh * msi.bestDecoyScore);
 
-  //auto filterScore = (bestDecoyScore < secondBestScore) ? secondBestScore : bestDecoyScore;
+  // auto filterScore = (bestDecoyScore < secondBestScore) ? secondBestScore :
+  // bestDecoyScore;
   auto& scores = msi.scores_;
   auto& perm = msi.perm_;
 
   // throw away any pairs for which we should not produce valid alignments :
   // ======
-  // If we are doing soft-filtering (default), we remove those not exceeding the bestDecoyScore
-  // If we are doing hard-filtering, we remove those less than the bestScore
-  perm.erase(std::remove_if(perm.begin(), perm.end(),
-                            [&scores, hardFilter, &msi, decoyThreshold, invalidScore](const std::pair<int32_t, int32_t>& idxtid) -> bool {
-                              return !hardFilter ?  scores[idxtid.first] < decoyThreshold : scores[idxtid.first] < msi.bestScore;
-                              //return !hardFilter ?  scores[idxtid.first] < filterScore : scores[idxtid.first] < bestScore; 
-                            }), perm.end());
-
-  // Unlike RapMap, pufferfish doesn't guarantee the hits computed above are in order
-  // by transcript, so we find the permutation of indices that puts things in transcript
-  // order.
-  std::sort(perm.begin(), perm.end(), [](const std::pair<int32_t, int32_t>& p1,
-                                         const std::pair<int32_t, int32_t>& p2) {
-                                        return p1.second < p2.second;});
+  // If we are doing soft-filtering (default), we remove those not exceeding the
+  // bestDecoyScore If we are doing hard-filtering, we remove those less than
+  // the bestScore
+  perm.erase(std::remove_if(
+                 perm.begin(), perm.end(),
+                 [&scores, hardFilter, &msi, decoyThreshold, invalidScore](
+                     const std::pair<int32_t, int32_t>& idxtid) -> bool {
+                   return !hardFilter ? scores[idxtid.first] < decoyThreshold
+                                      : scores[idxtid.first] < msi.bestScore;
+                   // return !hardFilter ?  scores[idxtid.first] < filterScore :
+                   // scores[idxtid.first] < bestScore;
+                 }),
+             perm.end());
+
+  // Unlike RapMap, pufferfish doesn't guarantee the hits computed above are in
+  // order by transcript, so we find the permutation of indices that puts things
+  // in transcript order.
+  std::sort(perm.begin(), perm.end(),
+            [](const std::pair<int32_t, int32_t>& p1,
+               const std::pair<int32_t, int32_t>& p2) {
+              return p1.second < p2.second;
+            });
 
   // moving our alinged / score jointMEMs over to QuasiAlignment objects
   double bestScoreD = static_cast<double>(msi.bestScore);
@@ -298,27 +323,33 @@ inline void filterAndCollectAlignments(
     double currScore = scores[ctr];
     double v = bestScoreD - currScore;
     // why -1?
-    double estAlnProb = hardFilter ? -1.0 : std::exp(- scoreExp * v );
+    double estAlnProb = hardFilter ? -1.0 : std::exp(-scoreExp * v);
     // skip any alignment with aln prob < minAlnProb
-    if (!hardFilter and (estAlnProb < minAlnProb)) { continue; }
+    if (!hardFilter and (estAlnProb < minAlnProb)) {
+      continue;
+    }
 
     if (singleEnd or jointHit.isOrphan()) {
       readLen = jointHit.isLeftAvailable() ? readLen : mateLen;
-      jointAlignments.emplace_back(tid,           // reference id
-                                   jointHit.orphanClust()->getTrFirstHitPos(),     // reference pos
-                                   jointHit.orphanClust()->isFw,     // fwd direction
-                                   readLen, // read length
-                                   jointHit.orphanClust()->cigar, // cigar string
-                                   jointHit.fragmentLen,       // fragment length
-                                   false);
-      auto &qaln = jointAlignments.back();
+      jointAlignments.emplace_back(
+          tid,                                        // reference id
+          jointHit.orphanClust()->getTrFirstHitPos(), // reference pos
+          jointHit.orphanClust()->isFw,               // fwd direction
+          readLen,                                    // read length
+          jointHit.orphanClust()->cigar,              // cigar string
+          jointHit.fragmentLen,                       // fragment length
+          false);
+      auto& qaln = jointAlignments.back();
       // NOTE : score should not be filled in from a double
-      qaln.score = !tryAlign ? static_cast<int32_t >(jointHit.orphanClust()->coverage):jointHit.alignmentScore;
+      qaln.score = !tryAlign
+                       ? static_cast<int32_t>(jointHit.orphanClust()->coverage)
+                       : jointHit.alignmentScore;
       qaln.estAlnProb(estAlnProb);
       // NOTE : wth is numHits?
-      qaln.numHits = static_cast<uint32_t >(jointHits.size());//orphanClust()->coverage;
+      qaln.numHits =
+          static_cast<uint32_t>(jointHits.size()); // orphanClust()->coverage;
       qaln.mateStatus = jointHit.mateStatus;
-      if(singleEnd) {
+      if (singleEnd) {
         qaln.mateLen = readLen;
         qaln.mateCigar.clear();
         qaln.matePos = 0;
@@ -327,116 +358,732 @@ inline void filterAndCollectAlignments(
         qaln.mateStatus = MateStatus::SINGLE_END;
       }
     } else {
-      jointAlignments.emplace_back(tid,           // reference id
-                                   jointHit.leftClust->getTrFirstHitPos(),     // reference pos
-                                   jointHit.leftClust->isFw,     // fwd direction
-                                   readLen, // read length
-                                   jointHit.leftClust->cigar, // cigar string
-                                   jointHit.fragmentLen,       // fragment length
-                                   true);         // properly paired
+      jointAlignments.emplace_back(
+          tid,                                    // reference id
+          jointHit.leftClust->getTrFirstHitPos(), // reference pos
+          jointHit.leftClust->isFw,               // fwd direction
+          readLen,                                // read length
+          jointHit.leftClust->cigar,              // cigar string
+          jointHit.fragmentLen,                   // fragment length
+          true);                                  // properly paired
       // Fill in the mate info
-      auto &qaln = jointAlignments.back();
+      auto& qaln = jointAlignments.back();
       qaln.mateLen = mateLen;
       qaln.mateCigar = jointHit.rightClust->cigar;
-      qaln.matePos = static_cast<int32_t >(jointHit.rightClust->getTrFirstHitPos());
+      qaln.matePos =
+          static_cast<int32_t>(jointHit.rightClust->getTrFirstHitPos());
       qaln.mateIsFwd = jointHit.rightClust->isFw;
       qaln.mateStatus = MateStatus::PAIRED_END_PAIRED;
       // NOTE : wth is numHits?
-      qaln.numHits = static_cast<uint32_t >(jointHits.size());
+      qaln.numHits = static_cast<uint32_t>(jointHits.size());
       // NOTE : score should not be filled in from a double
-      qaln.score = !tryAlign ? static_cast<int32_t >(jointHit.leftClust->coverage):jointHit.alignmentScore;
+      qaln.score = !tryAlign
+                       ? static_cast<int32_t>(jointHit.leftClust->coverage)
+                       : jointHit.alignmentScore;
       qaln.estAlnProb(estAlnProb);
-      qaln.mateScore = !tryAlign ? static_cast<int32_t >(jointHit.rightClust->coverage):jointHit.mateAlignmentScore;
+      qaln.mateScore = !tryAlign
+                           ? static_cast<int32_t>(jointHit.rightClust->coverage)
+                           : jointHit.mateAlignmentScore;
     }
   }
   // done moving our alinged / score jointMEMs over to QuasiAlignment objects
 }
 
-
 inline void filterAndCollectAlignmentsDecoy(
-                                       std::vector<pufferfish::util::JointMems>& jointHits,
-                                       uint32_t readLen,
-                                       uint32_t mateLen,
-                                       bool singleEnd,
-                                       bool tryAlign,
-                                       bool hardFilter,
-                                       double scoreExp,
-                                       double minAlnProb,
-                                       salmon::mapping_utils::MappingScoreInfo& msi,
-                                       std::vector<pufferfish::util::QuasiAlignment>& jointAlignments) {
-// NOTE: this function should only be called in the case that we have valid decoy mappings to report.
-// Currently, this happens only when there are *no valid non-decoy* mappings.
-// Further, this function will only add equally *best* decoy mappings to the output jointAlignments object
-// regardless of the the status of hardFilter (i.e. no sub-optimal decoy mappings will be reported).
-(void) hardFilter;
-(void) minAlnProb;
-(void) scoreExp;
-double estAlnProb = 1.0; //std::exp(-scoreExp * 0.0);
-for (auto& idxTxp : msi.best_decoy_hits) {
-  int32_t ctr = idxTxp.first;
-  int32_t tid = idxTxp.second;
-  auto& jointHit = jointHits[ctr];
-
-  if (singleEnd or jointHit.isOrphan()) {
-    readLen = jointHit.isLeftAvailable() ? readLen : mateLen;
-    jointAlignments.emplace_back(
-        tid,                                        // reference id
-        jointHit.orphanClust()->getTrFirstHitPos(), // reference pos
-        jointHit.orphanClust()->isFw,               // fwd direction
-        readLen,                                    // read length
-        jointHit.orphanClust()->cigar,              // cigar string
-        jointHit.fragmentLen,                       // fragment length
-        false);
-    auto& qaln = jointAlignments.back();
-    // NOTE : score should not be filled in from a double
-    qaln.score = !tryAlign
-                     ? static_cast<int32_t>(jointHit.orphanClust()->coverage)
-                     : jointHit.alignmentScore;
-    qaln.estAlnProb(estAlnProb);
-    // NOTE : wth is numHits?
-    qaln.numHits =
-        static_cast<uint32_t>(jointHits.size()); // orphanClust()->coverage;
-    qaln.mateStatus = jointHit.mateStatus;
-    if (singleEnd) {
-      qaln.mateLen = readLen;
-      qaln.mateCigar.clear();
-      qaln.matePos = 0;
-      qaln.mateIsFwd = true;
-      qaln.mateScore = 0;
-      qaln.mateStatus = MateStatus::SINGLE_END;
+    std::vector<pufferfish::util::JointMems>& jointHits, uint32_t readLen,
+    uint32_t mateLen, bool singleEnd, bool tryAlign, bool hardFilter,
+    double scoreExp, double minAlnProb,
+    salmon::mapping_utils::MappingScoreInfo& msi,
+    std::vector<pufferfish::util::QuasiAlignment>& jointAlignments) {
+  // NOTE: this function should only be called in the case that we have valid
+  // decoy mappings to report. Currently, this happens only when there are *no
+  // valid non-decoy* mappings. Further, this function will only add equally
+  // *best* decoy mappings to the output jointAlignments object regardless of
+  // the the status of hardFilter (i.e. no sub-optimal decoy mappings will be
+  // reported).
+  (void)hardFilter;
+  (void)minAlnProb;
+  (void)scoreExp;
+  double estAlnProb = 1.0; // std::exp(-scoreExp * 0.0);
+  for (auto& idxTxp : msi.best_decoy_hits) {
+    int32_t ctr = idxTxp.first;
+    int32_t tid = idxTxp.second;
+    auto& jointHit = jointHits[ctr];
+
+    if (singleEnd or jointHit.isOrphan()) {
+      readLen = jointHit.isLeftAvailable() ? readLen : mateLen;
+      jointAlignments.emplace_back(
+          tid,                                        // reference id
+          jointHit.orphanClust()->getTrFirstHitPos(), // reference pos
+          jointHit.orphanClust()->isFw,               // fwd direction
+          readLen,                                    // read length
+          jointHit.orphanClust()->cigar,              // cigar string
+          jointHit.fragmentLen,                       // fragment length
+          false);
+      auto& qaln = jointAlignments.back();
+      // NOTE : score should not be filled in from a double
+      qaln.score = !tryAlign
+                       ? static_cast<int32_t>(jointHit.orphanClust()->coverage)
+                       : jointHit.alignmentScore;
+      qaln.estAlnProb(estAlnProb);
+      // NOTE : wth is numHits?
+      qaln.numHits =
+          static_cast<uint32_t>(jointHits.size()); // orphanClust()->coverage;
+      qaln.mateStatus = jointHit.mateStatus;
+      if (singleEnd) {
+        qaln.mateLen = readLen;
+        qaln.mateCigar.clear();
+        qaln.matePos = 0;
+        qaln.mateIsFwd = true;
+        qaln.mateScore = 0;
+        qaln.mateStatus = MateStatus::SINGLE_END;
+      }
+    } else {
+      jointAlignments.emplace_back(
+          tid,                                    // reference id
+          jointHit.leftClust->getTrFirstHitPos(), // reference pos
+          jointHit.leftClust->isFw,               // fwd direction
+          readLen,                                // read length
+          jointHit.leftClust->cigar,              // cigar string
+          jointHit.fragmentLen,                   // fragment length
+          true);                                  // properly paired
+      // Fill in the mate info
+      auto& qaln = jointAlignments.back();
+      qaln.mateLen = mateLen;
+      qaln.mateCigar = jointHit.rightClust->cigar;
+      qaln.matePos =
+          static_cast<int32_t>(jointHit.rightClust->getTrFirstHitPos());
+      qaln.mateIsFwd = jointHit.rightClust->isFw;
+      qaln.mateStatus = MateStatus::PAIRED_END_PAIRED;
+      // NOTE : wth is numHits?
+      qaln.numHits = static_cast<uint32_t>(jointHits.size());
+      // NOTE : score should not be filled in from a double
+      qaln.score = !tryAlign
+                       ? static_cast<int32_t>(jointHit.leftClust->coverage)
+                       : jointHit.alignmentScore;
+      qaln.estAlnProb(estAlnProb);
+      qaln.mateScore = !tryAlign
+                           ? static_cast<int32_t>(jointHit.rightClust->coverage)
+                           : jointHit.mateAlignmentScore;
     }
-  } else {
-    jointAlignments.emplace_back(
-        tid,                                    // reference id
-        jointHit.leftClust->getTrFirstHitPos(), // reference pos
-        jointHit.leftClust->isFw,               // fwd direction
-        readLen,                                // read length
-        jointHit.leftClust->cigar,              // cigar string
-        jointHit.fragmentLen,                   // fragment length
-        true);                                  // properly paired
-    // Fill in the mate info
-    auto& qaln = jointAlignments.back();
-    qaln.mateLen = mateLen;
-    qaln.mateCigar = jointHit.rightClust->cigar;
-    qaln.matePos =
-        static_cast<int32_t>(jointHit.rightClust->getTrFirstHitPos());
-    qaln.mateIsFwd = jointHit.rightClust->isFw;
-    qaln.mateStatus = MateStatus::PAIRED_END_PAIRED;
-    // NOTE : wth is numHits?
-    qaln.numHits = static_cast<uint32_t>(jointHits.size());
-    // NOTE : score should not be filled in from a double
-    qaln.score = !tryAlign ? static_cast<int32_t>(jointHit.leftClust->coverage)
-                           : jointHit.alignmentScore;
-    qaln.estAlnProb(estAlnProb);
-    qaln.mateScore = !tryAlign
-                         ? static_cast<int32_t>(jointHit.rightClust->coverage)
-                         : jointHit.mateAlignmentScore;
+  } // end for over best decoy hits
+}
+
+namespace pasc {
+
+constexpr int32_t invalid_frag_len = std::numeric_limits<int32_t>::min();
+constexpr int32_t invalid_mate_pos = std::numeric_limits<int32_t>::min();
+
+/* from pesc
+struct simple_hit {
+    bool is_fw{false};
+    bool mate_is_fw{false};
+    int32_t pos{-1};
+    float score{0.0};
+    uint32_t num_hits{0};
+    uint32_t tid{std::numeric_limits<uint32_t>::max()};
+    int32_t mate_pos{std::numeric_limits<int32_t>::max()};
+    int32_t fragment_length{std::numeric_limits<int32_t>::max()};
+    inline bool valid_pos(int32_t read_len, uint32_t txp_len, int32_t max_over)
+{ int32_t signed_txp_len = static_cast<int32_t>(txp_len); return (pos >
+-max_over) and ((pos + read_len) < (signed_txp_len + max_over));
+    }
+    inline bool has_mate() const { return mate_pos != invalid_mate_pos; }
+    inline bool mate_is_mapped() const { return mate_pos != invalid_mate_pos; }
+    inline int32_t frag_len() const {
+        return (fragment_length != invalid_frag_len) ? fragment_length : 0;
+    }
+};
+*/
+struct simple_hit {
+  bool is_fw{false};
+  int32_t pos{-1};
+  float score{0.0};
+  uint32_t num_hits{0};
+  uint32_t tid{std::numeric_limits<uint32_t>::max()};
+  // int32_t mate_pos{std::numeric_limits<int32_t>::max()};
+  // int32_t fragment_length{std::numeric_limits<int32_t>::max()};
+
+  // UNPAIRED_LEFT, UNPAIRED_RIGHT, PAIRED_FR, PAIRED_RF
+  PairingStatus pairing_status{PairingStatus::UNPAIRED_RIGHT};
+
+  bool valid_pos(int32_t read_len, uint32_t txp_len, int32_t max_over) {
+    int32_t signed_txp_len = static_cast<int32_t>(txp_len);
+    return (pos > -max_over) and
+           ((pos + read_len) < (signed_txp_len + max_over));
+  }
+
+  bool operator<(const simple_hit& hit2) {
+    if (tid != hit2.tid) { // hit1 and hit2 are on different transcripts
+      return tid < hit2.tid;
+    } else { // hit1 and hit2 are on the same transcript
+      // NOTE: Is this what we actually want?
+      if (is_fw) { // fw < rc
+        return true;
+      } else {
+        return false;
+      }
+    }
+  }
+};
+
+enum class HitDirection : uint8_t { FW, RC, BOTH };
+
+/*
+struct sketch_hit_info {
+  // add a hit to the current target that occurs in the forward
+  // orientation with respect to the target.
+  bool add_fw(int32_t ref_pos, int32_t read_pos, int32_t rl, int32_t k,
+              int32_t max_stretch, float score_inc) {
+    (void)rl;
+    (void)k;
+    bool added{false};
+
+    // since hits are collected by moving _forward_ in the
+    // read, if this is a fw hit, it should be moving
+    // forward in the reference. Only add it if this is
+    // the case.  This ensure that we don't
+    // double-count a k-mer that might occur twice on
+    // this target.
+    if (ref_pos > last_ref_pos_fw and read_pos > last_read_pos_fw) {
+      if (last_read_pos_fw == -1) {
+        approx_pos_fw = ref_pos - read_pos;
+      } else {
+        if ((ref_pos - approx_pos_fw) > max_stretch) {
+          return false;
+        }
+      }
+      // if (last_ref_pos_fw > -1 and (ref_pos > last_ref_pos_fw + 15)) { return
+      // false; }
+      last_ref_pos_fw = ref_pos;
+      last_read_pos_fw = read_pos;
+      fw_score += score_inc;
+      ++fw_hits;
+      added = true;
+    }
+    return added;
+  }
+
+  // add a hit to the current target that occurs in the forward
+  // orientation with respect to the target.
+  bool add_rc(int32_t ref_pos, int32_t read_pos, int32_t rl, int32_t k,
+              int32_t max_stretch, float score_inc) {
+
+    bool added{false};
+    // since hits are collected by moving _forward_ in the
+    // read, if this is an rc hit, it should be moving
+    // backwards in the reference. Only add it if this is
+    // the case.
+    // This ensures that we don't double-count a k-mer that
+    // might occur twice on this target.
+    if (ref_pos < last_ref_pos_rc and read_pos > last_read_pos_rc) {
+      approx_pos_rc = (ref_pos - (rl - (read_pos + k)));
+      if (last_read_pos_rc == -1) {
+        approx_end_pos_rc = ref_pos + read_pos;
+      } else {
+        if (approx_end_pos_rc - approx_pos_rc > max_stretch) {
+          return false;
+        }
+      }
+      // if (last_ref_pos_rc > -1 and ref_pos < last_ref_pos_rc - 15) { return
+      // false; }
+      last_ref_pos_rc = ref_pos;
+      last_read_pos_rc = read_pos;
+      rc_score += score_inc;
+      ++rc_hits;
+      added = true;
+    }
+    return added;
   }
-} // end for over best decoy hits
+
+  inline uint32_t max_hits_for_target() { return std::max(fw_hits, rc_hits); }
+
+  // true if forward, false if rc
+  // second element is score
+  inline HitDirection best_hit_direction() {
+    int32_t fw_minus_rc =
+        static_cast<int32_t>(fw_hits) - static_cast<int32_t>(rc_hits);
+    return (fw_minus_rc > 0)
+               ? HitDirection::FW
+               : ((fw_minus_rc < 0) ? HitDirection::RC : HitDirection::BOTH);
+  }
+
+  inline simple_hit get_fw_hit(PairingStatus ps) {
+    return simple_hit{true,
+                      approx_pos_fw,
+                      fw_score,
+                      fw_hits,
+                      std::numeric_limits<uint32_t>::max(),
+                      ps};
+  }
+
+  inline simple_hit get_rc_hit(PairingStatus ps) {
+    return simple_hit{false,
+                      approx_pos_rc,
+                      rc_score,
+                      rc_hits,
+                      std::numeric_limits<uint32_t>::max(),
+                      ps};
+  }
+
+  inline std::string to_string() {
+    std::stringstream ss;
+    ss << "fw_hits: " << fw_hits << ", fw_score : " << fw_score
+       << ", fw_pos : " << approx_pos_fw << " || rc_hits: " << rc_hits
+       << ", rc_score: " << rc_score << ", rc_pos: " << approx_pos_rc;
+    return ss.str();
+  }
+
+  int32_t last_read_pos_fw{-1};
+  int32_t last_read_pos_rc{-1};
+
+  int32_t last_ref_pos_fw{-1};
+  int32_t last_ref_pos_rc{std::numeric_limits<int32_t>::max()};
+
+  int32_t approx_pos_fw{-1};
+  int32_t approx_pos_rc{-1};
+  int32_t approx_end_pos_rc{-1};
+
+  uint32_t fw_hits{0};
+  uint32_t rc_hits{0};
+  float fw_score{0.0};
+  float rc_score{0.0};
+};*/
+
+struct sketch_hit_info {
+    // add a hit to the current target that occurs in the forward
+    // orientation with respect to the target.
+    bool add_fw(int32_t ref_pos, int32_t read_pos, int32_t rl, int32_t k, int32_t max_stretch,
+                float score_inc) {
+        (void)rl;
+        (void)k;
+        bool added{false};
+
+        // since hits are collected by moving _forward_ in the
+        // read, if this is a fw hit, it should be moving
+        // forward in the reference. Only add it if this is
+        // the case.  This ensure that we don't
+        // double-count a k-mer that might occur twice on
+        // this target.
+        if (ref_pos > last_ref_pos_fw and read_pos > last_read_pos_fw) {
+            if (last_read_pos_fw == -1) {
+                approx_pos_fw = ref_pos - read_pos;
+            } else {
+                if ((ref_pos - approx_pos_fw) > max_stretch) { return false; }
+            }
+            last_ref_pos_fw = ref_pos;
+            last_read_pos_fw = read_pos;
+            fw_score += score_inc;
+            ++fw_hits;
+            added = true;
+        }
+        return added;
+    }
+
+    // add a hit to the current target that occurs in the forward
+    // orientation with respect to the target.
+    bool add_rc(int32_t ref_pos, int32_t read_pos, int32_t rl, int32_t k, int32_t max_stretch,
+                float score_inc) {
+        bool added{false};
+        // since hits are collected by moving _forward_ in the
+        // read, if this is an rc hit, it should be moving
+        // backwards in the reference. 
+        // In general, we only add the hit if this is the case.
+        // This ensures that we don't double-count a k-mer that
+        // might occur twice on this target.
+
+        // we have a special case here; what if the same exact 
+        // k-mer (i.e. not just the same sequence but same position
+        // on the query) occurs more than one time on this refernece?
+        //
+        // In that case, the GENERAL case code will already have 
+        // processed and seen a k-mer with the read position 
+        // equal to `read_pos`.  In the case below, we see 
+        // a hit with the *same* read pos again (one or more times).
+        // 
+        // Here, we swap out the previous hit having this read_pos 
+        // if the position of the current hit on the read is
+        // the same and the position on the reference is greater 
+        // (this is a heuristic to help in the case of tandem repeats or 
+        // highly-repetitive subsequence).
+        // NOTE: consider if a similar heuristic should be
+        // adopted for the forward case.
+        if ((read_pos == last_read_pos_rc) and (ref_pos > last_ref_pos_rc) and
+            (ref_pos < rightmost_bound_rc)) {
+
+            last_ref_pos_rc = ref_pos;
+            // if the read_pos was the same as the first read pos
+            // then also update the approx_end_pos_rc accordingly
+            // NOTE: for the time being don't mess with this position
+            // empirically this does better, but if we really want 
+            // to optimize this for accuracy we need a better general
+            // heuristic.
+            // if (read_pos == first_read_pos_rc) {
+            //   approx_end_pos_rc = ref_pos + read_pos;
+            //}
+
+            return added;
+        }
+
+        // GENERAL case
+        if (ref_pos < last_ref_pos_rc and read_pos > last_read_pos_rc) {
+            approx_pos_rc = (ref_pos - (rl - (read_pos + k)));
+            if (last_read_pos_rc == -1) {
+                approx_end_pos_rc = ref_pos + read_pos;
+                first_read_pos_rc = read_pos;
+            } else {
+                if (approx_end_pos_rc - approx_pos_rc > max_stretch) { return false; }
+            }
+            rc_score += score_inc;
+            ++rc_hits;
+
+            // new
+            rightmost_bound_rc = last_ref_pos_rc;
+
+            last_ref_pos_rc = ref_pos;
+            last_read_pos_rc = read_pos;
+            added = true;
+        }
+        return added;
+    }
+
+    inline uint32_t max_hits_for_target() { return std::max(fw_hits, rc_hits); }
+
+    // true if forward, false if rc
+    // second element is score
+    inline HitDirection best_hit_direction() {
+        int32_t fw_minus_rc = static_cast<int32_t>(fw_hits) - static_cast<int32_t>(rc_hits);
+        return (fw_minus_rc > 0) ? HitDirection::FW
+                                 : ((fw_minus_rc < 0) ? HitDirection::RC : HitDirection::BOTH);
+    }
+
+    inline simple_hit get_fw_hit(PairingStatus ps) {
+        return simple_hit{true, approx_pos_fw,
+                          fw_score, fw_hits, std::numeric_limits<uint32_t>::max(), ps};
+    }
+
+    inline simple_hit get_rc_hit(PairingStatus ps) {
+        return simple_hit{false, approx_pos_rc,
+                          rc_score, rc_hits, std::numeric_limits<uint32_t>::max(), ps};
+    }
+
+    inline std::string to_string() {
+        std::stringstream ss;
+        ss << "fw_hits: " << fw_hits << ", fw_score : " << fw_score
+           << ", fw_pos : " << approx_pos_fw << " || rc_hits: " << rc_hits
+           << ", rc_score: " << rc_score << ", rc_pos: " << approx_pos_rc;
+        return ss.str();
+    }
+
+    int32_t last_read_pos_fw{-1};
+    int32_t last_read_pos_rc{-1};
+    int32_t rightmost_bound_rc{std::numeric_limits<int32_t>::max()};
+
+    // marks the read position (key) of the 
+    // first hit we see in the rc direction
+    int32_t first_read_pos_rc{-1};
+
+    int32_t last_ref_pos_fw{-1};
+    int32_t last_ref_pos_rc{std::numeric_limits<int32_t>::max()};
+
+    int32_t approx_pos_fw{-1};
+    int32_t approx_pos_rc{-1};
+    int32_t approx_end_pos_rc{-1};
+
+    uint32_t fw_hits{0};
+    uint32_t rc_hits{0};
+    float fw_score{0.0};
+    float rc_score{0.0};
+};
+
+/**
+ * This contains the information necessary to collect hits and perform
+ * mapping on a read using PASC.
+ **/
+template <typename IndexT> struct mapping_cache_info {
+public:
+  mapping_cache_info(IndexT* midx, size_t max_occ_default_in,
+                     size_t max_occ_recover_in, size_t max_read_occ_in)
+      : idx(midx), k(midx->k()), hit_searcher(midx),
+        max_occ_default(max_occ_default_in),
+        max_occ_recover(max_occ_recover_in),
+        attempt_occ_recover(max_occ_recover > max_occ_default),
+        max_read_occ(max_read_occ_in), alt_max_occ(max_read_occ_in) {}
+
+  inline void clear() {
+    map_type = salmon::utils::MappingType::UNMAPPED;
+    hit_searcher.clear();
+    hit_map.clear();
+    accepted_hits.clear();
+    alt_max_occ = max_read_occ;
+    has_matching_kmers = false;
+  }
+
+  // will store how the read mapped
+  salmon::utils::MappingType map_type{salmon::utils::MappingType::UNMAPPED};
+
+  // map from reference id to hit info
+  phmap::flat_hash_map<uint32_t, sketch_hit_info> hit_map;
+
+  // where the mappings that pass will be stored
+  std::vector<simple_hit> accepted_hits;
+
+  // map to recall the number of unmapped reads we see
+  // for each barcode
+  phmap::flat_hash_map<uint64_t, uint32_t> unmapped_bc_map;
+
+  // pointer to the underlying index
+  IndexT* idx{nullptr};
+
+  // the k-mer size of our index
+  size_t k{0};
+
+  // implements the PASC algorithm
+  MemCollector<IndexT> hit_searcher;
+
+  // the number of occurrences of a hit above which we
+  // won't consider it
+  size_t max_occ_default;
+
+  // the number of occurences of a hit above which we
+  // won't consider it, even in recovery mode
+  size_t max_occ_recover;
+
+  // will we attempt recovery if there are no hits with
+  // frequency below max_occ_default?
+  bool attempt_occ_recover;
+
+  // maximum number of places a read can occur and still
+  // be considered.
+  size_t max_read_occ;
+
+  size_t alt_max_occ;
+
+  // to perform queries
+  pufferfish::util::QueryCache qc;
+
+  // regardless of having full mappings, did any k-mers match
+  bool has_matching_kmers{false};
+};
+
+/**
+ * This function will map the read given by `read_seq`
+ * using PASC. The relevant parameters (e.g. maximum ambiguity
+ * of seed and maximum number of allowable mappings) are stored 
+ * in the `map_cache` structure.  The `PairingStatus` is passed in 
+ * by the caller and designates if the read being mapped is the 
+ * left end or the right end.
+ **/
+template <typename IndexT>
+inline bool map_read(std::string* read_seq,
+                     mapping_cache_info<IndexT>& map_cache, PairingStatus ps,
+                     bool verbose = false) {
+  map_cache.clear();
+  // rebind map_cache variables to
+  // local names
+  IndexT* qidx = map_cache.idx;
+  auto& qc = map_cache.qc;
+  auto& hit_searcher = map_cache.hit_searcher;
+  auto& hit_map = map_cache.hit_map;
+  auto& accepted_hits = map_cache.accepted_hits;
+  auto& map_type = map_cache.map_type;
+  const bool attempt_occ_recover = map_cache.attempt_occ_recover;
+  auto k = map_cache.k;
+  int32_t signed_k = static_cast<int32_t>(k);
+
+  // collect the set of matching seeds
+  map_cache.has_matching_kmers =
+      hit_searcher.get_raw_hits_sketch(*read_seq, qc, true, false);
+  bool early_stop = false;
+
+  // if there were hits
+  if (map_cache.has_matching_kmers) {
+    uint32_t num_valid_hits{0};
+    uint64_t total_occs{0};
+    uint64_t largest_occ{0};
+    auto& raw_hits = hit_searcher.get_left_hits();
+
+    // SANITY
+    decltype(raw_hits[0].first) prev_read_pos = -1;
+    // the maximum span the supporting k-mers of a
+    // mapping position are allowed to have.
+    // NOTE this is still > read_length b/c the stretch is measured wrt the
+    // START of the terminal k-mer.
+    int32_t max_stretch = static_cast<int32_t>(read_seq->length() * 1.0);
+
+    // a raw hit is a pair of read_pos and a projected hit
+
+    // the least frequent hit for this fragment.
+    uint64_t min_occ = std::numeric_limits<uint64_t>::max();
+
+    // this is false by default and will be set to true
+    // if *every* collected hit for this fragment occurs
+    // max_occ_default times or more.
+    bool had_alt_max_occ = false;
+    int32_t signed_rl = static_cast<int32_t>(read_seq->length());
+    auto collect_mappings_from_hits =
+        [&max_stretch, &min_occ, &hit_map, &num_valid_hits, &total_occs,
+         &largest_occ, &early_stop, signed_rl, k, signed_k, qidx,
+         verbose](auto& raw_hits, auto& prev_read_pos, auto& max_allowed_occ,
+                  auto& had_alt_max_occ) -> bool {
+      for (auto& raw_hit : raw_hits) {
+        auto& read_pos = raw_hit.first;
+        auto& proj_hits = raw_hit.second;
+        auto& refs = proj_hits.refRange;
+
+        // Keep track of the *least* ambiguous hit we see.
+        // If all hits occur more than `max_allowed_occ` times,
+        // but the least ambiguous hit (which occurs `min_occ`
+        // times) has < `max_occ_recover` occurrences, then 
+        // we will use all hits occurring `min_occ` number 
+        // of times or less to try top recover the read's mappings.
+        uint64_t num_occ = static_cast<uint64_t>(refs.size());
+        min_occ = std::min(min_occ, num_occ);
+        had_alt_max_occ = true;
+
+        // we visit every hit that occurs the allowable number of time
+        bool still_have_valid_target = false;
+        prev_read_pos = read_pos;
+        if (num_occ <= max_allowed_occ) {
+          total_occs += num_occ;
+          largest_occ = (num_occ > largest_occ) ? num_occ : largest_occ;
+          float score_inc = 1.0;
+
+          // vist each mapped reference position (mrp) where this hit 
+          // occurs.
+          for (auto& pos_it : refs) {
+            const auto& ref_pos_ori = proj_hits.decodeHit(pos_it);
+            uint32_t tid =
+                static_cast<uint32_t>(qidx->getRefId(pos_it.transcript_id()));
+            int32_t pos = static_cast<int32_t>(ref_pos_ori.pos);
+            bool ori = ref_pos_ori.isFW;
+            auto& target = hit_map[tid];
+
+            // Why >= here instead of == ?
+            // Because hits can happen on the same target in both the forward
+            // and rc orientations, it is possible that we start the loop with
+            // the target having num_valid_hits hits in a given orientation (o)
+            // we see a new hit for this target in oriention o (now it has
+            // num_valid_hits + 1) then we see a hit for this target in
+            // orientation rc(o).  We still want to add / consider this hit, but
+            // max_hits_for_target() > num_valid_hits. So, we must allow for
+            // that here.
+            if (target.max_hits_for_target() >= num_valid_hits) {
+              if (ori) {
+                target.add_fw(pos, static_cast<int32_t>(read_pos), signed_rl,
+                              signed_k, max_stretch, score_inc);
+              } else {
+                target.add_rc(pos, static_cast<int32_t>(read_pos), signed_rl,
+                              signed_k, max_stretch, score_inc);
+              }
+
+              // if this target is still valid after evaluating this hit 
+              // then we will have at least one globally valid target. If 
+              // no targets are valid after evaluating all mrps for this 
+              // hit then we can exit the search early.
+              still_have_valid_target |=
+                  (target.max_hits_for_target() >= num_valid_hits + 1);
+            }
+
+          } // DONE: for (auto &pos_it : refs)
+
+          ++num_valid_hits;
+
+          // if there are no targets reaching the valid hit threshold, then
+          // break early
+          if (!still_have_valid_target) {
+            return true;
+          }
+
+        } // DONE : if (num_occ <= max_allowed_occ)
+      }   // DONE : for (auto& raw_hit : raw_hits)
+
+      return false;
+    };
+
+    bool _discard = false;
+    auto mao_first_pass = map_cache.max_occ_default - 1;
+    early_stop = collect_mappings_from_hits(raw_hits, prev_read_pos,
+                                            mao_first_pass, _discard);
+
+    // If our default threshold was too stringent, then fallback to a more
+    // liberal threshold and look up the k-mers that occur the least frequently.
+    // Specifically, if the min occuring hits have frequency < max_occ_recover
+    // (2500 by default) times, then collect the min occuring hits to get the
+    // mapping.
+    if (attempt_occ_recover and (min_occ >= map_cache.max_occ_default) and
+        (min_occ < map_cache.max_occ_recover)) {
+      prev_read_pos = -1;
+      uint64_t max_allowed_occ = min_occ;
+      early_stop = collect_mappings_from_hits(raw_hits, prev_read_pos,
+                                              max_allowed_occ, had_alt_max_occ);
+    }
+
+    uint32_t best_alt_hits = 0;
+
+    for (auto& kv : hit_map) {
+      auto best_hit_dir = kv.second.best_hit_direction();
+      // if the best direction is FW or BOTH, add the fw hit
+      // otherwise add the RC.
+      auto mapping = (best_hit_dir != HitDirection::RC)
+                         ? kv.second.get_fw_hit(ps)
+                         : kv.second.get_rc_hit(ps);
+
+      if (mapping.num_hits >= num_valid_hits) {
+        mapping.tid = kv.first;
+        accepted_hits.emplace_back(mapping);
+        // if we had equally good hits in both directions
+        // add the rc hit here (since we added the fw)
+        // above if the best hit was either FW or BOTH
+        if (best_hit_dir == HitDirection::BOTH) {
+          auto second_hit = kv.second.get_rc_hit(ps);
+          second_hit.tid = kv.first;
+          accepted_hits.emplace_back(second_hit);
+        }
+      } else {
+        // best_alt_score = mapping.score > best_alt_score ? mapping.score :
+        // best_alt_score;
+        best_alt_hits =
+            mapping.num_hits > best_alt_hits ? mapping.num_hits : best_alt_hits;
+      }
+    }
+
+    map_cache.alt_max_occ =
+        had_alt_max_occ ? accepted_hits.size() : map_cache.max_occ_default;
+
+    /*
+     * This rule; if enabled, allows through mappings missing a single hit, if
+    there
+     * was no mapping with all hits. NOTE: this won't work with the current
+    early-exit
+     * optimization however.
+     if (accepted_hits.empty() and (num_valid_hits > 1) and (best_alt_hits >=
+     num_valid_hits
+     - 1)) { for (auto& kv : hit_map) { auto mapping = kv.second.get_best_hit();
+    if (mapping.num_hits >= best_alt_hits) {
+    //if (mapping.valid_pos(signed_read_len, transcripts[kv.first].RefLength,
+    10)) { mapping.tid = kv.first; accepted_hits.emplace_back(mapping);
+    //}
+    }
+    }
+    }
+    */
+  } // DONE : if (rh)
+
+  // If the read mapped to > maxReadOccs places, discard it
+  if (accepted_hits.size() > map_cache.alt_max_occ) {
+    accepted_hits.clear();
+    map_type = salmon::utils::MappingType::UNMAPPED;
+  } else if (!accepted_hits.empty()) {
+    map_type = salmon::utils::MappingType::SINGLE_MAPPED;
+  }
+
+  return early_stop;
 }
 
+} // namespace pasc
 
-  } // namespace mapping_utils
+} // namespace mapping_utils
 } // namespace salmon
 
 #endif //__SALMON_MAPPING_UTILS__


=====================================
include/Transcript.hpp
=====================================
@@ -14,8 +14,7 @@
 #include <limits>
 #include <memory>
 
-//#include "rapmap/bit_array.h"
-#include "pufferfish/compact_vector/compact_vector.hpp"
+#include "compact_vector/compact_vector.hpp"
 #include "pufferfish/rank9b.hpp"
 
 class Transcript {


=====================================
scripts/fetchPufferfish.sh
=====================================
@@ -23,11 +23,11 @@ if [ -d ${INSTALL_DIR}/src/pufferfish ] ; then
     rm -fr ${INSTALL_DIR}/src/pufferfish
 fi
 
-SVER=salmon-v1.9.0
+SVER=salmon-v1.10.1
 #SVER=develop
 #SVER=sketch-mode
 
-EXPECTED_SHA256=2a862daeff95a19c9b188bc26a5d02fc0ecc5b9c9ae5523a67c9d14e62551c5d
+EXPECTED_SHA256=cf2a007f3817c1087abd4170db70e6b3c04aa24babecf92a2d9d2eb7784b6021
 
 mkdir -p ${EXTERNAL_DIR}
 curl -k -L https://github.com/COMBINE-lab/pufferfish/archive/${SVER}.zip -o ${EXTERNAL_DIR}/pufferfish.zip
@@ -92,7 +92,8 @@ cp ${EXTERNAL_DIR}/pufferfish/include/BinWriter.hpp ${INSTALL_DIR}/include/puffe
 
 cp -r ${EXTERNAL_DIR}/pufferfish/include/libdivide ${INSTALL_DIR}/include/pufferfish
 cp -r ${EXTERNAL_DIR}/pufferfish/include/ksw2pp ${INSTALL_DIR}/include/pufferfish
-cp -r ${EXTERNAL_DIR}/pufferfish/include/compact_vector ${INSTALL_DIR}/include/pufferfish
+# this is now automatically tracked and inherited via twopaco (on which libpuffer depends)
+# cp -r ${EXTERNAL_DIR}/pufferfish/include/compact_vector ${INSTALL_DIR}/include/pufferfish
 cp -r ${EXTERNAL_DIR}/pufferfish/include/metro ${INSTALL_DIR}/include/pufferfish
 cp -r ${EXTERNAL_DIR}/pufferfish/include/itlib ${INSTALL_DIR}/include/pufferfish
 cp -r ${EXTERNAL_DIR}/pufferfish/include/sparsepp ${INSTALL_DIR}/include/pufferfish


=====================================
scripts/v1_10x/run.sh
=====================================
@@ -13,8 +13,8 @@ p2="$tmpdir/p2.fa"
 mkfifo $p1
 mkfifo $p2
 
-i1=`ls $base*I1*`
-wrapper <(cat $base*I1*) <(cat $base*RA*) >> $p1 2>> $p2 &
+i1=`ls $base/*I1*`
+wrapper <(cat $base/*I1*) <(cat $base/*RA*) >> $p1 2>> $p2 &
 
 echo "Running command [${new_cmd} -1 $p1 -2 $p2 -r $i1]"
 ${new_cmd} -1 $p1 -2 $p2 -r $i1


=====================================
src/Alevin.cpp
=====================================
@@ -164,10 +164,10 @@ std::vector<size_t> sort_indexes(const std::vector<T> &v) {
 
 // reference from https://github.com/scipy/scipy/blob/master/scipy/stats/kde.py
 // and https://github.com/CGATOxford/UMI-tools/blob/master/umi_tools/umi_methods.py#L193
-bool gaussianKDE(const std::vector<uint32_t>& freqCounter,
+bool gaussianKDE(const std::vector<uint64_t>& freqCounter,
                  const std::vector<size_t>& sortedIdx,
                  double& invCovariance, double& normFactor,
-                 uint32_t expect_cells, uint32_t& predicted_cells){
+                 uint64_t expect_cells, uint64_t& predicted_cells){
   double covariance{0.0}, mean{0.0}, bw_method {0.01}, threshold = 0.001*freqCounter[sortedIdx[0]];
   std::vector<double> logDataset;
   size_t numElem, xSpace {10000};
@@ -218,7 +218,7 @@ bool gaussianKDE(const std::vector<uint32_t>& freqCounter,
   }
 
   //calculating the argrelextrema
-  std::vector<uint32_t> local_mins;
+  std::vector<uint64_t> local_mins;
   for (size_t i=1; i<xSpace-1; i++){
     if (density[i-1] > density[i] and density[i] < density[i+1]){
       local_mins.emplace_back(i);
@@ -247,26 +247,26 @@ bool gaussianKDE(const std::vector<uint32_t>& freqCounter,
   return false;
 }
 
-uint32_t getLeftBoundary(std::vector<size_t>& sortedIdx,
-                         uint32_t topxBarcodes,
-                         const std::vector<uint32_t>& freqCounter){
+uint64_t getLeftBoundary(std::vector<size_t>& sortedIdx,
+                         uint64_t topxBarcodes,
+                         const std::vector<uint64_t>& freqCounter){
   // iterate in reverse order since sortedIdx is sorted in decreasing
   // order
   double cumCount{0.0};
   std::vector<double> freqs(topxBarcodes);
-  for(uint32_t i = 0; i < topxBarcodes; i++){
+  for(uint64_t i = 0; i < topxBarcodes; i++){
     size_t ind = sortedIdx[topxBarcodes-i-1];
     cumCount += freqCounter[ind];
     freqs[i] = std::log(cumCount);
   }
 
-  std::vector<uint32_t> X(topxBarcodes);
+  std::vector<uint64_t> X(topxBarcodes);
   std::iota(X.begin(), X.end(), 0);
 
   bool isUp;
-  uint32_t x;
+  uint64_t x;
   double y, slope, leftExtreme{freqs[0]};
-  for(uint32_t j=0; j<topxBarcodes; j++){
+  for(uint64_t j=0; j<topxBarcodes; j++){
     x = X[j];
     y = freqs[j];
 
@@ -279,7 +279,7 @@ uint32_t getLeftBoundary(std::vector<size_t>& sortedIdx,
     slope = y/x;
     // fill in the values for fitted line
     std::transform(X.begin()+nextBcIdx, X.end(), Y.begin(),
-                   [slope](uint32_t i) {return i*slope;} );
+                   [slope](uint64_t i) {return i*slope;} );
 
     isUp = false;
     double curveY, lineY;
@@ -294,9 +294,9 @@ uint32_t getLeftBoundary(std::vector<size_t>& sortedIdx,
 
     if(isUp == false){
       // ignoring all the frequencies having same frequency as cutoff
-      uint32_t cutoff = topxBarcodes-j;
-      uint32_t cutoffFrequency = freqCounter[sortedIdx[cutoff]];
-      uint32_t nearestLeftFrequency = cutoffFrequency;
+      uint64_t cutoff = topxBarcodes-j;
+      uint64_t cutoffFrequency = freqCounter[sortedIdx[cutoff]];
+      uint64_t nearestLeftFrequency = cutoffFrequency;
       while(nearestLeftFrequency == cutoffFrequency){
         nearestLeftFrequency = freqCounter[sortedIdx[--cutoff]];
       }
@@ -310,13 +310,13 @@ uint32_t getLeftBoundary(std::vector<size_t>& sortedIdx,
 
 void dumpFeatures(std::string& frequencyFileName,
                   std::vector<size_t>& sortedIdx,
-                  const std::vector<uint32_t>& freqCounter,
+                  const std::vector<uint64_t>& freqCounter,
                   std::unordered_map<uint32_t, nonstd::basic_string_view<char>>& colMap
                   ){
   std::ofstream freqFile;
   freqFile.open(frequencyFileName);
   for (auto i:sortedIdx){
-    uint32_t count = freqCounter[i];
+    uint64_t count = freqCounter[i];
     if (count == 0){
       break;
     }
@@ -330,7 +330,7 @@ void dumpFeatures(std::string& frequencyFileName,
   Knee calculation and sample true set of barcodes
  */
 template <typename ProtocolT>
-void sampleTrueBarcodes(const std::vector<uint32_t>& freqCounter,
+void sampleTrueBarcodes(const std::vector<uint64_t>& freqCounter,
                         TrueBcsT& trueBarcodes, size_t& lowRegionNumBarcodes,
                         std::unordered_map<uint32_t, nonstd::basic_string_view<char>> colMap,
                         AlevinOpts<ProtocolT>& aopt){
@@ -338,7 +338,7 @@ void sampleTrueBarcodes(const std::vector<uint32_t>& freqCounter,
   size_t maxNumBarcodes { aopt.maxNumBarcodes }, lowRegionMaxNumBarcodes { 1000 };
   size_t lowRegionMinNumBarcodes { aopt.lowRegionMinNumBarcodes };
   double lowConfidenceFraction { 0.5 };
-  uint32_t topxBarcodes = std::min(maxNumBarcodes, freqCounter.size());
+  uint64_t topxBarcodes = std::min(maxNumBarcodes, freqCounter.size());
   uint64_t history { 0 };
 
   if (aopt.forceCells > 0) {
@@ -358,13 +358,13 @@ void sampleTrueBarcodes(const std::vector<uint32_t>& freqCounter,
     // Expect Cells algorithm is taken from
     // https://github.com/10XGenomics/cellranger/blob/e5396c6c444acec6af84caa7d3655dd33a162852/lib/python/cellranger/stats.py#L138
 
-    constexpr uint32_t MAX_FILTERED_CELLS = 2;
+    constexpr uint64_t MAX_FILTERED_CELLS = 2;
     constexpr double UPPER_CELL_QUANTILE = 0.01;
     constexpr double FRACTION_MAX_FREQUENCY = 0.1;
 
-    uint32_t baselineBcs = std::max(1, static_cast<int32_t>( aopt.expectCells*UPPER_CELL_QUANTILE ));
+    uint64_t baselineBcs = std::max(static_cast<uint64_t>(1), static_cast<uint64_t>( aopt.expectCells*UPPER_CELL_QUANTILE ));
     double cutoffFrequency = std::max(1.0, freqCounter[sortedIdx[baselineBcs]] * FRACTION_MAX_FREQUENCY);
-    uint32_t maxNumCells = std::min(static_cast<uint32_t>(freqCounter.size()), aopt.expectCells * MAX_FILTERED_CELLS);
+    uint64_t maxNumCells = std::min(static_cast<uint64_t>(freqCounter.size()), aopt.expectCells * MAX_FILTERED_CELLS);
 
     topxBarcodes = maxNumCells;
     for(size_t i=baselineBcs; i<maxNumCells; i++){
@@ -395,7 +395,7 @@ void sampleTrueBarcodes(const std::vector<uint32_t>& freqCounter,
                           green, topxBarcodes, RESET_COLOR);
 
       double invCovariance {0.0}, normFactor{0.0};
-      uint32_t gaussThreshold;
+      uint64_t gaussThreshold;
       bool isGaussOk = gaussianKDE(freqCounter, sortedIdx,
                                    invCovariance, normFactor,
                                    topxBarcodes, gaussThreshold);
@@ -421,7 +421,7 @@ void sampleTrueBarcodes(const std::vector<uint32_t>& freqCounter,
     }
   }//end-left-knee finding case
 
-  uint32_t fractionTrueBarcodes = static_cast<int>(lowConfidenceFraction * topxBarcodes);
+  uint64_t fractionTrueBarcodes = static_cast<uint64_t>(lowConfidenceFraction * topxBarcodes);
 
   if (fractionTrueBarcodes < lowRegionMinNumBarcodes){
     lowRegionNumBarcodes = lowRegionMinNumBarcodes;
@@ -442,8 +442,8 @@ void sampleTrueBarcodes(const std::vector<uint32_t>& freqCounter,
   }
   topxBarcodes += lowRegionNumBarcodes;
 
-  uint32_t cutoffFrequency = freqCounter[sortedIdx[ topxBarcodes ]];
-  uint32_t nearestLeftFrequency = cutoffFrequency;
+  uint64_t cutoffFrequency = freqCounter[sortedIdx[ topxBarcodes ]];
+  uint64_t nearestLeftFrequency = cutoffFrequency;
   while(nearestLeftFrequency == cutoffFrequency && lowRegionNumBarcodes != 0){
     nearestLeftFrequency = freqCounter[sortedIdx[--topxBarcodes]];
     if (lowRegionNumBarcodes > lowRegionMinNumBarcodes)  { lowRegionNumBarcodes--; }
@@ -484,7 +484,7 @@ void indexBarcodes(AlevinOpts<ProtocolT>& aopt,
                    SoftMapT& barcodeSoftMap){
   std::unordered_set<std::string> neighbors;
   std::unordered_map<std::string, std::vector<std::string>> ZMatrix;
-  uint32_t wrngWhiteListCount{0};
+  uint64_t wrngWhiteListCount{0};
   for (const auto trueBarcode: trueBarcodes){
     neighbors.clear();
     //find all neighbors to this true barcode
@@ -493,7 +493,7 @@ void indexBarcodes(AlevinOpts<ProtocolT>& aopt,
                        neighbors);
 
     for(const auto& neighbor : neighbors){
-      uint32_t freq{0};
+      uint64_t freq{0};
       auto indexIt = freqCounter.find(neighbor);
       bool indexOk = indexIt != freqCounter.end();
       //bool indexOk = freqCounter.find(neighbor, freq);
@@ -542,7 +542,7 @@ bool writeFastq(AlevinOpts<ProtocolT>& aopt,
                 SoftMapT& barcodeMap,
                 TrueBcsT& trueBarcodes){
   size_t rangeSize{0};
-  uint32_t totNumBarcodes{0};
+  uint64_t totNumBarcodes{0};
   std::string barcode( aopt.protocol.barcodeLength, 'N');
   std::string umi( aopt.protocol.umiLength, 'N');
 
@@ -717,7 +717,7 @@ void processBarcodes(std::vector<std::string>& barcodeFiles,
     if (aopt.dumpfeatures) {
       aopt.jointLog->info("Sorting and dumping raw barcodes");
       auto frequencyFileName = (aopt.outputDirectory / "raw_cb_frequency.txt").string();
-      std::vector<uint32_t> collapsedfrequency;
+      std::vector<uint64_t> collapsedfrequency;
       std::unordered_map<uint32_t, nonstd::basic_string_view<char>> collapMap;
       size_t ind{0};
       for(auto fqIt = freqCounter.begin(); fqIt != freqCounter.end(); ++fqIt){
@@ -731,7 +731,7 @@ void processBarcodes(std::vector<std::string>& barcodeFiles,
     }
   }
   else {
-    std::vector<uint32_t> collapsedfrequency;
+    std::vector<uint64_t> collapsedfrequency;
     std::unordered_map<uint32_t, nonstd::basic_string_view<char>> collapMap;
     size_t ind{0};
     for(auto fqIt = freqCounter.begin(); fqIt != freqCounter.end(); ++fqIt){
@@ -741,7 +741,7 @@ void processBarcodes(std::vector<std::string>& barcodeFiles,
     }
 
     if (aopt.keepCBFraction != 0.0) {
-      aopt.forceCells = std::min(static_cast<uint32_t>(aopt.keepCBFraction * freqCounter.size()),
+      aopt.forceCells = std::min(static_cast<uint64_t>(aopt.keepCBFraction * freqCounter.size()),
                                  aopt.maxNumBarcodes);
       aopt.jointLog->info("Forcing to use {} cells", aopt.forceCells);
       aopt.jointLog->flush();


=====================================
src/AlevinUtils.cpp
=====================================
@@ -348,7 +348,7 @@ namespace alevin {
       (void)read2;
       pt.anchorPos = read.find(pt.anchorSeq);
       if (pt.anchorPos != std::string::npos && ( pt.anchorPos == pt.maxHairpinIndexLen || pt.anchorPos == pt.maxHairpinIndexLen -1) // only 2 possible values of pt.anchorPos
-         && read.length() >= pt.barcodeLength + pt.umiLength + pt.anchorSeqLen) {
+         && read.length() >= pt.barcodeLength + pt.umiLength + pt.anchorSeqLen -2) { // Subtract 2 to counter artificial increase in barcode length
            std::string bcAssign = read.substr(0,pt.anchorPos) + read.substr(pt.anchorPos + pt.anchorSeqLen + pt.umiLength, pt.rtIdxLen);
         if (pt.anchorPos < pt.maxHairpinIndexLen) { // hairpin index can be 9 or 10 bp
            bcAssign += "AC";


=====================================
src/CMakeLists.txt
=====================================
@@ -141,7 +141,6 @@ else()
   set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE)
 endif()
 
-
 set (TGT_RELEASE_FLAGS "${TGT_COMPILE_FLAGS};${TGT_WARN_FLAGS}")
 set (TGT_DEBUG_FLAGS "-g;${TGT_COMPILE_FLAGS};${TGT_WARN_FLAGS}")
 
@@ -155,6 +154,8 @@ HAVE_SSTREAM=1
 STX_NO_STD_STRING_VIEW=1
 span_FEATURE_MAKE_SPAN_TO_STD=14
 )
+target_include_directories(salmon_core PUBLIC ${COMPACT_VECTOR_INCLUDE_PATH})
+
 
 if (USE_ARM)
 target_compile_definitions(salmon_core PUBLIC KSW_USE_ARM=1)
@@ -176,6 +177,7 @@ HAVE_ANSI_TERM=1
 HAVE_SSTREAM=1
 span_FEATURE_MAKE_SPAN_TO_STD=14
 )
+target_include_directories(alevin_core PUBLIC ${COMPACT_VECTOR_INCLUDE_PATH})
 
 if (USE_ARM)
 target_compile_definitions(alevin_core PUBLIC KSW_USE_ARM=1)
@@ -190,6 +192,7 @@ endif()
 
 # Build the salmon executable
 add_executable(salmon ${SALMON_MAIN_SRCS} ${SALMON_ALIGN_SRCS})
+target_include_directories(salmon PUBLIC ${COMPACT_VECTOR_INCLUDE_PATH})
 
 if(HAS_IPO AND (NOT NO_IPO))
   set_property(TARGET salmon PROPERTY INTERPROCEDURAL_OPTIMIZATION True)
@@ -202,6 +205,7 @@ target_compile_options(UnitTestsMain PUBLIC "$<$<CONFIG:RELEASE>:${TGT_RELEASE_F
 add_executable(unitTests ${UNIT_TESTS_INDIVIDUAL_SRCS} ${GAT_SOURCE_DIR}/tests/catch.hpp)
 target_compile_options(unitTests PUBLIC "$<$<CONFIG:DEBUG>:${TGT_DEBUG_FLAGS}>")
 target_compile_options(unitTests PUBLIC "$<$<CONFIG:RELEASE>:${TGT_RELEASE_FLAGS}>")
+target_include_directories(unitTests PUBLIC ${COMPACT_VECTOR_INCLUDE_PATH})
 
 #add_executable(salmon-read ${SALMON_READ_SRCS})
 #set_target_properties(salmon-read PROPERTIES COMPILE_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_LIBPTHREAD -D_PBGZF_USE -fopenmp"
@@ -225,6 +229,7 @@ add_dependencies(salmon ksw2pp)
 add_dependencies(salmon salmon_core)
 add_dependencies(salmon alevin_core)
 if(TBB_RECONFIGURE OR TBB_TARGET_EXISTED)
+
 # Link the executable
 target_link_libraries(salmon
     Threads::Threads 
@@ -236,9 +241,10 @@ target_link_libraries(salmon
     gff
     ${Boost_LIBRARIES}
     ${ICU_LIBS}
-    ${STADEN_LIBRARIES} ${CURL_LIBRARIES}
+    ${CURL_LIBRARIES}
     ${ZLIB_LIBRARY}
     m
+    ${STADEN_LIBRARIES} 
     ${LIBLZMA_LIBRARIES}
     ${BZIP2_LIBRARIES}
     ${LIBSALMON_LINKER_FLAGS}
@@ -269,12 +275,13 @@ if(TBB_RECONFIGURE OR TBB_TARGET_EXISTED)
 target_link_libraries(unitTests
     Threads::Threads
 ## PUFF_INTEGRATION
+    ${Boost_LIBRARIES}
     salmon_core
     alevin_core
     gff
     UnitTestsMain
-    ${STADEN_LIBRARIES} 
     ${Boost_LIBRARIES}
+    ${STADEN_LIBRARIES} 
     ${ICU_LIBS}
     ${CURL_LIBRARIES}
     ${ZLIB_LIBRARY}


=====================================
src/SalmonAlevin.cpp
=====================================
@@ -414,15 +414,10 @@ void process_reads_sc_sketch(paired_parser* parser, ReadExperimentT& readExp, Re
   salmon::utils::ShortFragStats shortFragStats;
   double maxZeroFrac{0.0};
 
-  // Write unmapped reads
-  fmt::MemoryWriter unmappedNames;
-  bool writeUnmapped = salmonOpts.writeUnmappedNames;
-  spdlog::logger* unmappedLogger = (writeUnmapped) ? salmonOpts.unmappedLog.get() : nullptr;
-
-  // Write unmapped reads
-  fmt::MemoryWriter orphanLinks;
-  bool writeOrphanLinks = salmonOpts.writeOrphanLinks;
-  spdlog::logger* orphanLinkLogger = (writeOrphanLinks) ? salmonOpts.orphanLinkLog.get() : nullptr;
+  bool quiet = salmonOpts.quiet;
+  if (salmonOpts.writeQualities) {
+    salmonOpts.jointLog->warn("The --writeQualities flag has no effect when writing results to a RAD file.");
+  }
 
   BasicBinWriter bw;
   uint32_t num_reads_in_chunk{0};
@@ -436,200 +431,49 @@ void process_reads_sc_sketch(paired_parser* parser, ReadExperimentT& readExp, Re
   uint64_t localNumAssignedFragments{0};
   uint64_t localNumUniqueMappings{0};
 
-  bool consistentHits = salmonOpts.consistentHits;
-  bool quiet = salmonOpts.quiet;
-
-  size_t maxNumHits{salmonOpts.maxReadOccs};
   size_t readLenLeft{0};
   size_t readLenRight{0};
 
-  constexpr const int32_t invalidScore = std::numeric_limits<int32_t>::min();
-  MemCollector<IndexT> memCollector(qidx);
-  ksw2pp::KSW2Aligner aligner;
-  pufferfish::util::AlignmentConfig aconf;
-  pufferfish::util::MappingConstraintPolicy mpol;
-  bool initOK = salmon::mapping_utils::initMapperSettings(salmonOpts, memCollector, aligner, aconf, mpol);
-  PuffAligner puffaligner(qidx->refseq_, qidx->refAccumLengths_, qidx->k(), aconf, aligner);
-
-  pufferfish::util::CachedVectorMap<size_t, std::vector<pufferfish::util::MemCluster>, std::hash<size_t>> hits;
-  std::vector<pufferfish::util::MemCluster> recoveredHits;
-  std::vector<pufferfish::util::JointMems> jointHits;
-  PairedAlignmentFormatter<IndexT*> formatter(qidx);
-  if (salmonOpts.writeQualities) {
-    salmonOpts.jointLog->warn("The --writeQualities flag has no effect when writing results to a RAD file.");
-  }
-  pufferfish::util::QueryCache qc;
-
-  bool mimicStrictBT2 = salmonOpts.mimicStrictBT2;
-  bool mimicBT2 = salmonOpts.mimicBT2;
-  bool noDovetail = !salmonOpts.allowDovetail;
-  bool useChainingHeuristic = !salmonOpts.disableChainingHeuristic;
-
-  pufferfish::util::HitCounters hctr;
   salmon::utils::MappingType mapType{salmon::utils::MappingType::UNMAPPED};
-  bool hardFilter = salmonOpts.hardFilter;
 
   fmt::MemoryWriter sstream;
   auto* qmLog = salmonOpts.qmLog.get();
   bool writeQuasimappings = (qmLog != nullptr);
-
-  struct SimpleHit {
-    bool is_fw{false};
-    int32_t pos{-1};
-    float score{0.0};
-    uint32_t num_hits{0};
-    uint32_t tid{std::numeric_limits<uint32_t>::max()};
-    bool valid_pos(int32_t read_len, uint32_t txp_len, int32_t max_over) {
-      int32_t signed_txp_len = static_cast<int32_t>(txp_len);
-      return (pos > -max_over) and ((pos + read_len) < (signed_txp_len + max_over));
-    }
-  };
-
-  enum class HitDirection : uint8_t {FW, RC, BOTH};
-
-  struct SketchHitInfo {
-    // add a hit to the current target that occurs in the forward 
-    // orientation with respect to the target.
-    bool add_fw(int32_t ref_pos, int32_t read_pos, int32_t rl, int32_t k, int32_t max_stretch, float score_inc) {
-      (void)rl;
-      (void)k;
-      bool added{false};
-      
-      // since hits are collected by moving _forward_ in the
-      // read, if this is a fw hit, it should be moving 
-      // forward in the reference. Only add it if this is
-      // the case.  This ensure that we don't 
-      // double-count a k-mer that might occur twice on
-      // this target.
-      if (ref_pos > last_ref_pos_fw and read_pos > last_read_pos_fw) {
-        if (last_read_pos_fw == -1) {
-          approx_pos_fw = ref_pos - read_pos;
-        } else {
-          if ((ref_pos - approx_pos_fw) > max_stretch) { return false; }
-        }
-        //if (last_ref_pos_fw > -1 and (ref_pos > last_ref_pos_fw + 15)) { return false; }
-        last_ref_pos_fw = ref_pos;
-        last_read_pos_fw = read_pos;
-        fw_score += score_inc;
-        ++fw_hits;
-        added = true;
-      }   
-      return added;
-    }
-
-    // add a hit to the current target that occurs in the forward 
-    // orientation with respect to the target.
-    bool add_rc(int32_t ref_pos, int32_t read_pos, int32_t rl, int32_t k, int32_t max_stretch, float score_inc) {
-
-      bool added{false};
-      // since hits are collected by moving _forward_ in the
-      // read, if this is an rc hit, it should be moving 
-      // backwards in the reference. Only add it if this is
-      // the case.
-      // This ensures that we don't double-count a k-mer that 
-      // might occur twice on this target.
-      if (ref_pos < last_ref_pos_rc and read_pos > last_read_pos_rc) {
-        approx_pos_rc = (ref_pos - (rl - (read_pos + k)));
-        if (last_read_pos_rc == -1) { 
-          approx_end_pos_rc = ref_pos + read_pos; 
-        } else {
-          if (approx_end_pos_rc - approx_pos_rc > max_stretch) { return false;}
-        }
-        //if (last_ref_pos_rc > -1 and ref_pos < last_ref_pos_rc - 15) { return false; }
-        last_ref_pos_rc = ref_pos;
-        last_read_pos_rc = read_pos;
-        rc_score += score_inc;
-        ++rc_hits;
-        added = true;
-        
-      }
-      return added;
-    }
-
-    inline uint32_t max_hits_for_target() {
-      return std::max(fw_hits, rc_hits);
-    }
-
-    // true if forward, false if rc
-    // second element is score
-    inline HitDirection best_hit_direction() {
-      int32_t fw_minus_rc = static_cast<int32_t>(fw_hits) - static_cast<int32_t>(rc_hits);
-      return (fw_minus_rc > 0) ? HitDirection::FW : 
-             ((fw_minus_rc < 0) ? HitDirection::RC : HitDirection::BOTH);
-    }
-
-    inline SimpleHit get_fw_hit() {
-      return SimpleHit{true, approx_pos_fw, fw_score, fw_hits, std::numeric_limits<uint32_t>::max()}; 
-    }
-
-    inline SimpleHit get_rc_hit() {
-      return SimpleHit{false, approx_pos_rc, rc_score, rc_hits, std::numeric_limits<uint32_t>::max()};
-    }
-
-    inline SimpleHit get_best_hit() {
-      auto best_direction = best_hit_direction();
-      return (best_direction != HitDirection::RC) ? 
-        SimpleHit{true, approx_pos_fw, fw_score, fw_hits, std::numeric_limits<uint32_t>::max()} : 
-        SimpleHit{false, approx_pos_rc, rc_score, rc_hits, std::numeric_limits<uint32_t>::max()};
-    }
-
-    inline std::string to_string() {
-      std::stringstream ss;
-      ss << "fw_hits: " << fw_hits << ", fw_score : " << fw_score << ", fw_pos : " << approx_pos_fw
-         << " || rc_hits: " << rc_hits << ", rc_score: " << rc_score << ", rc_pos: " << approx_pos_rc;
-      return ss.str();
-    }
-
-    int32_t last_read_pos_fw{-1};
-    int32_t last_read_pos_rc{-1};
-
-    int32_t last_ref_pos_fw{-1};
-    int32_t last_ref_pos_rc{std::numeric_limits<int32_t>::max()};
-    
-    int32_t approx_pos_fw{-1};
-    int32_t approx_pos_rc{-1};
-    int32_t approx_end_pos_rc{-1};
-
-    uint32_t fw_hits{0};
-    uint32_t rc_hits{0};
-    float fw_score{0.0};
-    float rc_score{0.0}; 
-  };
-
+  PairedAlignmentFormatter<IndexT*> formatter(qidx);
+  
   // map to recall the number of unmapped reads we see 
   // for each barcode
   phmap::flat_hash_map<uint64_t, uint32_t> unmapped_bc_map; 
 
-  // map from transcript id to hit info
-  phmap::flat_hash_map<uint32_t, SketchHitInfo> hit_map; 
-  std::vector<SimpleHit> accepted_hits;
-
-  //////////////////////
-  // NOTE: validation mapping based new parameters
-  std::string rc1; rc1.reserve(300);
-
-
   // check the frequency and decide here if we should
   // be attempting recovery of highly-multimapping reads
   const size_t max_occ_default = salmonOpts.maxReadOccs;
   const size_t max_occ_recover = salmonOpts.maxRecoverReadOccs;
   const bool attempt_occ_recover = (max_occ_recover > max_occ_default);
 
-  size_t numMappingsDropped{0};
-  size_t numDecoyFrags{0};
-  const double decoyThreshold = salmonOpts.decoyThreshold;
+  // think if this should be different since it's at the read level
+  const size_t max_read_occs = salmonOpts.maxReadOccs;
+  size_t alt_max_occ = max_read_occs;
 
-  salmon::mapping_utils::MappingScoreInfo msi(decoyThreshold);
-  // we only collect detailed decoy information if we will be 
-  // writing output to SAM.
-  msi.collect_decoys(writeQuasimappings);
+  // we'll use this for mapping
+  salmon::mapping_utils::pasc::mapping_cache_info<IndexT> map_cache_first(qidx, max_occ_default, max_occ_recover, max_read_occs);
+  salmon::mapping_utils::pasc::mapping_cache_info<IndexT> map_cache_second(qidx, max_occ_default, max_occ_recover, max_read_occs);
+
+  std::vector<salmon::mapping_utils::pasc::simple_hit>* accepted_hits_left_ptr;
+  std::vector<salmon::mapping_utils::pasc::simple_hit>* accepted_hits_right_ptr;
+  std::vector<salmon::mapping_utils::pasc::simple_hit>* accepted_hits_ptr;
+
+  // buffer in which to accumulate accepted hits if we are merging for left 
+  // and right reads
+  std::vector<salmon::mapping_utils::pasc::simple_hit> accepted_hits_buffer;
 
   std::string readBuffer;
   std::string umi(alevinOpts.protocol.umiLength, 'N');
   std::string barcode(alevinOpts.protocol.barcodeLength, 'N');
   //////////////////////
 
-  bool tryAlign{salmonOpts.validateMappings};
+  auto localProtocol = alevinOpts.protocol;
+  // start parsing reads
   auto rg = parser->getReadGroup();
   while (parser->refill(rg)) {
     rangeSize = rg.size();
@@ -650,45 +494,35 @@ void process_reads_sc_sketch(paired_parser* parser, ReadExperimentT& readExp, Re
     	extraBAMtags.reserve(reserveSize);
     }
 
-    auto localProtocol = alevinOpts.protocol;
     for (size_t i = 0; i < rangeSize; ++i) { // For all the read in this batch
       auto& rp = rg[i];
       readLenLeft = rp.first.seq.length();
       readLenRight= rp.second.seq.length();
 
+      // while the pasc code path will use the jointHitGroup to 
+      // store UMI and BarCode info, it won't actually use the 
+      // "alignment" fields
       bool tooShortRight = (readLenRight < (minK+alevinOpts.trimRight));
-      //localUpperBoundHits = 0;
       auto& jointHitGroup = structureVec[i];
       jointHitGroup.clearAlignments();
-      auto& jointAlignments= jointHitGroup.alignments();
 
-      hits.clear();
-      jointHits.clear();
-      memCollector.clear();
-      hit_map.clear(); 
-      accepted_hits.clear();
-      //jointAlignments.clear();
-      //readSubSeq.clear();
       mapType = salmon::utils::MappingType::UNMAPPED;
+      // ensure that accepted_hits_ptr points to a valid 
+      // vector in case it's not set below
+      accepted_hits_ptr = &accepted_hits_buffer;
+      alt_max_occ = max_read_occs;
 
       //////////////////////////////////////////////////////////////
       // extracting barcodes
       size_t barcodeLength = alevinOpts.protocol.barcodeLength;
       size_t umiLength = alevinOpts.protocol.umiLength;
-      //umi.clear();
-      //barcode.clear();
+
       nonstd::optional<uint32_t> barcodeIdx;
       extraBAMtags.clear();
       bool seqOk = false;
 
-      // keep track of the *least* freqeuntly 
-      // occurring hit in this fragment to consider 
-      // alternative processing of fragments where 
-      // all observed hits have high frequency.
-      uint64_t alt_max_occ = 0;
-
       if (alevinOpts.protocol.end == bcEnd::FIVE ||
-          alevinOpts.protocol.end == bcEnd::THREE){
+          alevinOpts.protocol.end == bcEnd::THREE) {
         // If the barcode sequence could be extracted, then this is set to true,
         // but the barcode sequence itself may still be invalid (e.g. contain `N` characters).
         // However, if extracted_barcode is false here, there is no hope to even recover the
@@ -721,164 +555,15 @@ void process_reads_sc_sketch(paired_parser* parser, ReadExperimentT& readExp, Re
 
             if (isUmiIdxOk) {
               jointHitGroup.setUMI(umiIdx.word(0));
-              bool rh = false;
-              std::string* readSubSeq = aut::getReadSequence(localProtocol, rp.first.seq, rp.second.seq, readBuffer);
-              rh = tooShortRight
-                       ? false
-                       : memCollector.get_raw_hits_sketch(*readSubSeq,
-                                                          qc,
-                                                          true, // isLeft
-                                                          false // verbose
-                         );
-              // if there were hits
-              if (rh) {
-                uint32_t num_valid_hits{0};
-                uint64_t total_occs{0};
-                uint64_t largest_occ{0};
-                auto& raw_hits = memCollector.get_left_hits();
-
-                // SANITY
-                decltype(raw_hits[0].first) prev_read_pos = -1;
-                // the maximum span the supporting k-mers of a
-                // mapping position are allowed to have.
-                // NOTE this is still > read_length b/c the stretch is measured wrt the
-                // START of the terminal k-mer.
-                int32_t signed_rl = static_cast<int32_t>(readSubSeq->length());
-                int32_t max_stretch = static_cast<int32_t>(signed_rl * 1.0);
-
-                // a raw hit is a pair of read_pos and a projected hit
-
-                // the least frequent hit for this fragment.
-                uint64_t min_occ = std::numeric_limits<uint64_t>::max();
-
-                // this is false by default and will be set to true
-                // if *every* collected hit for this fragment occurs
-                // salmonOpts.maxReadOccs times or more.
-                bool had_alt_max_occ = false;
-
-                auto collect_mappings_from_hits = [&max_stretch, &min_occ, &hit_map,
-                                                   &salmonOpts, &num_valid_hits, &total_occs,
-                                                   &largest_occ, &qidx, signed_rl, signed_k](
-                  auto& raw_hits, auto& prev_read_pos,
-                  auto& max_allowed_occ, auto& had_alt_max_occ
-                ) -> bool {
-                  for (auto& raw_hit : raw_hits) {
-                    auto& read_pos = raw_hit.first;
-                    auto& proj_hits = raw_hit.second;
-                    auto& refs = proj_hits.refRange;
-                    uint64_t num_occ = static_cast<uint64_t>(refs.size());
-                    min_occ = std::min(min_occ, num_occ);
-                    had_alt_max_occ = true;
-
-                    bool still_have_valid_target = false;
-                    prev_read_pos = read_pos;
-                    if (num_occ <= max_allowed_occ) {
-
-                      total_occs += num_occ;
-                      largest_occ = (num_occ > largest_occ) ? num_occ : largest_occ;
-                      float score_inc = 1.0;
-
-                      for (auto &pos_it : refs) {
-                        const auto& ref_pos_ori = proj_hits.decodeHit(pos_it);
-                        uint32_t tid = static_cast<uint32_t>(qidx->getRefId(pos_it.transcript_id()));
-                        int32_t pos = static_cast<int32_t>(ref_pos_ori.pos);
-                        bool ori = ref_pos_ori.isFW;
-                        auto& target = hit_map[tid];
-
-                        // Why >= here instead of == ?
-                        // Because hits can happen on the same target in both the forward
-                        // and rc orientations, it is possible that we start the loop with
-                        // the target having num_valid_hits hits in a given orientation (o)
-                        // we see a new hit for this target in oriention o (now it has num_valid_hits + 1)
-                        // then we see a hit for this target in orientation rc(o).  We still want to
-                        // add / consider this hit, but max_hits_for_target() > num_valid_hits.
-                        // So, we must allow for that here.
-                        if (target.max_hits_for_target() >= num_valid_hits) {
-                          if (ori) {
-                            target.add_fw(pos, static_cast<int32_t>(read_pos), 
-                                          signed_rl, signed_k, max_stretch, score_inc);
-                          } else {
-                            target.add_rc(pos, static_cast<int32_t>(read_pos), 
-                                          signed_rl, signed_k, max_stretch, score_inc);
-                          }
-
-                          still_have_valid_target |= (target.max_hits_for_target() >= num_valid_hits + 1);
-                        }
-
-                      } // DONE: for (auto &pos_it : refs)
-
-                      ++num_valid_hits;
-
-                      // if there are no targets reaching the valid hit threshold, then break early
-                      if (!still_have_valid_target) { return true; }
-
-                    } // DONE : if (num_occ <= max_allowed_occ)
-                  } // DONE : for (auto& raw_hit : raw_hits)
-
-                  return false;
-                };
-
-                bool _discard = false;
-                auto mao_first_pass = max_occ_default - 1;
-                bool early_stop = collect_mappings_from_hits(raw_hits, prev_read_pos, mao_first_pass, _discard);
-
-                // If our default threshold was too stringent, then fallback to a more liberal
-                // threshold and look up the k-mers that occur the least frequently.
-                // Specifically, if the min occuring hits have frequency < max_occ_recover (2500 by default)
-                // times, then collect the min occuring hits to get the mapping.
-                if (attempt_occ_recover and (min_occ >= max_occ_default) and (min_occ < max_occ_recover)) {
-                  prev_read_pos = -1;
-                  uint64_t max_allowed_occ = min_occ;
-                  early_stop = collect_mappings_from_hits(raw_hits, prev_read_pos, max_allowed_occ, had_alt_max_occ);
-                }
 
-                uint32_t best_alt_hits = 0;
-                int32_t signed_read_len = static_cast<int32_t>(readSubSeq->length());
-
-                for (auto& kv : hit_map) {
-                  auto best_hit_dir = kv.second.best_hit_direction();
-                  // if the best direction is FW or BOTH, add the fw hit
-                  // otherwise add the RC.
-                  auto simple_hit = (best_hit_dir != HitDirection::RC) ? 
-                                    kv.second.get_fw_hit() : kv.second.get_rc_hit();
-
-                  if (simple_hit.num_hits >= num_valid_hits) { 
-                    simple_hit.tid = kv.first;
-                    accepted_hits.emplace_back(simple_hit);
-                    // if we had equally good hits in both directions
-                    // add the rc hit here (since we added the fw)
-                    // above if the best hit was either FW or BOTH
-                    if (best_hit_dir == HitDirection::BOTH) {
-                      auto second_hit = kv.second.get_rc_hit();
-                      second_hit.tid = kv.first;
-                      accepted_hits.emplace_back(second_hit);
-                    }
-                  } else {
-                    //best_alt_score = simple_hit.score > best_alt_score ? simple_hit.score : best_alt_score;
-                    best_alt_hits = simple_hit.num_hits > best_alt_hits ? simple_hit.num_hits : best_alt_hits;
-                  }
-                }
-
-                alt_max_occ = had_alt_max_occ ? accepted_hits.size() : salmonOpts.maxReadOccs;
-
-                /*
-                 * This rule; if enabled, allows through mappings missing a single hit, if there
-                 * was no mapping with all hits. NOTE: this won't work with the current early-exit
-                 * optimization however.
-                if (accepted_hits.empty() and (num_valid_hits > 1) and (best_alt_hits >= num_valid_hits - 1)) {
-                  for (auto& kv : hit_map) { 
-                    auto simple_hit = kv.second.get_best_hit();
-                    if (simple_hit.num_hits >= best_alt_hits) {
-                      //if (simple_hit.valid_pos(signed_read_len, transcripts[kv.first].RefLength, 10)) {
-                        simple_hit.tid = kv.first;
-                        accepted_hits.emplace_back(simple_hit);
-                      //}
-                    }
-                  }
-                }
-                */
-              } // DONE : if (rh)
+              // get the subsequence for the alignable portion of the read
+              std::string* readSubSeq = aut::getReadSequence(localProtocol, rp.first.seq, rp.second.seq, readBuffer);
 
+              // try and map it
+              bool early_exit = salmon::mapping_utils::pasc::map_read(readSubSeq, map_cache_second, PairingStatus::UNPAIRED_RIGHT);
+              // in this case accepted_hits_ptr gets set to what we found
+              accepted_hits_ptr = &map_cache_second.accepted_hits;
+              alt_max_occ = map_cache_second.alt_max_occ;
             } else {
               nSeqs += 1;
             }
@@ -900,19 +585,20 @@ void process_reads_sc_sketch(paired_parser* parser, ReadExperimentT& readExp, Re
       }
 
       // If the read mapped to > maxReadOccs places, discard it
-      if (accepted_hits.size() > alt_max_occ ) { 
-        accepted_hits.clear();
-      } else if (!accepted_hits.empty()) {
+      if (accepted_hits_ptr->size() > alt_max_occ) { 
+        accepted_hits_ptr->clear();
+      } else if (!accepted_hits_ptr->empty()) {
         mapType = salmon::utils::MappingType::SINGLE_MAPPED;
       }
 
+
       alevin::types::AlevinCellBarcodeKmer bck;
       bool barcode_ok = bck.fromChars(barcode);
  
       // NOTE: Think if we should put decoy mappings in the RAD file
       if (mapType == salmon::utils::MappingType::SINGLE_MAPPED) {
         // num aln
-        bw << static_cast<uint32_t>(accepted_hits.size()); 
+        bw << static_cast<uint32_t>(accepted_hits_ptr->size()); 
 
         // bc
         // if we can fit the barcode into an integer 
@@ -941,7 +627,7 @@ void process_reads_sc_sketch(paired_parser* parser, ReadExperimentT& readExp, Re
           bw << umi;
         }
 
-        for (auto& aln : accepted_hits) {
+        for (auto& aln : (*accepted_hits_ptr)) {
           uint32_t fw_mask = aln.is_fw ? 0x80000000 : 0x00000000;
           bw << (aln.tid | fw_mask);
         }
@@ -969,9 +655,9 @@ void process_reads_sc_sketch(paired_parser* parser, ReadExperimentT& readExp, Re
         bw << num_reads_in_chunk;
       }
 
-      validHits += accepted_hits.size();
-      localNumAssignedFragments += (accepted_hits.size() > 0);
-      localNumUniqueMappings += (accepted_hits.size() == 1) ? 1 : 0;
+      validHits += accepted_hits_ptr->size();
+      localNumAssignedFragments += (accepted_hits_ptr->size() > 0);
+      localNumUniqueMappings += (accepted_hits_ptr->size() == 1) ? 1 : 0;
       locRead++;
       ++numObservedFragments;
       jointHitGroup.clearAlignments();
@@ -1028,7 +714,7 @@ void process_reads_sc_sketch(paired_parser* parser, ReadExperimentT& readExp, Re
   }
   numAssignedFragments += localNumAssignedFragments;
   numUniqueMappings += localNumUniqueMappings;
-  mstats.numDecoyFragments += numDecoyFrags;
+  mstats.numDecoyFragments += 0; // no decoys with pasc (yet?)
   readExp.updateShortFrags(shortFragStats);
 }
 



View it on GitLab: https://salsa.debian.org/med-team/salmon/-/compare/de70ca9f4c8e48c60fc69f96735d148949a153ba...2c11223de9e8257b3f0fe51897fb3f96f2309bc2

-- 
View it on GitLab: https://salsa.debian.org/med-team/salmon/-/compare/de70ca9f4c8e48c60fc69f96735d148949a153ba...2c11223de9e8257b3f0fe51897fb3f96f2309bc2
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230312/bd0ea6cd/attachment-0001.htm>


More information about the debian-med-commit mailing list