[med-svn] [salmon] 02/04: Imported Upstream version 0.4.2+ds1

Michael Crusoe misterc-guest at moszumanska.debian.org
Sat Jan 30 08:16:07 UTC 2016


This is an automated email from the git hooks/post-receive script.

misterc-guest pushed a commit to branch master
in repository salmon.

commit b6b142af4f009ec66dd631e277f40b1e79434477
Author: Michael R. Crusoe <crusoe at ucdavis.edu>
Date:   Sat Jan 30 00:07:34 2016 -0800

    Imported Upstream version 0.4.2+ds1
---
 include/CollapsedIterativeOptimizer.hpp.bak | 2069 ---------------------------
 src/SalmonQuantify.cpp.bak                  | 1314 -----------------
 2 files changed, 3383 deletions(-)

diff --git a/include/CollapsedIterativeOptimizer.hpp.bak b/include/CollapsedIterativeOptimizer.hpp.bak
deleted file mode 100644
index cc9e4bd..0000000
--- a/include/CollapsedIterativeOptimizer.hpp.bak
+++ /dev/null
@@ -1,2069 +0,0 @@
-/**
->HEADER
-    Copyright (c) 2013 Rob Patro robp at cs.cmu.edu
-
-    This file is part of Sailfish.
-
-    Sailfish is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    Sailfish is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with Sailfish.  If not, see <http://www.gnu.org/licenses/>.
-<HEADER
-**/
-
-
-#ifndef COLLAPSED_ITERATIVE_OPTIMIZER_HPP
-#define COLLAPSED_ITERATIVE_OPTIMIZER_HPP
-
-#include <algorithm>
-#include <cassert>
-#include <cmath>
-#include <unordered_map>
-#include <map>
-#include <vector>
-#include <unordered_set>
-#include <mutex>
-#include <thread>
-#include <sstream>
-#include <exception>
-#include <random>
-#include <queue>
-#include "btree_map.h"
-
-/** Boost Includes */
-#include <boost/dynamic_bitset/dynamic_bitset.hpp>
-#include <boost/range/irange.hpp>
-#include <boost/program_options.hpp>
-#include <boost/shared_ptr.hpp>
-#include <boost/scoped_ptr.hpp>
-#include <boost/heap/fibonacci_heap.hpp>
-#include <boost/accumulators/statistics/stats.hpp>
-#include <boost/accumulators/framework/accumulator_set.hpp>
-#include <boost/accumulators/statistics/p_square_quantile.hpp>
-#include <boost/accumulators/statistics/count.hpp>
-#include <boost/accumulators/statistics/median.hpp>
-#include <boost/accumulators/statistics/weighted_mean.hpp>
-#include <boost/lockfree/queue.hpp>
-#include <boost/thread/thread.hpp>
-#include <boost/math/special_functions/digamma.hpp>
-#include <boost/math/distributions/beta.hpp>
-
-
-//#include <Eigen/Core>
-#include <jellyfish/sequence_parser.hpp>
-#include <jellyfish/parse_read.hpp>
-#include <jellyfish/mer_counting.hpp>
-#include <jellyfish/misc.hpp>
-#include <jellyfish/compacted_hash.hpp>
-
-#include "tbb/concurrent_unordered_set.h"
-#include "tbb/concurrent_vector.h"
-#include "tbb/concurrent_unordered_map.h"
-#include "tbb/concurrent_queue.h"
-#include "tbb/parallel_for.h"
-#include "tbb/parallel_for_each.h"
-#include "tbb/parallel_reduce.h"
-#include "tbb/blocked_range.h"
-#include "tbb/task_scheduler_init.h"
-#include "tbb/partitioner.h"
-
-#include "BiasIndex.hpp"
-#include "ezETAProgressBar.hpp"
-#include "LookUpTableUtils.hpp"
-#include "SalmonMath.hpp"
-
-template <typename ReadHash>
-class CollapsedIterativeOptimizer {
-
-private:
-    /**
-    * Type aliases
-    */
-    using TranscriptID = uint32_t;
-    using KmerID =  uint64_t;
-    using Count = uint32_t;
-    using KmerQuantity = double;
-    using Promiscuity = double;
-    using KmerMap = tbb::concurrent_unordered_map< uint64_t, tbb::concurrent_vector<uint32_t> >;
-
-    struct TranscriptGeneVectors;
-    using TranscriptIDVector = std::vector<TranscriptID>;
-    using KmerIDMap = std::vector<TranscriptIDVector>;
-
-
-    using TranscriptKmerSet = std::tuple<TranscriptID, std::vector<KmerID>>;
-    using StringPtr = std::string*;
-    using TranscriptScore = uint64_t;
-    using HashArray = jellyfish::invertible_hash::array<uint64_t, atomic::gcc, allocators::mmap>;
-    using ReadLength = size_t;
-
-    // Necessary forward declaration
-    struct TranscriptData;
-    using HeapPair = std::tuple<TranscriptScore, TranscriptID>;
-    using Handle = typename boost::heap::fibonacci_heap<HeapPair>::handle_type;
-    using BlockedIndexRange =  tbb::blocked_range<size_t>;
-
-    struct TranscriptGeneVectors {
-        tbb::concurrent_vector<uint32_t> transcripts;
-        tbb::concurrent_vector<uint32_t> genes;
-    };
-
-    struct TranscriptData {
-        TranscriptID id;
-        StringPtr header;
-        std::map<KmerID, KmerQuantity> binMers;
-        KmerQuantity mean;
-        size_t length;
-    };
-
-    class TranscriptInfo {
-      public:
-
-      TranscriptInfo() : binMers(std::unordered_map<KmerID, KmerQuantity>()),
-                         //logLikes(std::vector<KmerQuantity>()),
-                         //weights(std::vector<KmerQuantity>()),
-                         mean(0.0), fracLow(0.0), fracHigh(0.0), length(0), effectiveLength(0) { updated.store(0); /* weightNum.store(0); totalWeight.store(0.0);*/ }
-      TranscriptInfo(TranscriptInfo&& other) {
-        std::swap(binMers, other.binMers);
-        //std::swap(weights, other.weights);
-        //std::swap(logLikes, other.logLikes);
-        //totalWeight.store(other.totalWeight.load());
-        //weightNum.store(other.weightNum.load());
-        updated.store(other.updated.load());
-        mean = other.mean;
-        length = other.length;
-        effectiveLength = other.effectiveLength;
-      }
-        //std::atomic<double> totalWeight;
-        //btree::btree_map<KmerID, KmerQuantity> binMers;
-        //std::vector<KmerQuantity> weights;
-        //std::vector<KmerQuantity> logLikes;
-        //std::atomic<uint32_t> weightNum;
-        std::atomic<uint32_t> updated;
-        std::unordered_map<KmerID, KmerQuantity> binMers;
-        KmerQuantity mean;
-        KmerQuantity fracLow, fracHigh;
-        ReadLength length;
-        ReadLength effectiveLength;
-    };
-
-    // This struct represents a "job" (transcript) that needs to be processed
-    struct TranscriptJob {
-        StringPtr header;
-        StringPtr seq;
-        TranscriptID id;
-    };
-
-    struct TranscriptResult {
-        TranscriptData *data;
-        TranscriptKmerSet *ks;
-    };
-
-    struct BinmerUpdates {
-        std::vector<KmerID> zeroedBinmers;
-        std::vector<KmerID> updatedBinmers;
-    };
-
-    bool loggedCounts_;
-    uint32_t numThreads_;
-    size_t merLen_;
-    ReadHash & readHash_;
-    BiasIndex& biasIndex_;
-    std::string kmerEquivClassFname_;
-
-    // The number of occurrences above which a kmer is considered promiscuous
-    size_t promiscuousKmerCutoff_ {std::numeric_limits<size_t>::max()};
-
-    // Map each kmer to the set of transcripts it occurs in
-    KmerIDMap transcriptsForKmer_;
-
-    // The actual data for each transcript
-    std::vector<TranscriptInfo> transcripts_;
-
-    TranscriptGeneMap& transcriptGeneMap_;
-
-    tbb::concurrent_unordered_set<uint64_t> genePromiscuousKmers_;
-
-    std::vector<Promiscuity> kmerGroupPromiscuities_;
-    std::vector<Promiscuity> kmerGroupBiases_;
-    std::vector<KmerQuantity> kmerGroupCounts_;
-    std::vector<Count> kmerGroupSizes_;
-    
-
-    /**
-     * Log-transforms all counts in the kmerGroupCounts_ variable.  Groups with a
-     * count of zero are assigned the special value LOG_0.
-     */
-    inline void logKmerGroupCounts_() {
-        std::for_each(kmerGroupCounts_.begin(), kmerGroupCounts_.end(),
-                      [](KmerQuantity& count) {
-                          if (count > 0) { 
-                              count = std::log(count);
-                          } else {
-                              count = salmon::math::LOG_0;
-                          }
-                      });
-    }
-
-    /**
-     * Compute the "Inverse Document Frequency" (IDF) of a kmer within a set of transcripts.
-     * The inverse document frequency is the log of the number of documents (i.e. transcripts)
-     * divided by the number of documents containing this term (i.e. kmer).
-     * @param  k [kmer id for which the IDF should be computed]
-     * @return   [IDF(k)]
-     */
-    inline double _idf( uint64_t k ) {
-        double df = transcriptsForKmer_[k].size();
-        return (df > 0.0) ? std::log(transcripts_.size() / df) : 0.0;
-    }
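-
-    // Worked example (illustrative numbers, not from the original code): with
-    // transcripts_.size() == 10000 and a kmer occurring in 10 transcripts,
-    // IDF(k) = log(10000 / 10) = log(1000) ~ 6.91, while a kmer present in
-    // every transcript gets IDF(k) = log(1) = 0, i.e. it is uninformative.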
-
-    /**
-     * Returns true if this kmer should be considered in our estimation, false
-     * otherwise
-     * @param  mer [kmer id to test for consideration]
-     * @return     [true if we consider the kmer with this id, false otherwise]
-     */
-    inline bool _considered( uint64_t mer ) {
-        // The kmer is only considered if it exists in the transcript set
-        // (i.e. it's possible to cover) and it's less promiscuous than the
-        // cutoff.  NOTE: the filter is currently disabled here; every kmer
-        // is considered.
-        return true;
-    }
-
-    /**
-     * The weight attributed to each appearance of the kmer with the given ID.
-     * If the kmer with ID k occurs in m different transcripts, then
-     * weight_(k) = 1 / m.
-     * @param  k [The ID of the kmer whose weight is to be computed]
-     * @return   [The weight of each appearance of the kmer with the given ID]
-     */
-    KmerQuantity weight_( KmerID k ) {
-        return 1.0 / (kmerGroupPromiscuities_[k] );
-    }
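-
-    // Illustrative example: a kmer group appearing in 4 distinct transcripts
-    // has promiscuity 4, so each occurrence contributes weight_(k) = 1/4 = 0.25
-    // of a count to any single transcript.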
-
-    KmerQuantity _computeMedian( const TranscriptInfo& ti ) {
-
-      using namespace boost::accumulators;
-      using Accumulator = accumulator_set<double, stats<tag::median(with_p_square_quantile)>>;
-
-      Accumulator acc;
-      for (auto binmer : ti.binMers) {
-        acc(binmer.second);
-      }
-
-      return median(acc);
-    }
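-
-    // Note: p_square_quantile is Boost.Accumulators' P-square (P^2) estimator
-    // (Jain & Chlamtac, 1985), a constant-memory streaming approximation, so
-    // the value returned above is an estimate of the median rather than the
-    // exact order statistic.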
-
-    /**
-     * Computes the sum of kmer counts within the transcript given by ti, but clamping
-     * all non-zero counts to the given quantile range.  For example, if quantile is 0.25,
-     * and x and y are the 1st and 3rd quartiles of the kmer counts, then every nonzero
-     * count c is transformed as c = max(x, min(y, c));
-     *
-     * @param  ti       [the transcript whose clamped kmer-count sum is computed]
-     * @param  quantile [the clamping probability; counts are clamped to the
-     *                   quantile and (1 - quantile) quantiles]
-     * @return          [the clamped sum of kmer counts]
-     */
-    KmerQuantity _computeSumQuantile( const TranscriptInfo& ti, double quantile ) {
-        using namespace boost::accumulators;
-        using accumulator_t = accumulator_set<double, stats<tag::p_square_quantile> >;
-        //using tail_t = accumulator_set<double, stats<tag::tail
-        KmerQuantity sum = 0.0;
-
-
-        accumulator_t accLow(quantile_probability = quantile);
-        accumulator_t accHigh(quantile_probability = 1.0-quantile);
-        for ( auto binmer : ti.binMers ) {
-            accLow(binmer.second);
-            accHigh(binmer.second);
-        }
-
-        auto cutLow = p_square_quantile(accLow);
-        auto cutHigh = p_square_quantile(accHigh);
-
-        for ( auto binmer : ti.binMers ) {
-            KmerQuantity res = std::min( cutHigh, std::max(cutLow, binmer.second ) );
-            if (res != binmer.second) {
-              std::cerr << "cutLow = " << cutLow << ", cutHigh = " << cutHigh << " :: ";
-              std::cerr << "res = " << res << ", binmer.second = " << binmer.second << "\n";
-            }
-            sum += res;
-        }
-        return sum;
-    }
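-
-    // Illustrative example (hypothetical counts): with quantile = 0.25 and
-    // counts {1, 2, 3, 100}, cutLow/cutHigh approximate the 1st/3rd quartiles,
-    // so the outlier 100 is clamped down to cutHigh before being summed,
-    // damping the influence of extreme kmer counts on the total.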
-
-    KmerQuantity _computeSum( const TranscriptInfo& ti ) {
-        KmerQuantity sum = 0.0;
-        for ( auto binmer : ti.binMers ) {
-          sum += kmerGroupBiases_[binmer.first] * binmer.second;
-        }
-        return sum;
-    }
-
-    KmerQuantity _computeSumClamped( const TranscriptInfo& ti ) {
-        if (ti.binMers.size() < 5) {return _computeSum(ti);}
-        KmerQuantity sum = 0.0;
-
-        auto maxQuant = std::numeric_limits<KmerQuantity>::max();
-        auto minQuant = 0.0;
-
-        auto startValue = ti.binMers.begin()->second;
-        KmerQuantity lowestCount = startValue;
-        KmerQuantity secondLowestCount = startValue;
-
-        KmerQuantity highestCount = startValue;
-        KmerQuantity secondHighestCount = startValue;
-
-        for ( auto binmer : ti.binMers ) {
-          auto prevLowest = lowestCount;
-          lowestCount = std::min(binmer.second, lowestCount);
-          if (lowestCount < prevLowest) { secondLowestCount = prevLowest; }
-          auto prevHighest = highestCount;
-          highestCount = std::max(binmer.second, highestCount);
-          if (highestCount > prevHighest) { secondHighestCount = prevHighest; }
-        }
-        //std::cerr << lowestCount << ", " << secondLowestCount << ", " << highestCount << ", " << secondHighestCount << "\n";
-
-        for ( auto binmer : ti.binMers ) {
-          if (binmer.second > lowestCount and binmer.second < highestCount) {
-            sum += kmerGroupBiases_[binmer.first] * binmer.second;
-          } else if (binmer.second >= highestCount ) {
-            sum += secondHighestCount;
-          } else if (binmer.second <= lowestCount ) {
-            sum += secondLowestCount;
-          }
-        }
-
-        return sum;
-    }
-
-
-    bool _discard( TranscriptInfo& ti) {
-        if ( ti.mean == 0.0 ) {
-            return false;
-        } else {
-            ti.mean = 0.0;
-            ti.binMers.clear();
-            return true;
-        }
-    }
-
-    // NOTE: dead code -- relies on the legacy 'weights' member, which is
-    // commented out above.
-    KmerQuantity _computeSumVec( const TranscriptInfo& ti ) {
-        KmerQuantity sum = 0.0;
-        for ( auto w : ti.weights ) {
-          sum += w;
-        }
-        return sum;
-    }
-
-    KmerQuantity _computeClampedMean( const TranscriptInfo& ti ) {
-        return (ti.effectiveLength > 0.0) ? (_computeSumClamped(ti) / ti.effectiveLength) : 0.0;
-    }
-
-    KmerQuantity _computeMean( const TranscriptInfo& ti ) {
-        return (ti.effectiveLength > 0.0) ? (_computeSum(ti) / ti.effectiveLength) : 0.0;
-        //return (ti.effectiveLength > 0.0) ? (ti.totalWeight.load() / ti.effectiveLength) : 0.0;
-        //return (ti.effectiveLength > 0.0) ? (_computeSumVec(ti) / ti.effectiveLength) : 0.0;
-    }
-
-    KmerQuantity _computeWeightedMean( const TranscriptInfo& ti ) {
-        using namespace boost::accumulators;
-        accumulator_set<double, stats<tag::count, tag::weighted_mean>, double> acc;
-
-        for ( auto binmer : ti.binMers ) {
-          if ( this->genePromiscuousKmers_.find(binmer.first) == this->genePromiscuousKmers_.end() ){
-            acc(binmer.second, weight=kmerGroupBiases_[binmer.first] * weight_(binmer.first));
-          }
-        }
-
-        auto nnz = count(acc);
-
-        if ( nnz < ti.effectiveLength ) {
-            acc(0.0, weight=ti.effectiveLength-nnz);
-        }
-
-        auto sum = sum_of_weights(acc);
-        return sum > 0.0 ? weighted_mean(acc) : 0.0;
-    }
-
-    double _effectiveLength( const TranscriptInfo& ts ) {
-        double length = 0.0;
-        for ( auto binmer : ts.binMers ) {
-            length += weight_(binmer.first);
-        }
-        return length;
-    }
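-
-    // Illustrative example: a transcript containing 3 unique kmers (weight 1.0
-    // each) plus 2 kmers shared by 4 transcripts (weight 0.25 each) has
-    // effective length 3*1.0 + 2*0.25 = 3.5, rather than its raw kmer count of 5.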
-
-    template <typename T>
-    T dotProd_(std::vector<T>& u, std::vector<T>& v) {
-
-      auto dot = tbb::parallel_reduce(
-        BlockedIndexRange(size_t(0), v.size()),
-            T(0.0),  // identity element for summation
-            [&]( const BlockedIndexRange& r, T current_sum ) -> T {
-             for (size_t i=r.begin(); i!=r.end(); ++i) {
-               current_sum += (u[i]*v[i]);
-             }
-             return current_sum; // body returns updated value of the accumulator
-             },
-             []( double s1, double s2 ) -> double {
-                return s1+s2;       // "joins" two accumulated values
-      });
-
-      return dot;
-    }
-
-    void normalizeTranscriptMeans_(){
-        //auto sumMean = 0.0;
-        //for ( auto ti : transcripts_ ) { sumMean += ti.mean; }
-
-        auto sumMean = tbb::parallel_reduce(
-            BlockedIndexRange(size_t(0), transcripts_.size()),
-            double(0.0),  // identity element for summation
-            [&, this]( const BlockedIndexRange& r, double current_sum ) -> double {
-                 for (size_t i=r.begin(); i!=r.end(); ++i) {
-                     double x = this->transcripts_[i].mean;
-                     current_sum += x;
-                 }
-                 return current_sum; // body returns updated value of the accumulator
-             },
-             []( double s1, double s2 ) -> double {
-                 return s1+s2;       // "joins" two accumulated values
-             });
-
-        // compute the new mean for each transcript
-        tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(transcripts_.size())),
-            [this, sumMean](const BlockedIndexRange& range) -> void {
-              for(size_t tid = range.begin(); tid != range.end(); ++tid) {
-                this->transcripts_[tid].mean /= sumMean;
-              }
-            });
-
-    }
-
-    template <typename T>
-    T psumAccumulate(const tbb::blocked_range<T*>& r, T value) {
-            return std::accumulate(r.begin(),r.end(),value);
-    }
-
-    template <typename T>
-    T psum_(std::vector<T>& v) {
-      auto func = std::bind( std::mem_fn(&CollapsedIterativeOptimizer<ReadHash>::psumAccumulate<T>),
-                             this, std::placeholders::_1, std::placeholders::_2 );
-      auto sum = tbb::parallel_reduce(
-        tbb::blocked_range<T*>(v.data(), v.data() + v.size()),
-          T{0},  // identity element for summation
-          func,
-          std::plus<T>()
-        );
-      return sum;
-    }
-
-    template <typename T>
-    T pdiff_(std::vector<T>& v0, std::vector<T>& v1) {
-        auto diff = tbb::parallel_reduce(
-        BlockedIndexRange(size_t(0), v0.size()),
-          double(0.0),  // identity element for difference
-          [&](const BlockedIndexRange& r, double currentDiff ) -> double {
-            for (size_t i=r.begin(); i!=r.end(); ++i) {
-              currentDiff += v0[i] - v1[i];
-            }
-            return currentDiff; // body returns updated value of the accumulator
-          },
-          []( double s1, double s2 ) -> double {
-               return s1+s2;       // "joins" two accumulated values
-          }
-        );
-
-        return diff;
-    }
-
-    template <typename T>
-    T pabsdiff_(std::vector<T>& v0, std::vector<T>& v1) {
-        auto diff = tbb::parallel_reduce(
-        BlockedIndexRange(size_t(0), v0.size()),
-          double(0.0),  // identity element for difference
-          [&]( const BlockedIndexRange& r, double currentDiff ) -> double {
-            for (size_t i=r.begin(); i!=r.end(); ++i) {
-              currentDiff += std::abs(v0[i] - v1[i]);
-            }
-            return currentDiff; // body returns updated value of the accumulator
-          },
-          []( double s1, double s2 ) -> double {
-               return s1+s2;       // "joins" two accumulated values
-          }
-        );
-
-        return diff;
-    }
-
-    template <typename T>
-    std::vector<T> relAbsDiff_(std::vector<T>& v0, std::vector<T>& v1, T minVal) {
-        std::vector<T> relDiff(v0.size(), T());
-        // compute the new mean for each transcript
-        tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(v0.size())),
-                          [&v0, &v1, &relDiff, minVal](const BlockedIndexRange& range ) -> void {
-                              for (auto tid : boost::irange(range.begin(), range.end())) {
-                                  T oldVal = v0[tid];
-                                  T newVal = v1[tid];
-                                  if (oldVal > minVal or newVal > minVal) {
-                                      relDiff[tid] = std::abs(newVal - oldVal) / std::max(oldVal, minVal);
-                                  }
-                              }
-                          });
-        return relDiff;
-    }
-
-    void normalize_(std::vector<double>& means) {
-        auto sumMean = psum_(means);
-        auto invSumMean = 1.0 / sumMean;
-
-        // compute the new mean for each transcript
-        tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(transcripts_.size())),
-            [&means, invSumMean](const BlockedIndexRange& range ) -> void {
-              for (auto tid : boost::irange(range.begin(), range.end())) {
-                means[tid] *= invSumMean;
-              }
-            });
-    }
-
-    double averageCount(const TranscriptInfo& ts){
-        if ( ts.binMers.size() == 0 ) { return 0.0; }
-        double sum = 0.0;
-        for ( auto binmer : ts.binMers ) {
-            sum += kmerGroupBiases_[binmer.first] * binmer.second;
-        }
-        return sum / ts.binMers.size();
-
-    }
-
-    /**
-     * This function should be run only <b>after</b> an EM loop.
-     * It estimates kmer-specific biases based on how much a kmer's count
-     * deviates from its corresponding transcript's mean
-     */
-    void computeKmerFidelities_() {
-
-      // For each transcript, compute its overall fidelity.  This is related
-      // to the variance of coverage across the transcript.  The more uniform
-      // the coverage, the more we believe the transcript.
-        std::vector<double> transcriptFidelities(transcripts_.size(), 0.0);
-        tbb::parallel_for( BlockedIndexRange(size_t(0), transcripts_.size()),
-            [this, &transcriptFidelities]( const BlockedIndexRange& range ) -> void {
-               for (auto tid = range.begin(); tid != range.end(); ++tid) {
-                double sumDiff = 0.0;
-                //if (tid >= this->transcripts_.size()) { std::cerr << "attempting to access transcripts_ out of range\n";}
-                auto& ts = this->transcripts_[tid];
-
-                //std::cerr << "transcript " << tid << "\n";
-                for ( auto& b : ts.binMers ) {
-                    //if (b.first >= this->kmerGroupBiases_.size()) { std::cerr << "attempting to access kmerGroupBiases_ out of range\n";}
-                    auto scaledMean = this->kmerGroupSizes_[b.first] * ts.mean;
-                    auto diff = std::abs(b.second - scaledMean);
-                    sumDiff += diff;//*diff;
-                }
-                // The remaining positions, which have 0 coverage, each have an error
-                // of |0 - \mu_t| = \mu_t.  There are l(t) - ts.binMers.size() of these.
-                sumDiff += ts.mean * (ts.length - ts.binMers.size());
-                auto fidelity = (ts.length > 0.0) ? sumDiff / ts.length : 0.0;
-                fidelity = 1.0 / (fidelity + 1.0);
-                //if (tid >= transcriptFidelities.size()) { std::cerr << "attempting to access transcriptFidelities out of range\n";}
-                transcriptFidelities[tid] = fidelity;
-                //std::cerr << "fidelity (" << tid << ") = " << fidelity << "\n";
-               }
-            });
-
-        tbb::parallel_for(BlockedIndexRange(size_t(0), transcriptsForKmer_.size()),
-            [this, &transcriptFidelities](const BlockedIndexRange& range) -> void {
-              // Each transcript this kmer group appears in votes on the bias of this kmer.
-              // Underrepresented kmers get bias values > 1.0 while overrepresented kmers get
-              // bias values < 1.0.  The vote of each transcript is weighted by its fidelity
-                for (auto kid = range.begin(); kid != range.end(); ++kid) {
-                  double totalBias = 0.0;
-                  double totalFidelity = 0.0;
-                  for( auto tid : this->transcriptsForKmer_[kid] ) {
-                    auto& transcript = this->transcripts_[tid];
-                    auto fidelity = transcriptFidelities[tid];
-                    auto totalMean = transcript.mean * this->kmerGroupSizes_[kid];
-                    auto curAlloc = transcript.binMers[kid];
-                    totalBias += (curAlloc > 0.0) ? fidelity * (totalMean / curAlloc) : 0.0;
-                    totalFidelity += fidelity;
-                  }
-                  double bias = totalBias / totalFidelity;
-                  double alpha = 0.25; //std::min( 1.0, 10.0*averageFidelity / confidence);
-                  double prevBias = this->kmerGroupBiases_[kid];
-                  this->kmerGroupBiases_[kid] = alpha * bias + (1.0 - alpha) * prevBias;
-              }
-            }
-        );
-
-
-    }
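-
-    // Illustrative example of the bias update above (hypothetical numbers):
-    // a kmer group appearing in a single transcript with fidelity 1.0,
-    // expected mass totalMean = 10, and current allocation curAlloc = 20
-    // votes bias = 10/20 = 0.5; with alpha = 0.25 and prevBias = 1.0 the
-    // smoothed bias becomes 0.25*0.5 + 0.75*1.0 = 0.875, damping this
-    // overrepresented kmer toward a bias below 1.0.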
-
-    double logLikelihood_(std::vector<double>& means) {
-
-      const auto numTranscripts = transcripts_.size();
-      std::vector<double> likelihoods(numTranscripts, 0.0);
-
-        // Compute the log-likelihood
-        tbb::parallel_for(BlockedIndexRange(size_t(0), numTranscripts),
-          // for each transcript
-          [&likelihoods, &means, this](const BlockedIndexRange& range) ->void {
-            auto epsilon = 1e-40;
-            for (auto tid = range.begin(); tid != range.end(); ++tid) {
-              auto& ti = transcripts_[tid];
-              double relativeAbundance = means[tid];
-
-              if (ti.binMers.size() > 0 ) { likelihoods[tid] = 1.0; }
-              // For each kmer in this transcript
-              for ( auto& binmer : ti.binMers ) {
-                likelihoods[tid] *=  binmer.second /
-                           (this->kmerGroupBiases_[binmer.first] * this->kmerGroupCounts_[binmer.first]);
-              }
-              likelihoods[tid] = (relativeAbundance > epsilon and likelihoods[tid] > epsilon) ?
-                                 std::log(relativeAbundance * likelihoods[tid]) : 0.0;
-            }
-          });
-
-      return psum_(likelihoods);
-
-    }
-
-    double expectedLogLikelihood_(std::vector<double>& means) {
-      // alpha in arXiv:1104.3889v2
-      std::vector<double> sampProbs(means.size(), 0.0);
-
-      tbb::parallel_for(BlockedIndexRange(size_t(0), means.size()),
-          // for each transcript
-          [&sampProbs, &means, this](const BlockedIndexRange& range) ->void {
-            for (auto tid = range.begin(); tid != range.end(); ++tid) {
-              auto& ti = transcripts_[tid];
-              double relativeAbundance = means[tid];
-              sampProbs[tid] = ti.length * relativeAbundance;
-            }});
-
-      normalize_(sampProbs);
-
-      return (means.size() > 0) ? logLikelihood2_(sampProbs) / means.size() : 0.0;
-    }
-
-    double logLikelihood2_(std::vector<double>& sampProbs) {
-
-      std::vector<double> likelihoods(transcriptsForKmer_.size(), 0.0);
-
-        // Compute the log-likelihood
-        tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(transcriptsForKmer_.size())),
-          // for each transcript
-          [&likelihoods, &sampProbs, this](const BlockedIndexRange& range) ->void {
-            for (auto kid = range.begin(); kid != range.end(); ++kid) {
-              double kmerLikelihood = 0.0;
-              KmerQuantity totalKmerMass = 0.0;
-              for (auto& tid : this->transcriptsForKmer_[kid]) {
-                double kmerMass{this->transcripts_[tid].binMers[kid]};
-                kmerLikelihood += kmerMass * (sampProbs[tid] / this->transcripts_[tid].length);
-                totalKmerMass += kmerMass;
-              }
-              // If this kmer group carries any mass, record its log-likelihood
-              if (totalKmerMass > 0) {
-                  if (kmerLikelihood < 1e-20) {
-                      std::cerr << "kmer group: " << kid << " has probability (" << kmerLikelihood << "); too low\n";
-                  } else {
-                      likelihoods[kid] = std::log(kmerLikelihood);
-                  }
-              }
-            }
-          });
-
-      return psum_(likelihoods);
-
-    }
-
-
-    // Since there's no built-in hash code for vectors
-  template <typename T>
-  class my_hasher{
-  public:
-    size_t operator()(const T& x) const {
-        if (x.size() == 0 ) { return 0; }
-        size_t seed = x[0];
-        for (auto i : boost::irange(size_t(1), x.size())) {
-            boost::hash_combine(seed, static_cast<size_t>(x[i]));
-        }
-        return seed;
-    }
-  };
-
-/**
- * Collapses all kmers that share the same transcript multiset.  Such kmers can be
- * treated as a "batch" with a count whose value is the sum of the individual
- * members' counts.
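- *
- * Illustrative example: if kmers k1 and k2 each occur in exactly the
- * transcript set {t3, t7}, they are collapsed into a single kmer group whose
- * count is count(k1) + count(k2), and the EM allocation is then performed
- * once per group rather than once per kmer.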
- * @param  isActiveKmer       [A bitvector which designates, for each kmer,
- *                             whether or not that kmer is active in the current
- *                             read set.]
- */
- void collapseKmers_( boost::dynamic_bitset<>& isActiveKmer ) {
-
-    auto numTranscripts = transcriptGeneMap_.numTranscripts();
-
-    /**
-     * Map from a vector of transcript IDs to the list of kmers that have this
-     * transcript list.  This allows us to collapse all kmers that exist in the
-     * exact same set of transcripts into a single kmer group.
-     */
-    tbb::concurrent_unordered_map< TranscriptIDVector,
-                                   tbb::concurrent_vector<KmerID>,
-                                   my_hasher<std::vector<TranscriptID>> > m;
-
-     // Asynchronously print out the progress of our hashing procedure
-     std::atomic<size_t> prog{0};
-     std::thread t([this, &prog]() -> void {
-        ez::ezETAProgressBar pb(this->transcriptsForKmer_.size());
-        pb.start();
-        size_t prevProg{0};
-        while ( prevProg < this->transcriptsForKmer_.size() ) {
-            if (prog > prevProg) {
-                auto diff = prog - prevProg;
-                pb += diff;
-                prevProg += diff;
-            }
-            boost::this_thread::sleep_for(boost::chrono::seconds(1));
-        }
-        if (!pb.isDone()) { pb.done(); }
-     });
-
-     // For every kmer, compute its kmer group.
-     tbb::parallel_for(BlockedIndexRange(size_t(0), transcriptsForKmer_.size()),
-        [&](const BlockedIndexRange& range ) -> void {
-          for (auto j = range.begin(); j != range.end(); ++j) {
-            if (isActiveKmer[j]) {
-              m[ transcriptsForKmer_[j]  ].push_back(j);
-            }
-            ++prog;
-          }
-     });
-
-     // wait for the parallel hashing to finish
-     t.join();
-
-     // TESTING
-     std::ofstream eqFile("KMER_EQUIV_CLASSES.txt");
-     for (auto& kv : m ) {
-       for (auto e : kv.second) { eqFile << e << "\t"; }
-       eqFile << "\n";
-     }
-     eqFile.close();
-     // END TESTING
-
-     std::cerr << "Out of " << transcriptsForKmer_.size() << " potential kmers, "
-               << "there were " << m.size() << " distinct groups\n";
-
-     size_t totalKmers = 0;
-     size_t index = 0;
-     std::vector<KmerQuantity> kmerGroupCounts(m.size());
-     std::vector<Promiscuity> kmerGroupPromiscuities(m.size());
-     std::vector<TranscriptIDVector> transcriptsForKmer(m.size());
-     kmerGroupSizes_.resize(m.size(), 0);
-
-     using namespace boost::accumulators;
-     std::cerr << "building collapsed transcript map\n";
-     for ( auto& kv : m ) {
-
-        // For each transcript covered by this kmer group, add this group to the set of kmer groups contained in
-        // the transcript.  For efficiency, we also compute the kmer promiscuity values for each kmer
-        // group here --- the promiscuity of a kmer group is simply the number of distinct transcripts in
-        // which this group of kmers appears.
-        auto prevTID = std::numeric_limits<TranscriptID>::max();
-        KmerQuantity numDistinctTranscripts = 0.0;
-        for ( auto& tid : kv.first ) {
-          transcripts_[tid].binMers[index] += 1;
-          // Since the transcript IDs are sorted we just have to check
-          // if this id is different from the previous one
-          if (tid != prevTID) { numDistinctTranscripts += 1.0; }
-          prevTID = tid;
-        }
-        // Set the promiscuity and the set of transcripts for this kmer group
-        kmerGroupPromiscuities[index] = numDistinctTranscripts;
-        transcriptsForKmer[index] = kv.first;
-
-        // Aggregate the counts attributable to each kmer into its respective
-        // group's counts.
-        for (auto kid : kv.second) {
-            kmerGroupCounts[index] += readHash_.atIndex(kid);
-        }
-        kmerGroupSizes_[index] = kv.second.size();
-
-        // Update the total number of kmers we're accounting for
-        // and the index of the current kmer group.
-        totalKmers += kv.second.size();
-        ++index;
-      }
-
-      std::cerr << "Verifying that the unique set encodes " << totalKmers << " kmers\n";
-      std::cerr << "collapsedCounts.size() = " << transcriptsForKmer.size() << "\n";
-
-      // update the relevant structures holding info for the full kmer
-      // set with those holding the info for our collapsed kmer sets
-      std::swap(kmerGroupPromiscuities, kmerGroupPromiscuities_);
-      std::swap(kmerGroupCounts, kmerGroupCounts_);
-      std::swap(transcriptsForKmer, transcriptsForKmer_);
-
-      /*
-      uint64_t groupCounts = 0;
-      for(auto c : kmerGroupCounts_) { groupCounts += c; }
-      auto tmp = psum_(kmerGroupCounts_);
-      std::cerr << "groupCount(" << groupCounts << ") - parallelCount(" << tmp << ") = " << groupCounts - tmp << "\n";
-      std::atomic<uint64_t> individualCounts{0};
-      tbb::parallel_for(size_t{0}, readHash_.size(),
-        [&](size_t i) {
-          if (isActiveKmer[i]) {
-            individualCounts += readHash_.atIndex(i);
-          } });
-      auto diff = groupCounts - individualCounts;
-      std::cerr << "groupTotal(" << groupCounts << ") - totalNumKmers(" << individualCounts << ") = " << diff << "\n";
-      */
-  }
-
-
-  /**
-   * NEW! Assuming that equivalence classes were computed in the index
-   **/
-  void prepareCollapsedMaps_(
-                            const std::string& kmerEquivClassFname,
-                            bool discardZeroCountKmers) {
-
-    using std::cerr;
-
-    cerr << "reading Kmer equivalence classes \n";
-
-    auto memberships = LUTTools::readKmerEquivClasses(kmerEquivClassFname);
-    size_t numKmers{readHash_.size()};
-    size_t numKmerClasses{(*std::max_element(memberships.begin(), memberships.end())) + 1};
-    boost::dynamic_bitset<> isActiveKmer(numKmers);
-
-    kmerGroupCounts_.resize(numKmerClasses, 0.0);
-    kmerGroupSizes_.resize(numKmerClasses, 0.0);
-    kmerGroupPromiscuities_.resize(numKmerClasses, 0.0);
-
-    cerr << "updating Kmer group counts\n";
-    // Update the kmer group counts using the information from the read hash
-    for (auto kid : boost::irange(size_t{0}, numKmers)) {
-
-      size_t count = readHash_.atIndex(kid);
-      if (!discardZeroCountKmers or count != 0) {
-        auto kmerClassID = memberships[kid];
-        isActiveKmer[kmerClassID] = true;
-        kmerGroupCounts_[kmerClassID] += count;
-        kmerGroupSizes_[kmerClassID]++;
-      }
-
-    }
-
-    cerr << "updating transcript map\n";
-    for (auto kmerClassID : boost::irange(size_t{0}, numKmerClasses)) {
-
-      // For each transcript covered by this kmer group, add this group to the set of kmer groups contained in
-      // the transcript.  For efficiency, we also compute the kmer promiscuity values for each kmer
-      // group here --- the promiscuity of a kmer group is simply the number of distinct transcripts in
-      // which this group of kmers appears.
-      auto prevTID = std::numeric_limits<TranscriptID>::max();
-      KmerQuantity numDistinctTranscripts = 0.0;
-
-      //cerr << "numKmerClasses = " << numKmerClasses << ", kmerClassID = " << kmerClassID << ", transcriptsForKmer_.size() = " << transcriptsForKmer_.size() << "\n";
-      for (auto& tid : transcriptsForKmer_[kmerClassID]) {
-        transcripts_[tid].binMers[kmerClassID] += 1;
-        // Since the transcript IDs are sorted we just have to check
-        // if this id is different from the previous one
-        if (tid != prevTID) { numDistinctTranscripts += 1.0; }
-        prevTID = tid;
-      }
-      // Set the promiscuity and the set of transcripts for this kmer group
-      kmerGroupPromiscuities_[kmerClassID] = numDistinctTranscripts;
-      //kmerGroupPromiscuities_[kmerClassID] = transcriptsForKmer_[kmerClassID].size();
-    }
-    cerr << "done\n";
-
-    //promiscuousKmerCutoff_ = 50;
-    for (auto kmerClassID : boost::irange(size_t{0}, numKmerClasses)) {
-        if (kmerGroupPromiscuities_[kmerClassID] > promiscuousKmerCutoff_ ) {
-            kmerGroupCounts_[kmerClassID] = 0;
-        }
-    }
-    // By now we should have properly filled out the variables
-    // kmerGroupPromiscuities_
-    // kmerGroupCounts_
-    // transcripts_
-  }
-
-
-  /**
-   * This function should be called before performing any optimization procedure.
-   * It builds all of the necessary data-structures which are used during the transcript
-   * quantification procedure.
-   * @param  klutfname [The name of the file containing the kmer lookup table.]
-   * @param  tlutfname [The name of the file containing the transcript lookup table.]
-   */
-    void initialize_(
-        const std::string& klutfname,
-        const std::string& tlutfname,
-        const std::string& kmerEquivClassFname,
-        const bool discardZeroCountKmers) {
-
-        // So we can concisely identify each transcript
-        TranscriptID transcriptIndex {0};
-
-        size_t numTranscripts = transcriptGeneMap_.numTranscripts();
-        size_t numKmers = readHash_.size();
-        auto merSize = readHash_.kmerLength();
-
-        size_t numActors = numThreads_;
-        std::vector<std::thread> threads;
-
-        transcripts_.resize(transcriptGeneMap_.numTranscripts());
-
-        // Get the kmer look-up-table from file
-        LUTTools::readKmerLUT(klutfname, transcriptsForKmer_);
-
-/*        size_t numContainingTranscripts = transcriptsForKmer_[kid].size();
-          assert(numContainingTranscripts > 0);
-          if (numContainingTranscripts == 1 or
-              std::all_of(transcriptsForKmer_[kid].begin(), transcriptsForKmer_[kid].end(), [this, kid](TranscriptID tid) -> bool {
-                return tid == this->transcriptsForKmer_[kid][0]; } )) {
-              uniquelyAnchoredTranscripts.insert(transcriptsForKmer_[kid][0]);
-          }
-
-        }
-*/
-        //std::cerr << "\nIn the original set, there were " << uniquelyAnchoredTranscripts.size() << " transcripts with unique nonzero anchors\n";
-
-        std::cerr << "\n";
-        //  collapseKmers_(isActiveKmer); // equiv-classes
-        prepareCollapsedMaps_(kmerEquivClassFname, discardZeroCountKmers);
-
-
-        // we have no k-mer-specific biases currently
-        kmerGroupBiases_.resize(transcriptsForKmer_.size(), 1.0);
-
-        // Get transcript lengths
-        std::ifstream ifile(tlutfname, std::ios::binary);
-        size_t numRecords {0};
-        ifile.read(reinterpret_cast<char *>(&numRecords), sizeof(numRecords));
-
-        std::cerr << "Transcript LUT contained " << numRecords << " records\n";
-        for (auto i : boost::irange(size_t(0), numRecords)) {
-            auto ti = LUTTools::readTranscriptInfo(ifile);
-            // copy over the length, then we're done.
-            transcripts_[ti->transcriptID].length = ti->length;
-            transcripts_[ti->transcriptID].effectiveLength = ti->length - merSize + 1;
-        }
-        ifile.close();
-        // --- done ---
-
-       // tbb::parallel_for( size_t(0), size_t(transcripts_.size()),
-       //     [this]( size_t idx ) {
-       //         auto& transcript = this->transcripts_[idx];
-       //         transcript.effectiveLength = transcript.effectiveLength - transcript.binMers.size();
-       //         for (auto binmer : transcript.binMers) {
-       //          transcript.effectiveLength += this->_weight(binmer.first);
-       //         }
-       // });
-
-        std::atomic<size_t> numRes{0};
-        std::cerr << "\n\nRemoving duplicates from kmer transcript lists ... ";
-        tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(transcriptsForKmer_.size())),
-            [&numRes, this](const BlockedIndexRange& range) -> void {
-                for (auto idx = range.begin(); idx != range.end(); ++idx) {
-                  auto& transcripts = this->transcriptsForKmer_[idx];
-                  // should already be sorted -- extra check can be removed eventually
-                  assert(std::is_sorted(transcripts.begin(), transcripts.end()));
-                  // Uniqify the transcripts
-                  auto it = std::unique(transcripts.begin(), transcripts.end());
-                  transcripts.resize(std::distance(transcripts.begin(), it));
-                  ++numRes;
-                }
-         });
-
-         std::cerr << "done\n";
-
-         std::cerr << "Computing kmer group promiscuity rates\n";
-         /* -- done
-         kmerGroupPromiscuities_.resize(transcriptsForKmer_.size());
-         tbb::parallel_for( size_t{0}, kmerGroupPromiscuities_.size(),
-            [this]( KmerID kid ) -> void { this->kmerGroupPromiscuities_[kid] = this->_weight(kid); }
-         );
-         */
-
-        tbb::parallel_for(BlockedIndexRange(size_t{0}, transcripts_.size()),
-          [&, this](const BlockedIndexRange& range) -> void {
-            for (auto tid = range.begin(); tid != range.end(); ++tid) {
-              auto& ti = this->transcripts_[tid];
-              for (auto& binmer : ti.binMers) {
-                if (binmer.second > promiscuousKmerCutoff_) {
-                  ti.effectiveLength -= 1.0;
-                }
-              }
-            }
-        });
-
-        /**
-         * gene-promiscuous kmers can never be added to a transcript's counts, so
-         * it's unfair to consider them in the transcripts effective length.
-         */
-        /*
-        std::for_each( genePromiscuousKmers_.begin(), genePromiscuousKmers_.end(),
-            [this]( KmerID kmerId ) -> void {
-                for ( auto tid : transcriptsForKmer_[kmerId] ) {
-                    transcripts_[tid].effectiveLength -= 1.0;
-                }
-            });
-        */
-        std::cerr << "done\n";
-
-        //return mappedReads;
-    }
-
-    void _dumpCoverage( const boost::filesystem::path &cfname) {
-        auto memberships = LUTTools::readKmerEquivClasses(kmerEquivClassFname_);
-
-        size_t numTrans = transcripts_.size();
-        size_t numProc = 0;
-        std::ifstream kmerStructFile("zm_transcript.map", std::ios::in | std::ios::binary);
-        std::unordered_map<std::string, std::vector<KmerID>> kstruct;
-        uint64_t tlen{0};
-        uint32_t nameLen{0};
-        uint64_t tid{0};
-        for (size_t i = 0; i < numTrans; ++i) {
-            kmerStructFile.read(reinterpret_cast<char*>(&nameLen), sizeof(nameLen));
-            std::string name(nameLen, ' ');
-            kmerStructFile.read(reinterpret_cast<char*>(&name[0]), nameLen * sizeof(name[0]));
-            kmerStructFile.read(reinterpret_cast<char*>(&tlen), sizeof(tlen));
-            kstruct[name].resize(tlen);
-            auto& tvec = kstruct[name];
-            kmerStructFile.read(reinterpret_cast<char*>(&tvec[0]), tlen * sizeof(kstruct[name][0]));
-        }
-
-        kmerStructFile.close();
-
-       std::ofstream ofile(cfname.native());
-
-        ofile << "# numtranscripts_\n";
-        ofile << "# transcript_name_{1} num_kmer_classes{1} class_1_count class_2_count ... class_{num_eq_clases}_count\n";
-        ofile << "# ... \n";
-
-        ofile << transcripts_.size() << "\n";
-
-        std::cerr << "Dumping coverage statistics to " << cfname << "\n";
-
-
-        tbb::concurrent_queue<StringPtr> covQueue;
-
-        tbb::parallel_for(BlockedIndexRange(size_t{0}, transcripts_.size()),
-             [this, &covQueue, &memberships, &kstruct] (const BlockedIndexRange& range) -> void {
-                for (auto index = range.begin(); index != range.end(); ++index) {
-                  auto& td = this->transcripts_[index];
-                  const auto& name = this->transcriptGeneMap_.transcriptName(index);
-
-                  std::stringstream ostream;
-                  ostream << this->transcriptGeneMap_.transcriptName(index) << " " << kstruct[name].size();
-
-                  std::map<uint64_t, double> kmerClassRelativeMass;
-
-                  for (auto bm : kstruct[name]) {
-                      auto kclass = memberships[bm];
-                      double totalMass = 0.0;
-                      for (auto tid : this->transcriptsForKmer_[kclass]) {
-                          totalMass += this->transcripts_[tid].mean;
-                      }
-                      kmerClassRelativeMass[kclass] = (totalMass > 0.0) ? td.mean / totalMass : 0.0;
-                  }
-
-                  for (auto bm : kstruct[name]) {
-                      auto kclass = memberships[bm];
-                      //double classSize = this->kmerGroupSizes_[kclass];
-                      //double mass = td.binMers[kclass];
-                      //ostream << " " << mass / classSize;
-                      auto count = readHash_.atIndex(bm);
-                      ostream << " " << count * kmerClassRelativeMass[kclass];
-                  }
-                  ostream << "\n";
-                  std::string* ostr = new std::string(ostream.str());
-                  covQueue.push(ostr);
-                  // for boost lockfree
-                  // while(!covQueue.push(ostr));
-              }
-            }
-        );
-
-        ez::ezETAProgressBar pb(transcripts_.size());
-        pb.start();
-
-        std::string* sptr = nullptr;
-        while ( numProc < numTrans ) {
-            while( covQueue.try_pop(sptr) ) {
-                ofile << (*sptr);
-                ++pb;
-                ++numProc;
-                delete sptr;
-            }
-        }
-
-        ofile.close();
-
-    }
-
-    void filterByCoverage_( double coverageCutoff, std::vector<double>& means  ) {
-
-        tbb::parallel_for(BlockedIndexRange(size_t{0}, transcripts_.size()),
-                          [this, coverageCutoff, &means] (const BlockedIndexRange& range) -> void {
-                              for (auto index = range.begin(); index != range.end(); ++index) {
-                                  double numCovered{1.0};
-                                  double totalNumKmers{1.0};
-                                  bool unAnchored{true};
-                                  const auto& td = this->transcripts_[index];
-
-                                  for ( auto bm : td.binMers ) {
-                                      double numKmersToCover = this->kmerGroupSizes_[bm.first];
-                                      numCovered += (bm.second > 0.0) ? numKmersToCover : 0.0;
-                                      totalNumKmers += numKmersToCover;
-                                      //if (bm.second > 1000 * numKmersToCover and (std::abs(bm.second - this->kmerGroupCounts_[bm.first]) < 1e-5)) {
-                                      //    unAnchored = false;
-                                      //    break;
-                                      //}
-
-                                  }
-
-                              if (unAnchored and ((numCovered / totalNumKmers) < coverageCutoff)) {
-                                  means[index] = this->transcripts_[index].mean = 0.0;
-                              }
-                              }
-                          }
-                          );
-
-    }
-
-    void EMUpdate_( const std::vector<double>& meansIn, std::vector<double>& meansOut ) {
-      assert(meansIn.size() == meansOut.size());
-
-      auto reqNumJobs = transcriptsForKmer_.size();
-
-      std::atomic<size_t> numJobs{0};
-      std::atomic<size_t> completedJobs{0};
-
-      // Print out our progress
-      auto pbthread = std::thread(
-        [&completedJobs, reqNumJobs]() -> bool {
-          size_t prevNumJobs{0};
-          ez::ezETAProgressBar show_progress(reqNumJobs);
-          show_progress.start();
-          while ( prevNumJobs < reqNumJobs ) {
-            if ( prevNumJobs < completedJobs ) {
-              show_progress += completedJobs - prevNumJobs;
-            }
-            prevNumJobs = completedJobs.load();
-            boost::this_thread::sleep_for(boost::chrono::seconds(1));
-          }
-          if (!show_progress.isDone()) { show_progress.done(); }
-          return true;
-        });
-
-        //  E-Step : reassign the kmer group counts proportionally to each transcript
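-        //  For a kmer group k with stored count c_k (log-scaled when
-        //  logKmerGroupCounts_() has been applied) appearing in transcript set
-        //  T(k), each t in T(k) receives c_k * mu_t / sum_{t' in T(k)} mu_t',
-        //  where mu_t = meansIn[t].  Worked example (hypothetical): c_k = 12
-        //  and means {0.1, 0.2, 0.3} yield allocations {2, 4, 6}.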
-        tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(transcriptsForKmer_.size())),
-          // for each kmer group
-          [&completedJobs, &meansIn, &meansOut, this](const BlockedIndexRange& range) -> void {
-            for (auto kid : boost::irange(range.begin(), range.end())) {
-                auto kmer = kid;
-                // for each transcript containing this kmer group
-                auto& transcripts = this->transcriptsForKmer_[kmer];
-
-                double totalMass = 0.0;
-                for ( auto tid : transcripts ) {
-                    totalMass += meansIn[tid];
-                    /** TRY SPARSE
-                    auto& trans = this->transcripts_[tid];
-                    totalMass += trans.effectiveLength ? (meansIn[tid] - (trans.binMers[kmer] / trans.effectiveLength)) : 0.0;
-                    **/
-                }
-
-                double norm = (totalMass > 0.0) ? 1.0 / totalMass : 0.0;
-                for ( auto tid : transcripts ) {
-                  auto& trans = this->transcripts_[tid];
-                  auto lastIndex = trans.binMers.size()  - 1;
-                  // binMer based
-                  //auto idx = trans.weightNum++;
-                  //auto kmerIt = trans.binMers.find(kmer);
-
-                  trans.binMers[kmer] = meansIn[tid] * norm *
-                                        kmerGroupBiases_[kmer] * this->kmerGroupCounts_[kmer];
-                  /** TRY SPARSE
-                  double myMass = trans.effectiveLength ? (meansIn[tid] - (trans.binMers[kmer] / trans.effectiveLength)) : 0.0;
-                  trans.binMers[kmer] = myMass * norm *
-                      kmerGroupBiases_[kmer] * this->kmerGroupCounts_[kmer];
-                  **/
-
-                  // If we've seen all of the k-mers that appear in this transcript,
-                  // then we can compute its new estimated abundance
-                  if (trans.updated++ == lastIndex) {
-                    trans.mean = meansOut[tid] = this->_computeMean(trans);
-                    //trans.mean = meansOut[tid] = this->_computeClampedMean(trans);
-
-                    //trans.weightNum.store(0);
-                    trans.updated.store(0);
-                  }
-                        //this->transcripts_[tid].binMers[kmer] =
-                        //meansIn[tid] * norm * kmerGroupBiases_[kmer] * this->kmerGroupCounts_[kmer];
-
-
-
-
-                        /*
-                        auto idx = trans.weightNum++;
-                        auto weight = meansIn[tid] * norm * kmerGroupBiases_[kmer] * this->kmerGroupCounts_[kmer];
-                        trans.logLikes[idx] = (weight > 0.0) ? std::log(weight / this->kmerGroupCounts_[kmer] ) : 0.0;
-                        trans.weights[idx] = weight;
-                        ++trans.updated;
-                        if (idx == trans.weights.size()-1) {
-                          while (trans.updated.load() < trans.weightNum.load() ) {}
-                          meansOut[tid] = this->_computeMean(trans);
-                          trans.weightNum.store(0);
-                          trans.updated.store(0);
-                        }*/
-
-                        /*
-                        auto& weight = trans.totalWeight;
-                        auto current = weight.load();
-                        auto updated = current + delta;
-                        while( !weight.compare_exchange_strong(current, updated) ) {
-                          current = weight.load(); updated = current + delta;
-                        }
-                        */
-                  }
-
-
-              ++completedJobs;
-            } // for kid in range
-
-          });
-
-          // wait for all kmer groups to be processed
-          pbthread.join();
-
-          /*
-          double delta = 0.0;
-          double norm = 1.0 / transcripts_.size();
-          size_t discard = 0;
-
-
-          // M-Step
-          // compute the new mean for each transcript
-          tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(transcripts_.size())),
-            [this, &meansOut](const BlockedIndexRange& range) -> void {
-              for (auto tid : boost::irange(range.begin(), range.end())) {
-                auto& ts = this->transcripts_[tid];
-                auto tsNorm = 1.0;//(ts.effectiveLength > 0.0) ? 1.0 / std::sqrt(ts.effectiveLength) : 1.0;
-                meansOut[tid] = tsNorm * this->_computeMean( ts );
-                ts.weightNum.store(0);
-              }
-          });
-          */
-
-          normalize_(meansOut);
-    }
-
-
-public:
-    /**
-     * Construct the solver with the read and transcript hashes
-     */
-    CollapsedIterativeOptimizer( ReadHash &readHash, TranscriptGeneMap& transcriptGeneMap,
-                                 BiasIndex& biasIndex, uint32_t numThreads ) :
-                                 numThreads_(numThreads), merLen_(readHash.kmerLength()),
-                                 readHash_(readHash), biasIndex_(biasIndex),
-                                 transcriptGeneMap_(transcriptGeneMap) {}
-
-
-    KmerQuantity optimize(const std::string& klutfname,
-                          const std::string& tlutfname,
-                          const std::string& kmerEquivClassFname,
-                          size_t numIt,
-                          double minMean,
-                          double maxDelta) {
-
-        kmerEquivClassFname_ = kmerEquivClassFname;
-        const bool discardZeroCountKmers = true;
-        initialize_(klutfname, tlutfname, kmerEquivClassFname, discardZeroCountKmers);
-
-        KmerQuantity globalError {0.0};
-        bool done {false};
-        std::atomic<size_t> numJobs {0};
-        std::atomic<size_t> completedJobs {0};
-        std::vector<KmerID> kmerList( transcriptsForKmer_.size(), 0 );
-        size_t idx = 0;
-
-        tbb::task_scheduler_init tbb_init(numThreads_);
-
-        std::cerr << "Computing initial coverage estimates ... ";
-
-        std::vector<double> means0(transcripts_.size(), 0.0);
-        std::vector<double> means1(transcripts_.size(), 0.0);
-        std::vector<double> means2(transcripts_.size(), 0.0);
-        std::vector<double> meansPrime(transcripts_.size(), 0.0);
-
-        std::vector<double> r(transcripts_.size(), 0.0);
-        std::vector<double> v(transcripts_.size(), 0.0);
-        std::atomic<size_t> uniquelyAnchoredTranscripts{0};
-        std::atomic<size_t> nonZeroTranscripts{0};
-        
-        loggedCounts_ = true;
-        logKmerGroupCounts_();
-
-        // Compute the initial mean for each transcript
-        tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(transcriptGeneMap_.numTranscripts())),
-        [this, &uniquelyAnchoredTranscripts, &nonZeroTranscripts, &means0](const BlockedIndexRange& range) -> void {
-            for (auto tid = range.begin(); tid != range.end(); ++tid) {
-              auto& transcriptData = this->transcripts_[tid];
-              KmerQuantity total = 0.0;
-              //transcriptData.weights.resize(transcriptData.binMers.size());
-              //transcriptData.logLikes.resize(transcriptData.binMers.size());
-
-              bool notTotallyPromiscuous{false};
-              bool nonZero{false};
-
-              for ( auto & kv : transcriptData.binMers ) {
-                auto kmer = kv.first;
-                if ( this->genePromiscuousKmers_.find(kmer) == this->genePromiscuousKmers_.end() ){
-                    // count is the number of times kmer appears in transcript (tid)
-                    auto logCount = std::log(kv.second);
-                    kv.second = logCount * std::log(this->kmerGroupCounts_[kmer]) * std::log(this->weight_(kmer));
-                    if (kv.second > 0.0 and kmerGroupPromiscuities_[kmer] == 1) { notTotallyPromiscuous = true; }
-                    if (kv.second > 0.0) { nonZero = true; }
-                    //transcriptData.weights[transcriptData.weightNum++] = kv.second;
-                    //total += kv.second;
-                }
-              }
-              //transcriptData.totalWeight.store(total);
-              //transcriptData.weightNum.store(0);
-
-              transcriptData.mean = means0[tid] = this->_computeMean(transcriptData);
-              //transcriptData.mean = means0[tid] = this->_computeClampedMean(transcriptData);
-
-              //transcriptData.mean = notTotallyPromiscuous ? means0[tid] = this->_computeMean(transcriptData) : 0.0;
-              if (notTotallyPromiscuous) { ++uniquelyAnchoredTranscripts; }
-              if (nonZero) { ++nonZeroTranscripts; }
-              //transcriptData.mean = means0[tid] = this->_computeWeightedMean(transcriptData);
-              //transcriptData.mean = means0[tid] = 1.0 / this->transcripts_.size();//this->_computeMean(transcriptData);
-              //transcriptData.mean = this->_computeWeightedMean(transcriptData);
-              //transcriptData.mean = distribution(generator);
-              //this->_computeWeightedMean( transcriptData );
-            }
-        }
-        );
-        normalizeTranscriptMeans_();
-        normalize_(means0);
-
-        std::cerr << "\nThere were " << uniquelyAnchoredTranscripts.load() << " uniquely anchored transcripts\n";
-        std::cerr << "There were " << nonZeroTranscripts.load() << " transcripts with at least one overlapping k-mer\n";
-        std::cerr << "done\n";
-        size_t outerIterations = 1;
-        /*
-        for ( size_t iter = 0; iter < numIt; ++iter ) {
-          std::cerr << "EM iteraton: " << iter << "\n";
-          EMUpdate_(means0, means1);
-          std::swap(means0, means1);
-        }
-        */
-
-        /**
-         * Defaults for these values taken from the R implementation of
-         * [SQUAREM](http://cran.r-project.org/web/packages/SQUAREM/index.html).
-         */
-        double minStep0, minStep, maxStep0, maxStep, mStep, nonMonotonicity;
-        minStep0 = 1.0; minStep = 1.0;
-        maxStep0 = 1.0; maxStep = 1.0;
-        mStep = 4.0;
-        nonMonotonicity = 1.0;
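-
-        /* For reference, a minimal sketch (not part of the original source) of a
-           single SQUAREM "S3" acceleration step over plain std::vector<double>
-           parameters; `emUpdate` is a hypothetical stand-in for EMUpdate_, and the
-           step-length clamping used below is omitted for brevity:
-
-             auto squaremStep = [&](std::vector<double> t0) {
-                 std::vector<double> t1(t0.size()), t2(t0.size()), out(t0.size());
-                 emUpdate(t0, t1);                  // Theta_1 = EM(Theta_0)
-                 emUpdate(t1, t2);                  // Theta_2 = EM(Theta_1)
-                 double rr = 0.0, vv = 0.0;
-                 for (size_t i = 0; i < t0.size(); ++i) {
-                     double r = t1[i] - t0[i];
-                     double v = (t2[i] - t1[i]) - r;
-                     rr += r * r; vv += v * v;
-                 }
-                 double alpha = std::sqrt(rr / vv); // step length ||r|| / ||v||
-                 for (size_t i = 0; i < t0.size(); ++i) {
-                     double r = t1[i] - t0[i];
-                     double v = (t2[i] - t1[i]) - r;
-                     out[i] = std::max(0.0, t0[i] + 2*alpha*r + alpha*alpha*v);
-                 }
-                 return out;
-             };
-        */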
-
-        double negLogLikelihoodOld = std::numeric_limits<double>::infinity();
-        double negLogLikelihoodNew = std::numeric_limits<double>::infinity();
-
-        std::function<bool(std::vector<double>&, std::vector<double>&)> hasConverged;
-        if (std::isfinite(maxDelta)) {
-            hasConverged = [maxDelta, this] (std::vector<double>& v0, std::vector<double>& v1) -> bool {
-                double minVal = 1e-7;
-                auto relDiff = this->relAbsDiff_(v0, v1, minVal);
-                double maxRelativeChange = *std::max_element( relDiff.begin(), relDiff.end() );
-                std::cerr << "max relative change: " << maxRelativeChange << "\n";
-                return maxRelativeChange < maxDelta;
-            };
-        } else {
-            hasConverged = [] (std::vector<double>& v0, std::vector<double>& v1) -> bool {
-                std::cerr << "no data-driven convergence criterion specified\n";
-                return false;
-            };
-        }
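-
-        /* Sketch of the check above, assuming relAbsDiff_ returns the elementwise
-           relative change |v1[i] - v0[i]| / max(v1[i], minVal): with
-           v0 = {0.10, 0.20}, v1 = {0.11, 0.20} and minVal = 1e-7, the maximum
-           relative change is 0.01 / 0.11 ~= 0.09, so maxDelta = 0.1 would stop. */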
-
-        std::string clearline = "                                                                                \r\r";
-
-        // Until we've reached the specified maximum number of iterations, or hit our
-        // tolerance threshold
-        for ( size_t iter = 0; iter < numIt; ++iter ) {
-            std::string jumpBack = "\x1b[A";
-            std::cerr << clearline << "SQUAREM iteraton [" << iter << "]\n";
-            jumpBack += "\x1b[A";
-
-
-          // Theta_1 = EMUpdate(Theta_0)
-          std::cerr << clearline << "1/3\n";
-          EMUpdate_(means0, means1);
-          jumpBack += "\x1b[A\x1b[A";
-
-          if (!std::isfinite(negLogLikelihoodOld)) {
-            negLogLikelihoodOld = -expectedLogLikelihood_(means0);
-          }
-
-          // Theta_2 = EMUpdate(Theta_1)
-          std::cerr << clearline << "2/3\n";
-          EMUpdate_(means1, means2);
-          jumpBack += "\x1b[A\x1b[A";
-
-          // Check for data-driven convergence criteria
-          if (hasConverged(means1, means2)) {
-              std::cerr << "convergence criteria met; terminating SQUAREM\n";
-              break;
-          }
-
-          double delta = pabsdiff_(means1, means2);
-          std::cerr << clearline << "delta = " << delta << "\n";
-          jumpBack += "\x1b[A";
-
-          // r = Theta_1 - Theta_0
-          // v = (Theta_2 - Theta_1) - r
-          tbb::parallel_for(BlockedIndexRange(size_t(0), transcripts_.size()),
-            [&means0, &means1, &means2, &r, &v](const BlockedIndexRange& range) -> void {
-              for (auto tid = range.begin(); tid != range.end(); ++tid) {
-                r[tid] = means1[tid] - means0[tid];
-                v[tid] = (means2[tid] - means1[tid]) - r[tid];
-              }
-            }
-          );
-
-          double rNorm = std::sqrt(dotProd_(r,r));
-          double vNorm = std::sqrt(dotProd_(v,v));
-          double alphaS = rNorm / vNorm;
-
-          alphaS = std::max(minStep, std::min(maxStep, alphaS));
-
-          tbb::parallel_for(BlockedIndexRange(size_t(0), transcripts_.size()),
-            [&r, &v, alphaS, &means0, &meansPrime](const BlockedIndexRange& range) -> void {
-              for (auto tid = range.begin(); tid != range.end(); ++tid) {
-                meansPrime[tid] = std::max(0.0, means0[tid] + 2*alphaS*r[tid] + (alphaS*alphaS)*v[tid]);
-              }
-            }
-          );
-
-          // Stabilization step
-          if ( std::abs(alphaS - 1.0) > 0.01) {
-            std::cerr << clearline << "alpha = " << alphaS << ". ";
-            std::cerr << "Performing a stabilization step.\n";
-            EMUpdate_(meansPrime, meansPrime);
-            jumpBack += "\x1b[A\x1b[A";
-          }
-
-          /** Evaluate the objective at the extrapolated point (meansPrime); this is
-              only needed when a finite bound on non-monotonicity is enforced. **/
-          if (std::isfinite(nonMonotonicity)) {
-            negLogLikelihoodNew = -expectedLogLikelihood_(meansPrime);
-          } else {
-            negLogLikelihoodNew = negLogLikelihoodOld;
-          }
-
-          if (negLogLikelihoodNew > negLogLikelihoodOld + nonMonotonicity) {
-            std::swap(meansPrime, means2);
-            negLogLikelihoodNew = -expectedLogLikelihood_(meansPrime);
-            if (alphaS == maxStep) { maxStep = std::max(maxStep0, maxStep/mStep); }
-            alphaS = 1.0;
-          }
-          std::cerr << clearline << "alpha = " << alphaS << ", ";
-
-          if (alphaS == maxStep) { maxStep = mStep * maxStep; }
-          if (minStep < 0 and alphaS == minStep) { minStep = mStep * minStep; }
-          std::swap(meansPrime, means0);
-
-          if (!std::isnan(negLogLikelihoodNew)) {
-            std::cerr << "negLogLikelihood = " << negLogLikelihoodNew << "\n";
-            jumpBack += "\x1b[A";
-            negLogLikelihoodOld = negLogLikelihoodNew;
-
-          }
-
-          bool doFilter{true};
-          if ( doFilter and ((iter == numIt - 1) or (iter > 0 and iter % 10 == 0))) {
-              double itFrac = iter / static_cast<double>(numIt);
-              // cutoff = e^{itFrac^2 - 0.12}, capped at 1.0
-              double cutoff = std::min(1.0, std::exp((itFrac*itFrac) - 0.12));//std::max(0.5, std::min(1.0, (iter + 10) / static_cast<double>(numIt)));
-              std::cerr << clearline << "Iteration[" << iter << "] :: Filter cutoff = " << cutoff << "\n";
-              jumpBack += "\x1b[A";
-              filterByCoverage_(cutoff, means0);
-          }
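-
-          /* With cutoff = min(1, e^{itFrac^2 - 0.12}), filtering starts permissive
-             and tightens: at itFrac = 0 the cutoff is e^{-0.12} ~= 0.89, and it
-             saturates at 1.0 once itFrac >= sqrt(0.12) ~= 0.35 (i.e., about 35% of
-             the way through the requested iterations). */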
-
-          // If it's not the final iteration, then jump back and clear the console.
-          if (iter < numIt - 1) { std::cerr << jumpBack; }
-
-
-          /** Filter out super low-abundance transcripts **/
-          /*          double epsilon = 10e-8;
-          tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(transcripts_.size())),
-                            [&means0, epsilon, this](const BlockedIndexRange& range) -> void {
-                                 for (auto tid : boost::irange(range.begin(), range.end())) {
-                                     if (means0[tid] < epsilon) {
-                                         means0[tid] = 0.0;
-                                         this->transcripts_[tid].mean = 0.0;
-                                     }
-                                 }
-                             });
-          */
-
-          // if (iter > 0 and iter % 5 == 0 and iter < numIt - 1) {
-          //   std::cerr << "updating kmer biases: . . . ";
-          //   tbb::parallel_for( BlockedIndexRange(size_t(0), size_t(transcripts_.size())),
-          //     [&means0, this](const BlockedIndexRange& range) -> void {
-          //       for (auto tid : boost::irange(range.begin(), range.end())) {
-          //         this->transcripts_[tid].mean = means0[tid];
-          //       }
-          //     });
-
-          //   computeKmerFidelities_();
-          //   std::cerr << "done\n";
-          // }
-
-
-        }
-
-
-        /*
-        for ( size_t oiter = 0; oiter < outerIterations; ++oiter ) {
-            for ( size_t iter = 0; iter < numIt; ++iter ) {
-
-                auto reqNumJobs = transcriptsForKmer_.size();
-                std::cerr << "iteraton: " << iter << "\n";
-
-                globalError = 0.0;
-                numJobs = 0;
-                completedJobs = 0;
-
-                // Print out our progress
-                auto pbthread = std::thread(
-                    [&completedJobs, reqNumJobs]() -> bool {
-                        auto prevNumJobs = 0;
-                        ez::ezETAProgressBar show_progress(reqNumJobs);
-                        show_progress.start();
-                        while ( prevNumJobs < reqNumJobs ) {
-                            if ( prevNumJobs < completedJobs ) {
-                                show_progress += completedJobs - prevNumJobs;
-                            }
-                            prevNumJobs = completedJobs.load();
-                            boost::this_thread::sleep_for(boost::chrono::seconds(1));
-                        }
-                        return true;
-                    });
-
-                //  E-Step : reassign the kmer group counts proportionally to each transcript
-                tbb::parallel_for( size_t(0), size_t(transcriptsForKmer_.size()),
-                    // for each kmer group
-                    [&kmerList, &completedJobs, this]( size_t kid ) {
-                        auto kmer = kid;
-                        if ( this->genePromiscuousKmers_.find(kmer) == this->genePromiscuousKmers_.end() ){
-
-                            // for each transcript containing this kmer group
-                            auto &transcripts = this->transcriptsForKmer_[kmer];
-                            if ( transcripts.size() > 0 ) {
-
-                                double totalMass = 0.0;
-                                for ( auto tid : transcripts ) {
-                                    totalMass += this->transcripts_[tid].mean;
-                                }
-
-                                if ( totalMass > 0.0 ) {
-                                    double norm = 1.0 / totalMass;
-                                    for ( auto tid : transcripts ) {
-                                        if ( this->transcripts_[tid].mean > 0.0 ) {
-                                            this->transcripts_[tid].binMers[kmer] =
-                                            this->transcripts_[tid].mean * norm * kmerGroupBiases_[kmer] * this->kmerGroupCounts_[kmer];
-                                        }
-                                    }
-                                }
-
-                            }
-                        }
-                        ++completedJobs;
-                    });
-
-                // wait for all kmer groups to be processed
-                pbthread.join();
-
-                // reset the job counter
-                completedJobs = 0;
-
-                double delta = 0.0;
-                double norm = 1.0 / transcripts_.size();
-
-                std::vector<KmerQuantity> prevMeans( transcripts_.size(), 0.0 );
-                tbb::parallel_for( size_t(0), size_t(transcripts_.size()),
-                    [this, &prevMeans]( size_t tid ) -> void { prevMeans[tid] = this->transcripts_[tid].mean; });
-
-                std::cerr << "\ncomputing new means ... ";
-                size_t discard = 0;
-
-                // M-Step
-                // compute the new mean for each transcript
-                tbb::parallel_for( size_t(0), size_t(transcripts_.size()),
-                    [this, iter, numIt, norm, minMean, &discard]( size_t tid ) -> void {
-                        auto& ts = this->transcripts_[tid];
-                        auto tsNorm = 1.0;//(ts.effectiveLength > 0.0) ? 1.0 / std::sqrt(ts.effectiveLength) : 1.0;
-                        //ts.mean = tsNorm * this->_computeWeightedMean( ts );
-                        //ts->mean = tsNorm * this->averageCount( ts );
-                        ts.mean = tsNorm * this->_computeMean( ts );
-                        //ts.mean = tsNorm * this->_computeMedian( ts );
-                });
-
-                normalizeTranscriptMeans_();
-                for( auto tid : boost::irange(size_t{0}, prevMeans.size()) ){
-                    delta += std::abs( transcripts_[tid].mean - prevMeans[tid] );
-                }
-
-                std::cerr << "done\n";
-                std::cerr << "total variation in mean = " << delta << "\n";
-                std::cerr << "discarded " << discard << " transcripts in this round whose mean was below " << minMean << "\n";
-            }
-
-            std::cerr << "end of outer iteration " << oiter << " recomputing biases\n";
-            // Thresholding
-            tbb::parallel_for( size_t(0), size_t(transcripts_.size()),
-                [this, minMean](size_t tid) -> void {
-                    auto& ts = this->transcripts_[tid];
-                    if (ts.mean < minMean) {
-                        ts.mean = 0.0;
-                        for (auto& kv : ts.binMers) {
-                            kv.second = 0.0;
-                        }
-                    }
-            });
-            //computeKmerFidelities_();
-        }
-        */
-
-        return globalError;
-    }
-
-
-    void writeAbundances(const boost::filesystem::path& outputFilePath,
-                         const std::string& headerLines,
-                         double minAbundance,
-                         bool haveCI) {
-
-        // Confidence intervals are currently disabled in this output path.
-        haveCI = false;
-
-        auto writeCoverageInfo = true;
-        if ( writeCoverageInfo ) {
-            boost::filesystem::path covPath = outputFilePath.parent_path();
-            covPath /= "equivClassCoverage.txt";
-            _dumpCoverage( covPath );
-        }
-
-        std::cerr << "Writing output\n";
-        ez::ezETAProgressBar pb(transcripts_.size());
-        pb.start();
-
-        auto estimatedGroupTotal = psum_(kmerGroupCounts_);
-        auto totalNumKmers = readHash_.totalLength() * (std::ceil(readHash_.averageLength()) - readHash_.kmerLength() + 1);
-        std::vector<KmerQuantity> fracTran(transcripts_.size(), 0.0);
-        std::vector<KmerQuantity> fracTranLow(transcripts_.size(), 0.0);
-        std::vector<KmerQuantity> fracTranHigh(transcripts_.size(), 0.0);
-
-        std::vector<KmerQuantity> numKmersFromTranscript(transcripts_.size(), 0.0);
-
-        // Compute nucleotide fraction (\nu_i in RSEM)
-        tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(transcripts_.size())),
-           [this, &numKmersFromTranscript, &fracTran, totalNumKmers, estimatedGroupTotal](const BlockedIndexRange& range) -> void {
-            for (auto tid = range.begin(); tid != range.end(); ++tid) {
-              auto& ts = this->transcripts_[tid];
-              //fracTran[tid] = this->_computeMean(ts);
-
-              // EM
-              numKmersFromTranscript[tid] = this->_computeSum(ts);
-
-              // VB
-              //numKmersFromTranscript[tid] = ts.mean * estimatedGroupTotal;//this->_computeSum(ts);
-              //fracTran[tid] = this->_computeClampedMean( ts );
-            }
-        });
-        // noise transcript
-        // fracTran[transcripts_.size()] = totalNumKmers - estimatedGroupTotal;
-
-        // Normalize to obtain the fraction of kmers that originated from each transcript
-        normalize_(numKmersFromTranscript);
-        auto& fracNuc = numKmersFromTranscript;
-
-
-        // Compute transcript fraction (\tau_i in RSEM)
-        tbb::parallel_for(BlockedIndexRange(size_t(0), transcripts_.size()),
-          [&, this](const BlockedIndexRange& range) -> void {
-            for (auto i = range.begin(); i != range.end(); ++i) {
-              auto& ts = transcripts_[i];
-
-              fracTran[i] = this->_computeMean(ts);
-
-              //fracTran[i] = (ts.effectiveLength > 0) ? ts.mean / ts.effectiveLength : 0.0;
-              fracTranLow[i] = (ts.effectiveLength > 0) ? ts.fracLow / ts.effectiveLength : 0.0;
-              fracTranHigh[i] = (ts.effectiveLength > 0) ? ts.fracHigh / ts.effectiveLength : 0.0;
-
-            }
-          });
-        //normalize_(fracTran);
-
-        auto fracNorm = 1.0 / psum_(fracTran);
-        tbb::parallel_for(BlockedIndexRange(size_t(0), transcripts_.size()),
-                          [&, fracNorm](const BlockedIndexRange& range) -> void {
-                              for (auto i = range.begin(); i != range.end(); ++i) {
-                                  auto& ts = transcripts_[i];
-                                  fracTran[i] *= fracNorm;
-                                  fracTranLow[i] *= fracNorm;
-                                  fracTranHigh[i] *= fracNorm;
-                              }
-        });
-
-
-        std::ofstream ofile( outputFilePath.string() );
-        size_t index = 0;
-        double million = std::pow(10.0, 6);
-        double billion = std::pow(10.0, 9);
-        double estimatedReadLength = readHash_.averageLength();
-        double kmersPerRead = ((estimatedReadLength - merLen_) + 1);
-        double totalMappedKmers = std::accumulate(numKmersFromTranscript.begin(),
-                                                  numKmersFromTranscript.end(),
-                                                  0.0);
-        double totalMappedReads = totalMappedKmers / kmersPerRead;
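-
-        /* Worked example (illustrative numbers): for 76 bp reads and k = 20, each
-           read contributes (76 - 20) + 1 = 57 k-mers, so 5.7e8 mapped k-mers
-           correspond to roughly 1e7 mapped reads. */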
-
-        ofile << headerLines;
-
-        ofile << "# " <<
-            "Transcript" << '\t' <<
-            "Length" << '\t' <<
-            "TPM" << '\t' <<
-            "RPKM" << '\t' <<
-            "KPKM" << '\t' <<
-            "EstimatedNumReads";
-
-        if (haveCI) {
-            ofile << '\t' << "TPM_LOW" << '\t' << "TPM_HIGH";
-        }
-
-        ofile << '\n';
-
-        for ( auto i : boost::irange(size_t{0}, transcripts_.size()) ) {
-          auto& ts = transcripts_[i];
-          // expected # of kmers coming from transcript i
-
-          // auto ci = estimatedGroupTotal * fracNuc[i];
-          auto ci = numKmersFromTranscript[i];
-
-          // expected # of reads coming from transcript i
-          auto ri = ci / kmersPerRead;
-          double effectiveLength = ts.length - std::floor(estimatedReadLength) + 1;
-          double effectiveLengthKmer = ts.length - merLen_ + 1;
-
-          auto kpkm = (effectiveLengthKmer > 0) ?
-            (ci * billion) / (effectiveLengthKmer * totalMappedKmers) : 0.0;
-          kpkm = (kpkm < minAbundance) ? 0.0 : kpkm;
-
-          auto rpkm = (effectiveLength > 0) ?
-            (ri * billion) / (effectiveLength * totalMappedReads) : 0.0;
-          rpkm = (kpkm < minAbundance) ? 0.0 : rpkm;
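-
-          /* e.g., with illustrative numbers: ci = 5,700 k-mers on a transcript with
-             effective k-mer length 1,000, out of 1e7 mapped k-mers in total, gives
-             KPKM = (5.7e3 * 1e9) / (1e3 * 1e7) = 570; RPKM is the analogous quantity
-             after converting k-mer counts to read counts via kmersPerRead. */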
-
-          // PREVIOUS
-          // auto ci = estimatedGroupTotal * fracNuc[i];
-          /*
-          auto kpkm = (effectiveLengthKmer > 0) ?
-            (billion * (fracNuc[i] / ts.effectiveLength)) : 0.0;
-          kpkm = (kpkm < minAbundance) ? 0.0 : kpkm;
-          auto rpkm = (effectiveLength > 0) ?
-          (billion * (fracNuc[i] / ts.effectiveLength)) : 0.0;
-          rpkm = (kpkm < minAbundance) ? 0.0 : rpkm;
-
-          */
-
-
-          auto tpm = fracTran[i] * million;
-          tpm = (kpkm < minAbundance) ? 0.0 : tpm;
-
-          ofile << transcriptGeneMap_.transcriptName(index) <<
-                   '\t' << ts.length << '\t' <<
-                   tpm << '\t' <<
-                   rpkm << '\t' <<
-                   kpkm << '\t' <<
-                   ri;
-          if (haveCI) {
-              ofile << '\t' <<
-                  fracTranLow[i] * million << '\t' <<
-                  fracTranHigh[i] * million;
-          }
-          ofile << '\n';
-
-          ++index;
-          ++pb;
-        }
-        ofile.close();
-    }
-
-
-    /**
-     *  Instead of using the EM (SQUAREM) algorithm, infer the posterior distribution
-     *  using Streaming, Distributed, Asynchronous (SDA) variational Bayes
-     */
-    KmerQuantity optimizeVB(const std::string& klutfname,
-                          const std::string& tlutfname,
-                          const std::string& kmerEquivClassFname,
-                          size_t numIt,
-                          double minMean,
-                          double maxDelta) {
-
-
-         kmerEquivClassFname_ = kmerEquivClassFname;
-        // Prepare the necessary structures
-        const bool discardZeroCountKmers = false;
-        initialize_(klutfname, tlutfname, kmerEquivClassFname, discardZeroCountKmers);
-
-        const size_t numTranscripts = transcripts_.size();
-        const size_t numKmers = transcriptsForKmer_.size();
-
-        // Set the appropriate, user-specified, convergence criteria
-        std::function<bool(std::vector<double>&, std::vector<double>&)> hasConverged;
-        if (std::isfinite(maxDelta)) {
-            hasConverged = [maxDelta, this] (std::vector<double>& v0, std::vector<double>& v1) -> bool {
-                double maxVal = *std::max_element(v1.begin(), v1.end());
-                std::cerr << "maxVal: " << maxVal << "\n";
-                double minVal = 1e-7;
-                auto relDiff = this->relAbsDiff_(v0, v1, minVal);
-                double maxRelativeChange = *std::max_element( relDiff.begin(), relDiff.end() );
-                std::cerr << "max relative change: " << maxRelativeChange << "\n";
-                return maxRelativeChange < maxDelta;
-            };
-        } else {
-            hasConverged = [] (std::vector<double>& v0, std::vector<double>& v1) -> bool {
-                std::cerr << "no data-driven convergence criterion specified\n";
-                return false;
-            };
-        }
-
-
-        // We'll use these to check convergence
-        std::vector<double> meansOld(transcripts_.size(), 0.0);
-        std::vector<double> meansNew(transcripts_.size(), 0.0);
-
-
-        std::atomic<size_t> uniquelyAnchoredTranscripts{0};
-        std::atomic<size_t> nonZeroTranscripts{0};
-
-        const double DirichletPriorAlpha = 0.1;
-        std::vector<double> priorAlphas(transcripts_.size(), 0.0);
-        std::vector<double> posteriorAlphas(transcripts_.size(), 0.0);
-        /*
-        std::vector<double> kmerWeights(numKmers, 0.0);
-        for (auto tid : boost::irange(size_t{0}, numTranscripts)) {
-            auto& transcriptData = transcripts_[tid];
-            for ( auto & kv : transcriptData.binMers ) {
-                kmerWeights[kv.first] += kv.second * weight_(kv.first);
-            }
-        }
-        for (auto kw : kmerWeights) {
-            if (std::abs(kw - 1.0) > 1e-3) {
-                std::cerr << "Kmer had weight " << kw << "\n";
-            }
-        }
-        */
-
-        // Compute the initial mean for each transcript
-        tbb::parallel_for(BlockedIndexRange(size_t(0), numTranscripts),
-                          [this, DirichletPriorAlpha, &uniquelyAnchoredTranscripts, &nonZeroTranscripts, &meansOld, &posteriorAlphas](const BlockedIndexRange& range) -> void {
-            for (auto tid = range.begin(); tid != range.end(); ++tid) {
-              auto& transcriptData = this->transcripts_[tid];
-              KmerQuantity total = 0.0;
-
-              bool notTotallyPromiscuous{false};
-              bool nonZero{false};
-
-              auto effLength = transcriptData.effectiveLength;
-              for ( auto & kv : transcriptData.binMers ) {
-                auto kmer = kv.first;
-
-                // count is the number of occurrences of the k-mer in this transcript
-                auto count = kv.second;
-
-                // weight(kmer) = 1 / total # occurrences
-                // count = # occurrences in this transcript
-                // kmerGroupCounts(kmer) = # of observations of kmer in the read set
-                // The weight attributed to this transcript (kv.second) is:
-                // count * weight(kmer) * kmerGroupCounts(kmer) = (# occurrences in ts / total # occurrences) * total count
-                kv.second = (effLength  > 0.0) ?
-                    (count * this->kmerGroupCounts_[kmer] * this->weight_(kmer)) :
-                    0.0;
-
-                if (kv.second > 0.0 and kmerGroupPromiscuities_[kmer] == 1) { notTotallyPromiscuous = true; }
-                if (kv.second > 0.0) { nonZero = true; }
-              }
-
-              //transcriptData.mean = this->_computeMean(transcriptData);
-              auto weightedKmerSum = this->_computeSum(transcriptData);
-              // Initialize \alpha_t = \alpha_0 (the weighted k-mer sum term is
-              // currently left out of the initialization)
-              posteriorAlphas[tid] = DirichletPriorAlpha; //+ weightedKmerSum;
-              if (notTotallyPromiscuous) { ++uniquelyAnchoredTranscripts; }
-              if (nonZero) { ++nonZeroTranscripts; }
-            }
-        }
-        );
-
-        auto posteriorAlphaSum = psum_(posteriorAlphas);
-        std::cerr << "posteriorAlphaSum : " << posteriorAlphaSum << " [before iterations]\n";
-        for (size_t i : boost::irange(size_t{0}, numTranscripts)) {
-            meansOld[i] = posteriorAlphas[i] / posteriorAlphaSum;
-        }
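-
-        /* This initialization is just the mean of the Dirichlet prior: for
-           theta ~ Dirichlet(alpha), E[theta_i] = alpha_i / sum_j alpha_j, so with a
-           flat alpha_0 = 0.1 every transcript starts at 1 / numTranscripts. */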
-
-        std::vector<double> expectedLogThetas(transcripts_.size(), 0.0);
-        std::vector<double> logRho(transcripts_.size(), -std::numeric_limits<double>::infinity());
-        std::string clearline = "                                                                                \r\r";
-
-
-        for (size_t currIt : boost::irange(size_t{0}, numIt)) {
-            std::string jumpBack = "\x1b[A";
-            std::cerr << clearline << "iteration : " << currIt << "\n";
-            // log rho_{ntsoa} = E_{theta}[log theta_t] + log P(S_n | T_n) + [other terms sum to 0]
-            // E_{theta}[log theta_t] = digamma(alpha_t) - digamma(\sum_{t'} alpha_{t'})
-            double sumAlpha = psum_(posteriorAlphas);
-            double digammaSumAlpha = boost::math::digamma(sumAlpha);
-            for (size_t i : boost::irange(size_t{0}, numTranscripts)) {
-                auto& ts = transcripts_[i];
-                //double digammaAlphaT = boost::math::digamma(posteriorAlphas[i]);
-
-                logRho[i] = (ts.effectiveLength > 0 and posteriorAlphas[i] > 0.0) ?
-                    (boost::math::digamma(posteriorAlphas[i]) - digammaSumAlpha) + std::log(1.0 / ts.effectiveLength) :
-                    -std::numeric_limits<double>::infinity();
-            }
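-
-            /* The expectation above is the standard Dirichlet identity
-               E_q[log theta_t] = digamma(alpha_t) - digamma(sum_t' alpha_t'),
-               evaluated via boost::math::digamma; logRho then folds in the
-               length term log(1 / effectiveLength). */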
-
-            std::atomic<size_t> numUpdated{0};
-
-            //  E-Step : reassign the kmer group counts proportionally to each transcript
-            tbb::parallel_for(BlockedIndexRange(size_t(0), numKmers),
-                 // for each kmer group
-                 [&meansOld, &meansNew, &logRho, &posteriorAlphas, &numUpdated, DirichletPriorAlpha, this](const BlockedIndexRange& range) -> void {
-                     for (auto kid : boost::irange(range.begin(), range.end())) {
-                         auto kmer = kid;
-                         // for each transcript containing this kmer group
-                         auto& transcripts = this->transcriptsForKmer_[kmer];
-
-                         double totalMass = 0.0;
-                         for ( auto tid : transcripts ) {
-                             auto el = this->transcripts_[tid].effectiveLength;
-                             double rho = (el > 0) ? std::exp(logRho[tid]) : 0.0;
-                             totalMass += rho;
-                         }
-
-                         double norm = (totalMass > 0.0) ? (1.0 / totalMass) : 0.0;
-                         for ( auto tid : transcripts ) {
-                             auto& trans = this->transcripts_[tid];
-                             auto lastIndex = trans.binMers.size()  - 1;
-                             auto el = trans.effectiveLength;
-                             double rho = (el > 0) ? std::exp(logRho[tid]) : 0.0;
-                             trans.binMers[kmer] = rho * norm *
-                                 kmerGroupBiases_[kmer] * this->kmerGroupCounts_[kmer];
-
-                             // If we've seen all of the k-mers that appear in this transcript,
-                             // then we can compute its new estimated abundance
-                             if (trans.updated++ == lastIndex) {
-                                 ++numUpdated;
-
-                                 // We're folding the length term into the binMer weights now
-                                 // should this computeSum be a computeMean?
-                                 //trans.mean = this->_computeSum(trans);//this->_computeMean(trans);
-                                 auto sumWeightedReadMass = this->_computeSum(trans);
-
-                                 // with filter
-                                 // posteriorAlphas[tid] = DirichletPriorAlpha + sumWeightedReadMass;
-                                 posteriorAlphas[tid] = (posteriorAlphas[tid] > 0.0) ? DirichletPriorAlpha + sumWeightedReadMass : 0.0;
-
-                                 // without filter
-                                 //posteriorAlphas[tid] = DirichletPriorAlpha + trans.mean;
-                                 if (std::isnan(posteriorAlphas[tid])) {
-                                     std::cerr << "posterior is nan at " << tid << "\n";
-                                     std::cerr << "mean: " << trans.mean << "\n";
-                                     std::cerr << "norm: " << norm << "\n";
-                                 }
-                                 trans.updated.store(0);
-                             }
-                         }
-                     } // for kid in range
-           });
-
-            std::cerr << clearline << "numUpdated = " << numUpdated << " : numTranscripts = " << numTranscripts << "\n";
-            jumpBack += "\x1b[A";
-
-            auto posteriorAlphaSum = psum_(posteriorAlphas);
-            std::cerr << clearline << "posterior alpha sum: " << posteriorAlphaSum << "\n";
-            jumpBack += "\x1b[A";
-            for (size_t i : boost::irange(size_t{0}, numTranscripts)) {
-                meansNew[i] = posteriorAlphas[i] / posteriorAlphaSum;
-                transcripts_[i].mean = meansNew[i];
-            }
-
-            // Check for data-driven convergence criteria
-            if (hasConverged(meansOld, meansNew)) {
-              std::cerr << clearline << "convergence criteria met; terminating VB\n";
-              break;
-            }
-            jumpBack += "\x1b[A";
-
-            std::swap(meansNew, meansOld);
-
-            bool doFilter{false};
-            double itFrac = currIt / static_cast<double>(numIt);
-            // cutoff = e^{itFrac - 0.3}, capped at 1.0
-            double cutoff = std::min(1.0, std::exp(itFrac - 0.3));
-            if (doFilter and currIt > 0 and currIt % 100 == 0) {
-                filterByCoverage_(cutoff, posteriorAlphas);
-            }
-
-            std::cerr << clearline << "cutoff = " << cutoff << "\n";
-            jumpBack += "\x1b[A";
-            std::cerr << jumpBack;
-        }
-
-        // Compute the confidence intervals for the expression values
-        // The marginal for each transcript fraction takes the form of a Beta distribution
-        // we compute the confidence intervals by the appropriate quantiles of the distribution
-        double sumOfAlphas = psum_(posteriorAlphas);
-        tbb::parallel_for(BlockedIndexRange(size_t(0), size_t(transcriptGeneMap_.numTranscripts())),
-             [this, &posteriorAlphas, sumOfAlphas](const BlockedIndexRange& range) -> void {
-                    for (auto tid = range.begin(); tid != range.end(); ++tid) {
-                        auto& transcriptData = this->transcripts_[tid];
-                        auto alphaT = posteriorAlphas[tid];
-                        if (alphaT > 0) {
-                            boost::math::beta_distribution<double> marginalDist(alphaT, sumOfAlphas - alphaT);
-                            transcriptData.mean = boost::math::mean(marginalDist);
-                            transcriptData.fracLow = boost::math::quantile(marginalDist, 0.025);
-                            transcriptData.fracHigh = boost::math::quantile(marginalDist, 0.975);
-                        } else {
-                            transcriptData.mean = transcriptData.fracLow = transcriptData.fracHigh = 0.0;
-                        }
-                    }
-        });
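-
-        /* The marginal of component t of a Dirichlet(alpha) is
-           Beta(alpha_t, sumAlpha - alpha_t), which is what the block above exploits.
-           Sketch of the interval computation (same boost::math calls as above):
-
-             boost::math::beta_distribution<double> marginal(alphaT, sumAlpha - alphaT);
-             double lo = boost::math::quantile(marginal, 0.025);  // 2.5%  quantile
-             double hi = boost::math::quantile(marginal, 0.975);  // 97.5% quantile
-
-           together these bound a central 95% credible interval for the fraction. */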
-
-        return sumOfAlphas;
-    }
-
-};
-
-#endif // COLLAPSED_ITERATIVE_OPTIMIZER_HPP
diff --git a/src/SalmonQuantify.cpp.bak b/src/SalmonQuantify.cpp.bak
deleted file mode 100644
index 0c2acd3..0000000
--- a/src/SalmonQuantify.cpp.bak
+++ /dev/null
@@ -1,1314 +0,0 @@
-/**
->HEADER
-    Copyright (c) 2013 Rob Patro robp at cs.cmu.edu
-
-    This file is part of Salmon.
-
-    Salmon is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    Salmon is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with Salmon.  If not, see <http://www.gnu.org/licenses/>.
-<HEADER
-**/
-
-
-#include <algorithm>
-#include <atomic>
-#include <cassert>
-#include <cmath>
-#include <unordered_map>
-#include <map>
-#include <vector>
-#include <unordered_set>
-#include <mutex>
-#include <thread>
-#include <sstream>
-#include <exception>
-#include <random>
-#include <queue>
-#include "btree_map.h"
-#include "btree_set.h"
-
-
-/** BWA Includes */
-#include <cstdio>
-#include <unistd.h>
-#include <cstdlib>
-#include <cstring>
-#include <cctype>
-
-extern "C" {
-#include "bwa.h"
-#include "bwamem.h"
-#include "kvec.h"
-#include "utils.h"
-#include "kseq.h"
-#include "utils.h"
-}
-
-/** Boost Includes */
-#include <boost/filesystem.hpp>
-#include <boost/container/flat_map.hpp>
-#include <boost/dynamic_bitset/dynamic_bitset.hpp>
-#include <boost/range/irange.hpp>
-#include <boost/program_options.hpp>
-#include <boost/shared_ptr.hpp>
-#include <boost/scoped_ptr.hpp>
-#include <boost/lockfree/queue.hpp>
-#include <boost/thread/thread.hpp>
-
-#include "tbb/concurrent_unordered_set.h"
-#include "tbb/concurrent_vector.h"
-#include "tbb/concurrent_unordered_map.h"
-#include "tbb/concurrent_queue.h"
-#include "tbb/parallel_for.h"
-#include "tbb/parallel_for_each.h"
-#include "tbb/parallel_reduce.h"
-#include "tbb/blocked_range.h"
-#include "tbb/task_scheduler_init.h"
-#include "tbb/partitioner.h"
-
-#if HAVE_LOGGER
-#include "g2logworker.h"
-#include "g2log.h"
-#endif
-
-#include "cereal/types/vector.hpp"
-#include "cereal/archives/binary.hpp"
-
-#include "jellyfish/mer_dna.hpp"
-
-#include "ClusterForest.hpp"
-#include "PerfectHashIndex.hpp"
-#include "LookUpTableUtils.hpp"
-#include "SalmonMath.hpp"
-#include "Transcript.hpp"
-#include "LibraryFormat.hpp"
-#include "SailfishUtils.hpp"
-#include "ReadLibrary.hpp"
-
-#include "PairSequenceParser.hpp"
-
-//#include "google/dense_hash_map"
-
-extern unsigned char nst_nt4_table[256];
-char* bwa_pg = "cha";
-
-typedef pair_sequence_parser<char**> sequence_parser;
-
-using TranscriptID = uint32_t;
-using TranscriptIDVector = std::vector<TranscriptID>;
-using KmerIDMap = std::vector<TranscriptIDVector>;
-using my_mer = jellyfish::mer_dna_ns::mer_base_static<uint64_t, 1>;
-
-uint32_t transcript(uint64_t enc) {
-    uint32_t t = (enc & 0xFFFFFFFF00000000) >> 32;
-    return t;
-}
-
-uint32_t offset(uint64_t enc) {
-    uint32_t o = enc & 0xFFFFFFFF;
-    return o;
-}
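-
-// Sketch of the (hypothetical) inverse of the two decoders above: a k-mer location
-// packs the transcript ID into the high 32 bits and the offset into the low 32 bits.
-//
-//   uint64_t encodeLoc(uint32_t t, uint32_t o) {
-//       return (static_cast<uint64_t>(t) << 32) | o;
-//   }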
-
-class Alignment {
-    public:
-        Alignment(TranscriptID transcriptIDIn, uint32_t kCountIn = 1, double logProbIn = salmon::math::LOG_0) :
-            transcriptID_(transcriptIDIn), kmerCount(kCountIn), logProb(logProbIn) {}
-
-        inline TranscriptID transcriptID() { return transcriptID_; }
-        uint32_t kmerCount;
-        double logProb;
-
-    private:
-        TranscriptID transcriptID_;
-};
-
-void processMiniBatch(
-        double logForgettingMass,
-        std::vector<std::vector<Alignment>>& batchHits,
-        std::vector<Transcript>& transcripts,
-        ClusterForest& clusterForest
-        ) {
-
-    using salmon::math::LOG_0;
-    using salmon::math::logAdd;
-    using salmon::math::logSub;
-
-    size_t numTranscripts{transcripts.size()};
-
-    bool burnedIn{true};
-    // Build reverse map from transcriptID => hit id
-    using HitID = uint32_t;
-    btree::btree_map<TranscriptID, std::vector<Alignment*>> hitsForTranscript;
-    size_t hitID{0};
-    for (auto& hv : batchHits) {
-        for (auto& tid : hv) {
-            hitsForTranscript[tid.transcriptID()].push_back(&tid);
-        }
-        ++hitID;
-    }
-
-    double clustTotal = std::log(batchHits.size()) + logForgettingMass;
-    {
-        // E-step
-
-        // Iterate over each group of alignments (a group consists of all alignments reported
-        // for a single read).  Distribute the read's mass proportionally dependent on the
-        // current hits
-        for (auto& alnGroup : batchHits) {
-            if (alnGroup.size() == 0) { continue; }
-            double sumOfAlignProbs{LOG_0};
-            // update the cluster-level properties
-            bool transcriptUnique{true};
-            auto firstTranscriptID = alnGroup.front().transcriptID();
-            std::unordered_set<size_t> observedTranscripts;
-            for (auto& aln : alnGroup) {
-                auto transcriptID = aln.transcriptID();
-                auto& transcript = transcripts[transcriptID];
-                transcriptUnique = transcriptUnique and (transcriptID == firstTranscriptID);
-
-                double refLength = transcript.RefLength > 0 ? transcript.RefLength : 1.0;
-
-                double logFragProb = salmon::math::LOG_1;
-
-                // The alignment probability is the product of a transcript-level term
-                // (based on abundance) and an alignment-level term below, which is
-                // P(Q_1) * P(Q_2) * P(F | T)
-                double logRefLength = std::log(refLength);
-                double transcriptLogCount = transcript.mass();
-                if ( transcriptLogCount != LOG_0 ) {
-                    double errLike = salmon::math::LOG_1;
-                    if (burnedIn) {
-                        //errLike = errMod.logLikelihood(aln, transcript);
-                    }
-
-                    aln.logProb = std::log(std::pow(aln.kmerCount,2.0)) + (transcriptLogCount - logRefLength);// + qualProb + errLike;
-                    //aln.logProb = (transcriptLogCount - logRefLength);// + qualProb + errLike;
-
-
-                    sumOfAlignProbs = logAdd(sumOfAlignProbs, aln.logProb);
-                    if (observedTranscripts.find(transcriptID) == observedTranscripts.end()) {
-                        transcripts[transcriptID].addTotalCount(1);
-                        observedTranscripts.insert(transcriptID);
-                    }
-                } else {
-                    aln.logProb = LOG_0;
-                }
-            }
-
-            // normalize the hits
-            if (sumOfAlignProbs == LOG_0) { std::cerr << "0 probability fragment; skipping\n"; continue; }
-            for (auto& aln : alnGroup) {
-                aln.logProb -= sumOfAlignProbs;
-                auto transcriptID = aln.transcriptID();
-                auto& transcript = transcripts[transcriptID];
-                /*
-                double r = uni(eng);
-                if (!burnedIn and r < std::exp(aln.logProb)) {
-                    errMod.update(aln, transcript, aln.logProb, logForgettingMass);
-                    if (aln.fragType() == ReadType::PAIRED_END) {
-                        double fragLength = aln.fragLen();//std::abs(aln.read1->core.pos - aln.read2->core.pos) + aln.read2->core.l_qseq;
-                        fragLengthDist.addVal(fragLength, logForgettingMass);
-                    }
-                }
-                */
-
-            } // end normalize
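-
-            /* After the subtraction above, each aln.logProb is normalized in log
-               space: logProb_i <- logProb_i - logSumExp_j(logProb_j), so
-               exp(aln.logProb) is the posterior probability that this fragment
-               originated from that transcript. */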
-
-            // update the single target transcript
-            if (transcriptUnique) {
-                transcripts[firstTranscriptID].addUniqueCount(1);
-                clusterForest.updateCluster(firstTranscriptID, 1, logForgettingMass);
-            } else { // or the appropriate clusters
-                clusterForest.mergeClusters<Alignment>(alnGroup.begin(), alnGroup.end());
-                clusterForest.updateCluster(alnGroup.front().transcriptID(), 1, logForgettingMass);
-            }
-
-        } // end read group
-    } // end E-step scope
-
-        double individualTotal = LOG_0;
-        {
-            // M-step
-            double totalMass{LOG_0}; // accumulated in log space below
-            for (auto kv = hitsForTranscript.begin(); kv != hitsForTranscript.end(); ++kv) {
-                auto transcriptID = kv->first;
-                // The target must be a valid transcript
-                if (transcriptID >= numTranscripts) { std::cerr << "index " << transcriptID << " out of bounds\n"; }
-
-                auto& transcript = transcripts[transcriptID];
-
-                // The prior probability
-                double hitMass{LOG_0};
-
-                // The set of alignments that match transcriptID
-                auto& hits = kv->second;
-                std::for_each(hits.begin(), hits.end(), [&](Alignment* aln) -> void {
-                        if (!std::isfinite(aln->logProb)) { std::cerr << "hitMass = " << aln->logProb << "\n"; }
-                        hitMass = logAdd(hitMass, aln->logProb);
-                        });
-
-                double updateMass = logForgettingMass + hitMass;
-                individualTotal = logAdd(individualTotal, updateMass);
-                totalMass = logAdd(totalMass, updateMass);
-                transcript.addMass(updateMass);
-            } // end for
-        } // end M-step scope
-        //if (processedReads >= 5000000 and !burnedIn) { burnedIn = true; }
-}
-
-uint32_t basesCovered(std::vector<uint32_t>& kmerHits) {
-    std::sort(kmerHits.begin(), kmerHits.end());
-    uint32_t covered{0};
-    uint32_t lastHit{0};
-    uint32_t kl{20};
-    for (auto h : kmerHits) {
-        covered += std::min(h - lastHit, kl);
-        lastHit = h;
-    }
-    return covered;
-}
-
-uint32_t basesCovered(std::vector<uint32_t>& posLeft, std::vector<uint32_t>& posRight) {
-    return basesCovered(posLeft) + basesCovered(posRight);
-}
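-
-/* Worked example for basesCovered: sorted hit positions {0, 5, 30} with kl = 20
-   contribute min(0,20) + min(5,20) + min(25,20) = 0 + 5 + 20 = 25 covered bases;
-   overlapping k-mer hits are only credited for their non-overlapping extension. */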
-
-class KmerVote {
-    public:
-        KmerVote(uint32_t vp, uint32_t rp, uint32_t vl) : votePos(vp), readPos(rp), voteLen(vl) {}
-        uint32_t votePos{0};
-        uint32_t readPos{0};
-        uint32_t voteLen{0};
-};
-
-
-class TranscriptHitList {
-    public:
-        uint32_t bestHitPos{0};
-        uint32_t bestHitScore{0};
-        std::vector<KmerVote> votes;
-
-        void addVote(uint32_t tpos, uint32_t readPos, uint32_t voteLen) {
-            uint32_t transcriptPos = (readPos > tpos) ? 0 : tpos - readPos;
-            votes.emplace_back(transcriptPos, readPos, voteLen);
-        }
-
-        void addVoteRC(uint32_t tpos, uint32_t readPos, uint32_t voteLen) {
-            uint32_t transcriptPos = (readPos > tpos) ? 0 : tpos + readPos;
-            votes.emplace_back(transcriptPos, readPos, voteLen);
-        }
-
-
-        uint32_t totalNumHits() { return votes.size(); }
-
-        bool computeBestHit() {
-            std::sort(votes.begin(), votes.end(),
-                    [](const KmerVote& v1, const KmerVote& v2) -> bool {
-                        if (v1.votePos == v2.votePos) {
-                            return v1.readPos < v2.readPos;
-                        }
-                        return v1.votePos < v2.votePos;
-                    });
-
-            /*
-            std::cerr << "(" << votes.size() << ") { ";
-            for (auto v : votes) {
-                std::cerr << v.votePos << " ";
-            }
-            std::cerr << "}\n";
-            */
-            uint32_t maxClusterPos{0};
-            uint32_t maxClusterCount{0};
-
-            struct VoteInfo {
-                uint32_t coverage;
-                uint32_t rightmostBase;
-            };
-
-            boost::container::flat_map<uint32_t, VoteInfo> hitMap;
-            uint32_t currClust{votes.front().votePos};
-            for (size_t j = 0; j < votes.size(); ++j) {
-
-                uint32_t votePos = votes[j].votePos;
-                uint32_t readPos = votes[j].readPos;
-                uint32_t voteLen = votes[j].voteLen;
-
-                if (votePos >= currClust) {
-                    if (votePos - currClust > 10) {
-                        currClust = votePos;
-                    }
-                    auto& hmEntry = hitMap[currClust];
-
-                    hmEntry.coverage += std::min(voteLen, (readPos + voteLen) - hmEntry.rightmostBase);
-                    hmEntry.rightmostBase = readPos + voteLen;
-                } else if (votePos < currClust) {
-                    std::cerr << "CHA?!?\n";
-                    if (currClust - votePos > 10) {
-                        currClust = votePos;
-                    }
-                    auto& hmEntry = hitMap[currClust];
-                    hmEntry.coverage += std::min(voteLen, (readPos + voteLen) - hmEntry.rightmostBase);
-                    hmEntry.rightmostBase = readPos + voteLen;
-                }
-
-                if (hitMap[currClust].coverage > maxClusterCount) {
-                    maxClusterCount = hitMap[currClust].coverage;
-                    maxClusterPos = currClust;
-                }
-
-            }
-
-            bestHitPos = maxClusterPos;
-            bestHitScore = maxClusterCount;
-            return true;
-        }
-};
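-
-/* Worked example of the voting scheme: k-mers at read positions 0, 5 and 10 that
-   match transcript positions 100, 105 and 110 each cast a vote at
-   votePos = tpos - readPos = 100, so computeBestHit() merges them into one cluster
-   with bestHitPos = 100 and a coverage-derived bestHitScore. */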
-
-// To use the parser in the following, we get "jobs" until none is
-// available. A job behaves like a pointer to the type
-// jellyfish::sequence_list (see whole_sequence_parser.hpp).
-void add_sizes(sequence_parser* parser, std::atomic<uint64_t>* total_fwd, std::atomic<uint64_t>* total_bwd,
-               std::atomic<uint64_t>& totalHits,
-               std::atomic<uint64_t>& rn,
-               PerfectHashIndex& phi,
-               std::vector<Transcript>& transcripts,
-               std::atomic<uint64_t>& batchNum,
-               double& logForgettingMass,
-               std::mutex& ffMutex,
-               ClusterForest& clusterForest,
-               std::vector<uint64_t>& offsets,
-               std::vector<uint64_t>& kmerLocs,
-               uint32_t sampleRate
-               //google::dense_hash_map<uint64_t, uint64_t>& khash
-               //std::vector<std::vector<uint64_t>>& kmerLocMap
-               ) {
-  uint64_t count_fwd = 0, count_bwd = 0;
-  auto INVALID = phi.INVALID;
-  auto merLen = phi.kmerLength();
-
-  double forgettingFactor{0.65};
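-
-  /* forgettingFactor parameterizes an online-EM style schedule: conceptually the
-     weight given to mini-batch t decays roughly like t^(-0.65), and exponents in
-     (0.5, 1] are the usual choice for convergent stochastic EM updates.  The
-     running logForgettingMass is updated elsewhere using this factor; it is only
-     consumed by processMiniBatch here. */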
-
-  std::vector<std::vector<Alignment>> hitLists;
-  hitLists.resize(5000);
-
-  uint64_t leftHitCount{0};
-  uint64_t hitListCount{0};
-
-  class DirHit {
-      uint32_t fwd{0};
-      uint32_t rev{0};
-  };
-
-  size_t locRead{0};
-  while(true) {
-    sequence_parser::job j(*parser); // Get a job from the parser: a batch of reads (at most max_read_group)
-    if(j.is_empty()) break;          // If got nothing, quit
-
-    my_mer kmer;
-    my_mer rkmer;
-
-    for(size_t i = 0; i < j->nb_filled; ++i) { // For all the reads we got
-
-        hitLists.resize(j->nb_filled);
-
-        /*
-        btree::btree_map<uint64_t, uint32_t> eqLeftFwdPos;
-        btree::btree_map<uint64_t, uint32_t> eqLeftBwdPos;
-        btree::btree_map<uint64_t, uint32_t> eqRightFwdPos;
-        btree::btree_map<uint64_t, uint32_t> eqRightBwdPos;
-        */
-
-        std::unordered_map<uint64_t, TranscriptHitList> leftFwdHits;
-        std::unordered_map<uint64_t, TranscriptHitList> leftBwdHits;
-
-        std::unordered_map<uint64_t, TranscriptHitList> rightFwdHits;
-        std::unordered_map<uint64_t, TranscriptHitList> rightBwdHits;
-
-        uint32_t totFwdLeft = 0;
-        uint32_t totBwdLeft = 0;
-        uint32_t totFwdRight = 0;
-        uint32_t totBwdRight = 0;
-        uint32_t readLength = 0;
-
-        uint32_t leftReadLength{0};
-        uint32_t rightReadLength{0};
-
-        uint64_t prevEQ{std::numeric_limits<uint64_t>::max()};
-
-        count_fwd += j->data[i].first.seq.size();        // Add up the size of the sequence
-        count_bwd += j->data[i].second.seq.size();        // Add up the size of the sequence
-
-        //---------- Left End ----------------------//
-        {
-            const char* start     = j->data[i].first.seq.c_str();
-            uint32_t readLen      = j->data[i].first.seq.size();
-            leftReadLength = readLen;
-            const char* const end = start + readLen;
-            uint32_t cmlen{0};
-            uint32_t rbase{0};
-            uint32_t phase{0};
-            // iterate over the read base-by-base
-            while(start < end) {
-                ++rbase; ++readLength;
-                char base = *start; ++start;
-                auto c = jellyfish::mer_dna::code(base);
-                kmer.shift_left(c);
-                rkmer.shift_right(jellyfish::mer_dna::complement(c));
-                switch(c) {
-                    case jellyfish::mer_dna::CODE_IGNORE:
-                        break;
-                    case jellyfish::mer_dna::CODE_COMMENT:
-                        std::cerr << "ERROR: unexpected character " << base << " in read!\n";
-                        // Fall through
-                    case jellyfish::mer_dna::CODE_RESET:
-                        cmlen = 0;
-                        kmer.polyA(); rkmer.polyA();
-                        break;
-
-                    default:
-                        if(++cmlen >= merLen) {
-                            if (phase++ % sampleRate != 0) { continue; }
-                            cmlen = merLen;
-                            auto merID = phi.index(kmer.get_bits(0, 2*merLen));
-                            //auto merIt = khash.find(kmer.get_bits(0, 2*merLen));
-                            //auto merID = (merIt == khash.end()) ? INVALID : merIt->second;
-
-
-                            if (merID != INVALID) {
-                                // Locations
-                                auto first = offsets[merID];
-                                auto last = offsets[merID+1];
-                                for (size_t ei = first; ei < last; ++ei) {
-                                    uint64_t e = kmerLocs[ei];
-                                    //for (uint64_t e : kmerLocMap[merID]) {
-                                    leftFwdHits[transcript(e)].addVote(offset(e), rbase - merLen, merLen);
-                                }
-                                /*
-                                auto eqClass = memberships[merID];
-                                eqLeftFwdPos[eqClass]++;//.push_back(rbase);
-                                */
-                                totFwdLeft++;
-                            }
-
-                            auto rmerID = phi.index(rkmer.get_bits(0, 2*merLen));
-                            //auto rmerIt = khash.find(rkmer.get_bits(0, 2*merLen));
-                            //auto rmerID = (rmerIt == khash.end()) ? INVALID : rmerIt->second;
-
-                            if (rmerID != INVALID) {
-                                // Locations
-                                auto first = offsets[rmerID];
-                                auto last = offsets[rmerID+1];
-                                for (size_t ei = first ; ei < last; ++ei) {
-                                    uint64_t e = kmerLocs[ei];
-                                    //for (uint64_t e : kmerLocMap[rmerID]) {
-                                    leftFwdHits[transcript(e)].addVoteRC(offset(e), rbase - merLen, merLen);
-                                }
-
-                                /*
-                                auto eqClass = memberships[rmerID];
-                                eqLeftBwdPos[eqClass]++;//.push_back(rbase);
-                                */
-                                totBwdLeft++;
-                            }
-
-                        }
-
-                        } // end switch(c)
-                } // while (start < end)
-        }
-
-        hitLists[i].clear();
-        auto& hitList = hitLists[i];
-        prevEQ = std::numeric_limits<uint64_t>::max();
-        //---------- Right End ----------------------//
-        {
-            kmer.polyA();
-            rkmer.polyA();
-            const char* start     = j->data[i].second.seq.c_str();
-            uint32_t readLen      = j->data[i].second.seq.size();
-            rightReadLength = readLen;
-            const char* const end = start + readLen;
-            uint32_t cmlen{0};
-            uint32_t rbase{0};
-
-            uint32_t phase{0};
-            // iterate over the read base-by-base
-            while(start < end) {
-                ++rbase; ++readLength;
-                char base = *start; ++start;
-                auto c = jellyfish::mer_dna::code(base);
-                kmer.shift_left(c);
-                rkmer.shift_right(jellyfish::mer_dna::complement(c));
-                switch(c) {
-                    case jellyfish::mer_dna::CODE_IGNORE:
-                        break;
-                    case jellyfish::mer_dna::CODE_COMMENT:
-                        std::cerr << "ERROR: unexpected character " << base << " in read!\n";
-                        // Fall through
-                    case jellyfish::mer_dna::CODE_RESET:
-                        cmlen = 0;
-                        kmer.polyA(); rkmer.polyA();
-                        break;
-
-                    default:
-                        if(++cmlen >= merLen) {
-                            if (phase++ % sampleRate != 0) { continue; }
-                            cmlen = merLen;
-                            auto merID = phi.index(kmer.get_bits(0, 2*merLen));
-                            //auto merIt = khash.find(kmer.get_bits(0, 2*merLen));
-                            //auto merID = (merIt == khash.end()) ? INVALID : merIt->second;
-
-                            if (merID != INVALID) {
-                                // Locations
-                                auto first = offsets[merID];
-                                auto last = offsets[merID+1];
-                                for (size_t ei = first; ei < last; ++ei) {
-                                    uint64_t e = kmerLocs[ei];
-                                    //for (uint64_t e : kmerLocMap[merID]) {
-                                     rightFwdHits[transcript(e)].addVote(offset(e), rbase - merLen, merLen);
-                                }
-                                /*
-                                auto eqClass = memberships[merID];
-                                eqRightFwdPos[eqClass]++;//.push_back(rbase);
-                                */
-                                totFwdRight++;
-
-                            }
-
-
-                            auto rmerID = phi.index(rkmer.get_bits(0, 2*merLen));
-                            //auto rmerIt = khash.find(rkmer.get_bits(0, 2*merLen));
-                            //auto rmerID = (rmerIt == khash.end()) ? INVALID : rmerIt->second;
-
-                            if (rmerID != INVALID) {
-                                // Locations
-                                auto first = offsets[rmerID];
-                                auto last = offsets[rmerID+1];
-                                for (size_t ei = first; ei < last; ++ei) {
-                                    uint64_t e = kmerLocs[ei];
-                                    //for (uint64_t e : kmerLocMap[rmerID]) {
-                                    rightFwdHits[transcript(e)].addVoteRC(offset(e), rbase - merLen, merLen);
-                                }
-                                /*
-                                auto eqClass = memberships[rmerID];
-                                eqRightBwdPos[eqClass]++;//.push_back(rbase);
-                                */
-                                totBwdRight++;
-                            }
-
-
-
-                        }
-
-                } // end switch(c)
-            } // while (start < end)
-        }
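-        // The loop above maintains the read's k-mer and its reverse complement
-        // in lock-step (the new code is shifted into one end of `kmer` while
-        // its complement enters the opposite end of `rkmer`), so both strands
-        // can be looked up without rescanning the read.  `cmlen` counts
-        // consecutive valid bases and is reset on any non-ACGT character, so
-        // index lookups resume only once a full k-mer of clean sequence is
-        // available again.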
-
-        /*
-        std::unordered_map<TranscriptID, uint32_t> leftHits;
-        std::unordered_map<TranscriptID, uint32_t> rightHits;
-
-       auto& eqLeftPos = (totFwdLeft >= totBwdLeft) ? eqLeftFwdPos : eqLeftBwdPos;
-       auto& eqRightPos = (totFwdRight >= totBwdRight) ? eqRightFwdPos : eqRightBwdPos;
-
-      for (auto& leqPos: eqLeftPos) {
-          for (auto t : klut[leqPos.first]) {
-              // --- counts
-              leftHits[t] += leqPos.second;//.size();
-
-              // --- positions
-             //auto& thits = leftHits[t];
-             //thits.insert(thits.end(), leqPos.second.begin(), leqPos.second.end());
-         }
-      }
-
-      for (auto& reqPos : eqRightPos) {
-          for (auto t : klut[reqPos.first]) {
-              auto it = leftHits.find(t);
-              if (it != leftHits.end() and it->second > 40) {
-                  // --- counts
-                  rightHits[t] += reqPos.second;//.size();
-
-                  // --- positions
-                  //auto score = basesCovered(it->second);
-                  //if (score < 70) { continue; }
-                  //auto& thits = rightHits[t];
-                  //thits.insert(thits.end(), reqPos.second.begin(), reqPos.second.end());
-              }
-          }
-      }
-
-      uint32_t readHits{0};
-      for (auto& rightHit : rightHits) {
-          if (rightHit.second > 40) {
-              // --- counts
-              uint32_t score = leftHits[rightHit.first] + rightHit.second;
-              hitList.emplace_back(rightHit.first, score);
-              readHits += score;
-
-              // --- positions
-              //auto rscore = basesCovered(rightHit.second);
-               //if (rscore >= 70) {
-              //uint32_t score = leftHits[rightHit.first] + rightHit.second;
-              //hitList.emplace_back(rightHit.first, score);
-              //readHits += score;
-          }
-      }
-      leftHitCount += leftHits.size();
-      hitListCount += hitList.size();
-
-        */
-
-        size_t readHits{0};
-        std::unordered_map<TranscriptID, uint32_t> leftHitCounts;
-        std::unordered_map<TranscriptID, uint32_t> rightHitCounts;
-
-        auto& leftHits = leftFwdHits;   //(totFwdLeft >= totBwdLeft) ? leftFwdHits : leftBwdHits;
-        auto& rightHits = rightFwdHits; //(totFwdRight >= totBwdRight) ? rightFwdHits : rightBwdHits;
-
-        for (auto& tHitList : leftHits) {
-            // Coverage score
-            tHitList.second.computeBestHit();
-            //leftHitCounts[tHitList.first] = tHitList.second.totalNumHits();
-            ++leftHitCount;
-        }
-
-        double cutoffLeft{ 0.75 * leftReadLength };
-        double cutoffRight{ 0.75 * rightReadLength };
-
-        // 80% of the k-mers a full-length read yields (readLen - merLen + 1);
-        // only used by the commented-out counting score below.
-        double cutoffLeftCount{ 0.80 * (leftReadLength - merLen + 1) };
-        double cutoffRightCount{ 0.80 * (rightReadLength - merLen + 1) };
-        for (auto& tHitList : rightHits) {
-            auto it = leftHits.find(tHitList.first);
-            // Simple counting score
-            /*
-            if (it != leftHits.end() and it->second.totalNumHits() >= cutoffLeftCount) {
-                if (tHitList.second.totalNumHits() < cutoffRightCount) { continue; }
-                uint32_t score = it->second.totalNumHits() + tHitList.second.totalNumHits();
-            */
-            // Coverage score
-            if (it != leftHits.end() and it->second.bestHitScore >= cutoffLeft) {
-                tHitList.second.computeBestHit();
-                if (tHitList.second.bestHitScore < cutoffRight) { continue; }
-                uint32_t score = it->second.bestHitScore + tHitList.second.bestHitScore;
-
-                hitList.emplace_back(tHitList.first, score);
-                readHits += score;
-                ++hitListCount;
-            }
-        }
-
-
-        totalHits += hitList.size() > 0;
-        locRead++;
-        ++rn;
-        if (rn % 50000 == 0) {
-            std::cerr << "\rprocessed read " << rn;
-            std::cerr << "\n leftHits.size() " << leftHits.size()
-                      << ", rightHits.size() " << rightHits.size() << ", hit list of size = " << hitList.size() << "\n";
-            std::cerr << "average leftHits = " << leftHitCount / static_cast<float>(locRead)
-                      << ", average hitList = " << hitListCount / static_cast<float>(locRead) << "\n";
-        }
-
-        // Reads hitting too many transcripts are too ambiguous to be informative.
-        if (hitList.size() > 100) { hitList.clear(); }
-
-        // If readHits == 0 then hitList is empty, so the (infinite) quotient
-        // below is never used.
-        double invHits = 1.0 / readHits;
-        for (auto t : hitList) {
-            transcripts[t.transcriptID()].addSharedCount( t.kmerCount * invHits );
-        }
-
-    } // end for i < j->nb_filled
-
-    auto oldBatchNum = batchNum++;
-    if (oldBatchNum > 1) {
-        ffMutex.lock();
-        logForgettingMass += forgettingFactor * std::log(static_cast<double>(oldBatchNum-1)) -
-                             std::log(std::pow(static_cast<double>(oldBatchNum), forgettingFactor) - 1);
-        ffMutex.unlock();
-    }
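-    // With forgetting factor ff in (0.5, 1], the update above implements
-    //
-    //     logMass_n = logMass_{n-1} + ff * log(n-1) - log(n^ff - 1),  n >= 2,
-    //
-    // with n = oldBatchNum, which down-weights earlier mini-batches relative
-    // to later ones, in the spirit of streaming/online EM.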
-
-    // Fold this mini-batch's hit lists into the running abundance estimates.
-    processMiniBatch(logForgettingMass, hitLists, transcripts, clusterForest);
-
-  }
-
-  *total_fwd += count_fwd;
-  *total_bwd += count_bwd;
-}
-
-// To use the parser in the following, we get "jobs" until none is
-// available. A job behaves like a pointer to the type
-// jellyfish::sequence_list (see whole_sequence_parser.hpp).
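-// A rough sketch of the consumption pattern (mirroring the loop below):
-//
-//     while (true) {
-//         sequence_parser::job j(*parser);              // claim the next batch
-//         if (j.is_empty()) break;                      // parser exhausted
-//         for (size_t i = 0; i < j->nb_filled; ++i) {
-//             /* process j->data[i].first and j->data[i].second */
-//         }
-//     }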
-void processReadsMEM(sequence_parser* parser, std::atomic<uint64_t>* total_fwd, std::atomic<uint64_t>* total_bwd,
-               std::atomic<uint64_t>& totalHits,
-               std::atomic<uint64_t>& rn,
-               bwaidx_t *idx,
-               std::vector<Transcript>& transcripts,
-               std::atomic<uint64_t>& batchNum,
-               double& logForgettingMass,
-               std::mutex& ffMutex,
-               ClusterForest& clusterForest
-               ) {
-  uint64_t count_fwd = 0, count_bwd = 0;
-
-  double forgettingFactor{0.65};
-
-  std::vector<std::vector<Alignment>> hitLists;
-  hitLists.resize(5000);
-
-  uint64_t leftHitCount{0};
-  uint64_t hitListCount{0};
-
-  // Per-direction hit counts (currently unused below).
-  struct DirHit {
-      uint32_t fwd{0};
-      uint32_t rev{0};
-  };
-
-
-  smem_i *itr = smem_itr_init(idx->bwt);
-  const bwtintv_v *a;
-  int minLen{20};
-  int minIWidth{20};
-  int splitWidth{0};
-
-  size_t locRead{0};
-  while(true) {
-    sequence_parser::job j(*parser); // Get a job from the parser: a batch of reads (at most max_read_group)
-    if(j.is_empty()) break;          // If got nothing, quit
-
-    my_mer kmer;
-    my_mer rkmer;
-
-    for(size_t i = 0; i < j->nb_filled; ++i) { // For each read pair in the job
-
-        hitLists.resize(j->nb_filled); // NOTE: same size every iteration; could be hoisted above this loop
-
-        std::unordered_map<uint64_t, TranscriptHitList> leftFwdHits;
-        std::unordered_map<uint64_t, TranscriptHitList> leftBwdHits;
-
-        std::unordered_map<uint64_t, TranscriptHitList> rightFwdHits;
-        std::unordered_map<uint64_t, TranscriptHitList> rightBwdHits;
-
-        uint32_t totFwdLeft = 0;
-        uint32_t totBwdLeft = 0;
-        uint32_t totFwdRight = 0;
-        uint32_t totBwdRight = 0;
-        uint32_t readLength = 0;
-
-        uint32_t leftReadLength{0};
-        uint32_t rightReadLength{0};
-
-        uint64_t prevEQ{std::numeric_limits<uint64_t>::max()};
-
-        count_fwd += j->data[i].first.seq.size();        // Add up the size of the sequence
-        count_bwd += j->data[i].second.seq.size();        // Add up the size of the sequence
-
-        //---------- Left End ----------------------//
-        {
-            std::string& readStr   = j->data[i].first.seq;
-            uint32_t readLen      = j->data[i].first.seq.size();
-
-            // Encode bases in place: nst_nt4_table maps A/C/G/T to 0-3 and
-            // everything else to 4, the encoding smem_set_query expects.
-            for (uint32_t p = 0; p < readLen; ++p) {
-                readStr[p] = nst_nt4_table[static_cast<int>(readStr[p])];
-            }
-
-            char* readPtr = const_cast<char*>(readStr.c_str());
-
-            smem_set_query(itr, readLen, reinterpret_cast<uint8_t*>(readPtr));
-
-            // while there are more matches on the query
-            while ((a = smem_next(itr, minLen<<1, splitWidth)) != 0) {
-                for (size_t mi = 0; mi < a->n; ++mi) {
-                    bwtintv_t *p = &a->a[mi];
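-                    // In BWA's SMEM API, p->info packs the query interval of
-                    // the match (start in the high 32 bits, end in the low 32
-                    // bits) and p->x[2] is the size of the suffix-array
-                    // interval, i.e. the number of occurrences in the index.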
-                    if (static_cast<uint32_t>(p->info) - (p->info>>32) < minLen) continue;
-                    uint32_t qstart = static_cast<uint32_t>(p->info>>32);
-                    uint32_t qend = static_cast<uint32_t>(p->info);
-                    long numHits = static_cast<long>(p->x[2]);
-
-                    if ( p->x[2] <= minIWidth) {
-                        for (bwtint_t k = 0; k < p->x[2]; ++k) {
-                            bwtint_t pos;
-                            int len, isRev, refID;
-                            len = static_cast<uint32_t>(p->info - (p->info >> 32));
-                            pos = bns_depos(idx->bns, bwt_sa(idx->bwt, p->x[0] + k), &isRev);
-                            if (isRev) { pos -= len - 1; }
-                            bns_cnt_ambi(idx->bns, pos, len, &refID);
-                            long hitLoc = static_cast<long>(pos - idx->bns->anns[refID].offset) + 1;
-                            leftFwdHits[refID].addVote(hitLoc, hitLoc, len);
-                        } // for k
-                    } // if <= minIWidth
-
-                } // for mi
-            } // for all query matches
-        }
-
-        hitLists[i].clear();
-        auto& hitList = hitLists[i];
-        prevEQ = std::numeric_limits<uint64_t>::max();
-        //---------- Right End ----------------------//
-        {
-            std::string& readStr   = j->data[i].second.seq;
-            uint32_t readLen      = j->data[i].second.seq.size();
-
-            // Encode in place for smem_set_query (see the left-end comment above).
-            for (uint32_t p = 0; p < readLen; ++p) {
-                readStr[p] = nst_nt4_table[static_cast<int>(readStr[p])];
-            }
-
-            char* readPtr = const_cast<char*>(readStr.c_str());
-            smem_set_query(itr, readLen, reinterpret_cast<uint8_t*>(readPtr));
-
-            // while there are more matches on the query
-            while ((a = smem_next(itr, minLen<<1, splitWidth)) != 0) {
-                for (size_t mi = 0; mi < a->n; ++mi) {
-                    bwtintv_t *p = &a->a[mi];
-                    if (static_cast<uint32_t>(p->info) - (p->info>>32) < minLen) continue;
-                    uint32_t qstart = static_cast<uint32_t>(p->info>>32);
-                    uint32_t qend = static_cast<uint32_t>(p->info);
-                    long numHits = static_cast<long>(p->x[2]);
-
-                    if ( p->x[2] <= minIWidth) {
-                        for (bwtint_t k = 0; k < p->x[2]; ++k) {
-                            bwtint_t pos;
-                            int len, isRev, refID;
-                            len = static_cast<uint32_t>(p->info - (p->info >> 32));
-                            pos = bns_depos(idx->bns, bwt_sa(idx->bwt, p->x[0] + k), &isRev);
-                            if (isRev) { pos -= len - 1; }
-                            bns_cnt_ambi(idx->bns, pos, len, &refID);
-                            long hitLoc = static_cast<long>(pos - idx->bns->anns[refID].offset) + 1;
-                            rightFwdHits[refID].addVote(hitLoc, hitLoc, len);
-                        } // for k
-                    } // if <= minIWidth
-
-                } // for mi
-            } // for all query matches
-
-        } // end right
-
-        size_t readHits{0};
-        std::unordered_map<TranscriptID, uint32_t> leftHitCounts;
-        std::unordered_map<TranscriptID, uint32_t> rightHitCounts;
-
-        auto& leftHits = leftFwdHits;
-        auto& rightHits = rightFwdHits;
-
-        for (auto& tHitList : leftHits) {
-            // Coverage score
-            tHitList.second.computeBestHit();
-            ++leftHitCount;
-        }
-
-        double cutoffLeft{ 0.75 * leftReadLength};
-        double cutoffRight{ 0.75 * rightReadLength};
-
-        for (auto& tHitList : rightHits) {
-            auto it = leftHits.find(tHitList.first);
-            // Coverage score
-            if (it != leftHits.end() and it->second.bestHitScore >= cutoffLeft) {
-                tHitList.second.computeBestHit();
-                if (tHitList.second.bestHitScore < cutoffRight) { continue; }
-                uint32_t score = it->second.bestHitScore + tHitList.second.bestHitScore;
-                hitList.emplace_back(tHitList.first, score);
-                readHits += score;
-                ++hitListCount;
-            }
-        }
-
-        totalHits += hitList.size() > 0;
-        locRead++;
-        ++rn;
-        if (rn % 50000 == 0) {
-            std::cerr << "\rprocessed read " << rn;
-            std::cerr << "\n leftHits.size() " << leftHits.size()
-                << ", rightHits.size() " << rightHits.size() << ", hit list of size = " << hitList.size() << "\n";
-//            std::cerr << "average leftHits = " << leftHitCount / static_cast<float>(locRead)
-//               << ", average hitList = " << hitListCount / static_cast<float>(locRead) << "\n";
-        }
-
-        if (hitList.size() > 100) { hitList.clear(); }
-
-        double invHits = 1.0 / readHits;
-        for (auto t : hitList) {
-            transcripts[t.transcriptID()].addSharedCount( t.kmerCount * invHits );
-        }
-
-    } // end for i < j->nb_filled
-
-    auto oldBatchNum = batchNum++;
-    if (oldBatchNum > 1) {
-        ffMutex.lock();
-        logForgettingMass += forgettingFactor * std::log(static_cast<double>(oldBatchNum-1)) -
-                             std::log(std::pow(static_cast<double>(oldBatchNum), forgettingFactor) - 1);
-        ffMutex.unlock();
-    }
-
-    processMiniBatch(logForgettingMass, hitLists, transcripts, clusterForest);
-  }
-  smem_itr_destroy(itr);
-  *total_fwd += count_fwd;
-  *total_bwd += count_bwd;
-}
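-
-// Illustration only; the real TranscriptHitList is defined elsewhere in the
-// tree.  A minimal sketch of the voting scheme used above (names and details
-// hypothetical): each hit votes for the alignment offset that would explain
-// it, and the best hit is the offset whose supporting hits cover the most
-// read bases, which is what gets compared against the 0.75 * readLength
-// cutoffs.
-/*
-struct SketchHitList {
-    std::map<long, uint32_t> votes; // offset -> approx. bases covered
-    uint32_t bestHitScore{0};
-
-    void addVote(long refPos, long readPos, uint32_t len) {
-        votes[refPos - readPos] += len; // approximate; ignores hit overlap
-    }
-
-    void computeBestHit() {
-        for (const auto& kv : votes) {
-            bestHitScore = std::max(bestHitScore, kv.second);
-        }
-    }
-};
-*/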
-
-
-
-int salmonQuantify(int argc, char *argv[]) {
-    using std::cerr;
-    using std::vector;
-    using std::string;
-    namespace bfs = boost::filesystem;
-    namespace po = boost::program_options;
-
-    uint32_t maxThreads = std::thread::hardware_concurrency();
-    uint32_t sampleRate{1};
-
-    vector<string> unmatedReadFiles;
-    vector<string> mate1ReadFiles;
-    vector<string> mate2ReadFiles;
-
-    po::options_description generic("salmon quant options");
-    generic.add_options()
-    ("version,v", "print version string")
-    ("help,h", "produce help message")
-    ("index,i", po::value<string>(), "sailfish index.")
-    ("libtype,l", po::value<std::string>(), "Format string describing the library type")
-    ("unmated_reads,r", po::value<vector<string>>(&unmatedReadFiles)->multitoken(),
-     "List of files containing unmated reads of (e.g. single-end reads)")
-    ("mates1,1", po::value<vector<string>>(&mate1ReadFiles)->multitoken(),
-        "File containing the #1 mates")
-    ("mates2,2", po::value<vector<string>>(&mate2ReadFiles)->multitoken(),
-        "File containing the #2 mates")
-    ("threads,p", po::value<uint32_t>()->default_value(maxThreads), "The number of threads to use concurrently.")
-    ("sample,s", po::value<uint32_t>(&sampleRate)->default_value(1), "Sample rate --- only consider every s-th k-mer in a read.")
-    ("output,o", po::value<std::string>(), "Output quantification file");
-
-    po::variables_map vm;
-    try {
-        auto orderedOptions = po::command_line_parser(argc,argv).
-            options(generic).run();
-
-        po::store(orderedOptions, vm);
-
-        if ( vm.count("help") ) {
-            auto hstring = R"(
-Quant
-==========
-Perform streaming k-mer-group-based estimation of
-transcript abundance from RNA-seq reads
-)";
-            std::cout << hstring << std::endl;
-            std::cout << generic << std::endl;
-            std::exit(0);
-        }
-
-        po::notify(vm);
-
-
-        for (auto& opt : orderedOptions.options) {
-            std::cerr << "[ " << opt.string_key << "] => {";
-            for (auto& val : opt.value) {
-                std::cerr << " " << val;
-            }
-            std::cerr << " }\n";
-        }
-
-        bfs::path outputDirectory(vm["output"].as<std::string>());
-        bfs::create_directory(outputDirectory);
-        if (!(bfs::exists(outputDirectory) and bfs::is_directory(outputDirectory))) {
-            std::cerr << "Couldn't create output directory " << outputDirectory << "\n";
-            std::cerr << "exiting\n";
-            std::exit(1);
-        }
-
-        bfs::path indexDirectory(vm["index"].as<string>());
-        bfs::path logDirectory = outputDirectory.parent_path() / "logs";
-
-#if HAVE_LOGGER
-        bfs::create_directory(logDirectory);
-        if (!(bfs::exists(logDirectory) and bfs::is_directory(logDirectory))) {
-            std::cerr << "Couldn't create log directory " << logDirectory << "\n";
-            std::cerr << "exiting\n";
-            std::exit(1);
-        }
-        std::cerr << "writing logs to " << logDirectory.string() << "\n";
-        g2LogWorker logger(argv[0], logDirectory.string());
-        g2::initializeLogging(&logger);
-#endif
-
-        vector<ReadLibrary> readLibraries;
-        for (auto& opt : orderedOptions.options) {
-            if (opt.string_key == "libtype") {
-                LibraryFormat libFmt = salmon::utils::parseLibraryFormatString(opt.value[0]);
-                if (libFmt.check()) {
-                    std::cerr << libFmt << "\n";
-                } else {
-                    std::stringstream ss;
-                    ss << libFmt << " is invalid!";
-                    throw std::invalid_argument(ss.str());
-                }
-                readLibraries.emplace_back(libFmt);
-            } else if (opt.string_key == "mates1") {
-                readLibraries.back().addMates1(opt.value);
-            } else if (opt.string_key == "mates2") {
-                readLibraries.back().addMates2(opt.value);
-            } else if (opt.string_key == "unmated_reads") {
-                readLibraries.back().addUnmated(opt.value);
-            }
-        }
-
-        for (auto& rl : readLibraries) { rl.checkValid(); }
-
-        auto& rl = readLibraries.front();
-        // TODO: handle all library configurations; for now, assume a single
-        // paired-end library and take its first mate files.
-        char* readFiles[] = { const_cast<char*>(rl.mates1().front().c_str()),
-                              const_cast<char*>(rl.mates2().front().c_str()) };
-
-        uint32_t nbThreads = vm["threads"].as<uint32_t>();
-
-        size_t maxReadGroup{2000}; // Number of reads in each "job"
-        size_t concurrentFile{1};  // Number of files to read simultaneously
-        sequence_parser parser(4 * nbThreads, maxReadGroup, concurrentFile,
-                readFiles, readFiles + 2);
-
-        // kmer-based
-        /*
-        bfs::path sfIndexPath = indexDirectory / "transcriptome.sfi";
-        std::string sfTranscriptIndexFile(sfIndexPath.string());
-        std::cerr << "reading index . . . ";
-        auto phi = PerfectHashIndex::fromFile(sfTranscriptIndexFile);
-        std::cerr << "done\n";
-        std::cerr << "index contained " << phi.numKeys() << " kmers\n";
-
-        size_t nkeys = phi.numKeys();
-        size_t merLen = phi.kmerLength();
-        */
-
-        /*
-        google::dense_hash_map<uint64_t, uint64_t> khash;
-        khash.set_empty_key(std::numeric_limits<uint64_t>::max());
-        uint64_t i{0};
-        for (auto k : phi.kmers()) {
-            khash[k] = phi.index(k);
-        }
-        size_t merLen = 20;
-        std::cerr << "kmer length = " << merLen << "\n";
-        my_mer::k(merLen);
-
-        bfs::path tlutPath = indexDirectory / "transcriptome.tlut";
-        // Get transcript lengths
-        std::ifstream ifile(tlutPath.string(), std::ios::binary);
-        size_t numRecords {0};
-        ifile.read(reinterpret_cast<char *>(&numRecords), sizeof(numRecords));
-
-        std::vector<Transcript> transcripts_tmp;
-
-        std::cerr << "Transcript LUT contained " << numRecords << " records\n";
-        //transcripts_.resize(numRecords);
-        for (auto i : boost::irange(size_t(0), numRecords)) {
-            auto ti = LUTTools::readTranscriptInfo(ifile);
-            // copy over the length, then we're done.
-            transcripts_tmp.emplace_back(ti->transcriptID, ti->name.c_str(), ti->length);
-        }
-
-        std::sort(transcripts_tmp.begin(), transcripts_tmp.end(),
-                [](const Transcript& t1, const Transcript& t2) -> bool {
-                return t1.id < t2.id;
-                });
-        std::vector<Transcript> transcripts_;
-        for (auto& t : transcripts_tmp) {
-            transcripts_.emplace_back(t.id, t.RefName.c_str(), t.RefLength);
-        }
-        transcripts_tmp.clear();
-        ifile.close();
-        // --- done ---
-
-        */
-
-        bwaidx_t *idx;
-
-        { // mem-based
-            bfs::path indexPath = indexDirectory / "bwaidx";
-            if ((idx = bwa_idx_load(indexPath.string().c_str(), BWA_IDX_BWT|BWA_IDX_BNS)) == 0) return 1;
-        }
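-        // BWA_IDX_BWT|BWA_IDX_BNS loads the FM-index (needed for the SMEM
-        // search) and the reference annotations (names, lengths, offsets)
-        // consumed by bns_depos/bns_cnt_ambi in the processing threads; the
-        // packed reference sequence itself is not needed for this pass.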
-
-        size_t numRecords = idx->bns->n_seqs;
-        std::vector<Transcript> transcripts_tmp;
-
-        std::cerr << "Transcript LUT contained " << numRecords << " records\n";
-        //transcripts_.resize(numRecords);
-        for (auto i : boost::irange(size_t(0), numRecords)) {
-            uint32_t id = i;
-            char* name = idx->bns->anns[i].name;
-            uint32_t len = idx->bns->anns[i].len;
-            // copy over the length, then we're done.
-            transcripts_tmp.emplace_back(id, name, len);
-        }
-
-        std::sort(transcripts_tmp.begin(), transcripts_tmp.end(),
-                [](const Transcript& t1, const Transcript& t2) -> bool {
-                return t1.id < t2.id;
-                });
-        std::vector<Transcript> transcripts_;
-        for (auto& t : transcripts_tmp) {
-            transcripts_.emplace_back(t.id, t.RefName.c_str(), t.RefLength);
-        }
-        transcripts_tmp.clear();
-        // --- done ---
-
-        /*
-        { // k-mer based
-        bfs::path locPath = indexDirectory / "fullLookup.kmap";
-        // Make sure we have the file we need to look up k-mer hits
-        if (!boost::filesystem::exists(locPath)) {
-            std::cerr << "could not find k-mer location index; expected at " << locPath << "\n";
-            std::cerr << "please ensure that you've run salmon index before attempting to run salmon quant\n";
-            std::exit(1);
-        }
-
-        std::cerr << "Loading k-mer location index from " << locPath << " . . .";
-
-        //std::vector<std::vector<uint64_t>> kmerLocMap;
-        std::vector<uint64_t> offsets;
-        std::vector<uint64_t> kmerLocs;
-
-        std::ifstream binstream(locPath.string(), std::ios::binary);
-        cereal::BinaryInputArchive archive(binstream);
-        //archive(kmerLocMap);
-        archive(offsets, kmerLocs);
-        binstream.close();
-        std::cerr << "done\n";
-        */
-        std::vector<std::thread> threads;
-        std::atomic<uint64_t> total_fwd(0);
-        std::atomic<uint64_t> total_bwd(0);
-        std::atomic<uint64_t> totalHits(0);
-        std::atomic<uint64_t> rn{0};
-        std::atomic<uint64_t> batchNum{0};
-
-        size_t numTranscripts{transcripts_.size()};
-        ClusterForest clusterForest(numTranscripts, transcripts_);
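-        // ClusterForest (defined elsewhere) appears to maintain a union-find
-        // over transcripts: processMiniBatch merges transcripts that share
-        // multi-mapping reads, and the resulting clusters are what is written
-        // out below.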
-
-        double logForgettingMass{salmon::math::LOG_1};
-        std::mutex ffmutex;
-        for (uint32_t i = 0; i < nbThreads; ++i) {
-            std::cerr << "spawning processing thread " << i << "\n";
-            /*
-            threads.push_back(std::thread(add_sizes, &parser, &total_fwd, &total_bwd,
-                        std::ref(totalHits), std::ref(rn),
-                        std::ref(phi),
-                        std::ref(transcripts_),
-                        std::ref(batchNum),
-                        std::ref(logForgettingMass),
-                        std::ref(ffmutex),
-                        std::ref(clusterForest),
-                        std::ref(offsets),
-                        std::ref(kmerLocs),
-                        sampleRate
-                        //std::ref(khash)
-                        //std::ref(kmerLocMap)
-                        ));
-            */
-            threads.push_back(std::thread(processReadsMEM, &parser, &total_fwd, &total_bwd,
-                        std::ref(totalHits), std::ref(rn),
-                        idx,
-                        std::ref(transcripts_),
-                        std::ref(batchNum),
-                        std::ref(logForgettingMass),
-                        std::ref(ffmutex),
-                        std::ref(clusterForest)
-                        ));
-        }
-
-        for (uint32_t i = 0; i < nbThreads; ++i)
-            threads[i].join();
-
-        std::cerr << "\n\n";
-        std::cerr << "processed " << rn << " total reads\n";
-        std::cout << "Total bases: " << total_fwd << " " << total_bwd << "\n";
-        std::cout << "Had a hit for " << totalHits  / static_cast<double>(rn) * 100.0 << "% of the reads\n";
-
-        size_t tnum{0};
-
-        std::cerr << "writing output \n";
-
-        bfs::path outputFile = outputDirectory / "quant.sf";
-        std::ofstream output(outputFile.string());
-        output << "# SDAFish v0.01\n";
-        output << "# ClusterID\tName\tLength\tFPKM\tNumReads\n";
-
-        const double logBillion = std::log(1000000000.0);
-        const double logNumFragments = std::log(static_cast<double>(rn));
-        auto clusters = clusterForest.getClusters();
-        size_t clusterID = 0;
-        for(auto cptr : clusters) {
-            double logClusterMass = cptr->logMass();
-            double logClusterCount = std::log(static_cast<double>(cptr->numHits()));
-
-            if (logClusterMass == salmon::math::LOG_0) {
-                std::cerr << "Warning: cluster " << clusterID << " has 0 mass!\n";
-            }
-
-            bool requiresProjection{false};
-
-            auto& members = cptr->members();
-            size_t clusterSize{0};
-            for (auto transcriptID : members) {
-                Transcript& t = transcripts_[transcriptID];
-                t.uniqueCounts = t.uniqueCount();
-                t.totalCounts = t.totalCount();
-            }
-
-            for (auto transcriptID : members) {
-                Transcript& t = transcripts_[transcriptID];
-                double logTranscriptMass = t.mass();
-                double logClusterFraction = logTranscriptMass - logClusterMass;
-                t.projectedCounts = std::exp(logClusterFraction + logClusterCount);
-                requiresProjection |= t.projectedCounts > static_cast<double>(t.totalCounts) or
-                    t.projectedCounts < static_cast<double>(t.uniqueCounts);
-                ++clusterSize;
-            }
-
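-            // Each projected count is N_c * m_t / M_c, where N_c is the
-            // cluster's hit count, m_t the transcript's mass, and M_c the
-            // cluster's total mass (all handled in log space above).  A
-            // consistent assignment must satisfy
-            //     uniqueCounts <= projectedCounts <= totalCounts
-            // for every member, so if any transcript violates these bounds
-            // the whole cluster is projected back onto that polytope.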
-            if (clusterSize > 1 and requiresProjection) {
-                cptr->projectToPolytope(transcripts_);
-            }
-
-            // Now posterior has the transcript fraction
-            size_t tidx = 0;
-            for (auto transcriptID : members) {
-                auto& transcript = transcripts_[transcriptID];
-                double logLength = std::log(transcript.RefLength);
-                double fpkmFactor = std::exp(logBillion - logLength - logNumFragments);
-                double count = transcript.projectedCounts;
-                double countTotal = transcript.totalCounts;
-                double countUnique = transcript.uniqueCounts;
-                double fpkm = count > 0 ? fpkmFactor * count : 0.0;
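-                // In linear space: FPKM = count * 1e9 / (RefLength * numFragments);
-                // the factor is assembled in log space for numerical stability.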
-                output << clusterID << '\t' << transcript.RefName << '\t' <<
-                    transcript.RefLength << '\t' << fpkm << '\t' << countTotal << '\t' << countUnique << '\t' << count <<  '\t' << transcript.mass() << '\n';
-                ++tidx;
-            }
-
-            ++clusterID;
-        }
-        output.close();
-
-    } catch (po::error &e) {
-        std::cerr << "Exception : [" << e.what() << "]. Exiting.\n";
-        std::exit(1);
-    } catch (std::exception& e) {
-        std::cerr << "Exception : [" << e.what() << "]\n";
-        std::cerr << argv[0] << " quant was invoked improperly.\n";
-        std::cerr << "For usage information, try " << argv[0] << " quant --help\nExiting.\n";
-    }
-
-
-    return 0;
-}
-
-

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/salmon.git



More information about the debian-med-commit mailing list