[med-svn] [Git][med-team/kallisto][upstream] New upstream version 0.46.2+dfsg

Steffen Möller gitlab at salsa.debian.org
Fri May 15 12:14:47 BST 2020



Steffen Möller pushed to branch upstream at Debian Med / kallisto


Commits:
53c35c62 by Steffen Moeller at 2020-05-15T13:11:58+02:00
New upstream version 0.46.2+dfsg
- - - - -


20 changed files:

- CMakeLists.txt
- src/BUSData.cpp
- src/BUSData.h
- src/Bootstrap.cpp
- src/Bootstrap.h
- src/CMakeLists.txt
- src/H5Writer.cpp
- src/H5Writer.h
- src/ProcessReads.cpp
- src/ProcessReads.h
- src/PseudoBam.cpp
- src/PseudoBam.h
- src/common.h
- src/h5utils.cpp
- src/h5utils.h
- − src/kseq.h
- src/main.cpp
- test/Snakefile
- + test/sc_reads_1.fastq.gz
- + test/sc_reads_2.fastq.gz


Changes:

=====================================
CMakeLists.txt
=====================================
@@ -4,6 +4,13 @@ project(kallisto)
 
 include(GNUInstallDirs)
 
+
+option(USE_HDF5 "Compile with HDF5 support" OFF) #OFF by default
+
+if(USE_HDF5)
+    add_compile_definitions("USE_HDF5=ON")
+endif(USE_HDF5)
+
 set(EXT_PROJECTS_DIR ${PROJECT_SOURCE_DIR}/ext)
 set(CMAKE_CXX_FLAGS_PROFILE "-g")
 


=====================================
src/BUSData.cpp
=====================================
@@ -33,4 +33,21 @@ uint64_t stringToBinary(const char* s, const size_t len, uint32_t &flag) {
     flag = (numN & 3) | (posN & 31) << 2;
   }
   return r;
+}
+
+std::string binaryToString(uint64_t x, size_t len) {
+  std::string s(len, 'N');
+  size_t sh = len-1;
+  for (size_t i = 0; i < len; i++) {
+    char c = 'N';
+    switch((x >> (2*sh)) & 0x03ULL) {
+      case 0x00: c = 'A'; break;
+      case 0x01: c = 'C'; break;
+      case 0x02: c = 'G'; break;
+      case 0x03: c = 'T'; break;
+    }
+    sh--;
+    s.at(i) = c;
+  }
+  return std::move(s);
 }
\ No newline at end of file


=====================================
src/BUSData.h
=====================================
@@ -37,5 +37,5 @@ struct BUSData {
 
 uint64_t stringToBinary(const std::string &s, uint32_t &flag);
 uint64_t stringToBinary(const char* s, const size_t len, uint32_t &flag);
-
+std::string binaryToString(uint64_t x, size_t len);
 #endif // KALLISTO_BUSDATA_H
\ No newline at end of file


=====================================
src/Bootstrap.cpp
=====================================
@@ -1,4 +1,5 @@
 #include "Bootstrap.h"
+#include "PlaintextWriter.h"
 
 EMAlgorithm Bootstrap::run_em() {
     auto counts = mult_.sample();
@@ -19,7 +20,7 @@ BootstrapThreadPool::BootstrapThreadPool(
     const MinCollector& tc,
     const std::vector<double>& eff_lens,
     const ProgramOptions& p_opts,
-    H5Writer& h5writer,
+    BootstrapWriter  *bswriter,
     const std::vector<double>& mean_fls
     ) :
   n_threads_(n_threads),
@@ -30,7 +31,7 @@ BootstrapThreadPool::BootstrapThreadPool(
   tc_(tc),
   eff_lens_(eff_lens),
   opt_(p_opts),
-  writer_(h5writer),
+  writer_(bswriter),
   mean_fls_(mean_fls)
 {
   for (size_t i = 0; i < n_threads_; ++i) {
@@ -78,7 +79,7 @@ void BootstrapWorker::operator() (){
       std::unique_lock<std::mutex> lock(pool_.write_lock_);
       ++pool_.n_complete_;
       std::cerr << "[bstrp] number of EM bootstraps complete: " << pool_.n_complete_ << "\r";
-      pool_.writer_.write_bootstrap(res, cur_id);
+      pool_.writer_->write_bootstrap(res, cur_id);
       // release write lock
     } else {
       // can write out plaintext in parallel


=====================================
src/Bootstrap.h
=====================================
@@ -9,7 +9,7 @@
 #include "weights.h"
 #include "EMAlgorithm.h"
 #include "Multinomial.hpp"
-#include "H5Writer.h"
+
 
 class Bootstrap {
     // needs:
@@ -49,6 +49,21 @@ private:
   const ProgramOptions& opt;
 };
 
+class BootstrapWriter {
+  public:
+    virtual ~BootstrapWriter() {};
+
+    virtual void init(const std::string& fname, int num_bootstrap, int num_processed,
+      const std::vector<int>& fld, const std::vector<int>& preBias, const std::vector<double>& postBias, uint compression, size_t index_version,
+      const std::string& shell_call, const std::string& start_time) = 0;
+
+    virtual void write_main(const EMAlgorithm& em,
+        const std::vector<std::string>& targ_ids,
+        const std::vector<int>& lengths) = 0;
+
+    virtual void write_bootstrap(const EMAlgorithm& em, int bs_id) = 0;
+};
+
 class BootstrapThreadPool {
   friend class BootstrapWorker;
 
@@ -61,7 +76,7 @@ class BootstrapThreadPool {
         const MinCollector& tc,
         const std::vector<double>& eff_lens,
         const ProgramOptions& p_opts,
-        H5Writer& h5writer,
+        BootstrapWriter *bswriter,
         const std::vector<double>& mean_fls
         );
 
@@ -84,7 +99,7 @@ class BootstrapThreadPool {
     const MinCollector& tc_;
     const std::vector<double>& eff_lens_;
     const ProgramOptions& opt_;
-    H5Writer& writer_;
+    BootstrapWriter *writer_;
     const std::vector<double>& mean_fls_;
 };
 


=====================================
src/CMakeLists.txt
=====================================
@@ -27,23 +27,29 @@ if(LINK MATCHES static)
     SET_TARGET_PROPERTIES(kallisto kallisto_core PROPERTIES LINK_SEARCH_END_STATIC 1)
 endif(LINK MATCHES static)
 
-find_package( HDF5 REQUIRED )
+
+if(USE_HDF5)
+    find_package( HDF5 REQUIRED )
+endif(USE_HDF5)
+
 find_package( ZLIB REQUIRED )
 
 if ( ZLIB_FOUND )
     include_directories( ${ZLIB_INCLUDE_DIRS} )
+    target_link_libraries(kallisto kallisto_core ${ZLIB_LIBRARIES})
 else()
     message(FATAL_ERROR "zlib not found. Required for to output files" )
 endif( ZLIB_FOUND )
 
-if(HDF5_FOUND)
-    include_directories( ${HDF5_INCLUDE_DIRS} )
-    target_link_libraries( kallisto_core ${HDF5_LIBRARIES} )
-    target_link_libraries( kallisto ${HDF5_LIBRARIES} )
-else()
-    message(FATAL_ERROR "HDF5 not found. Required to output files")
-endif()
-
+if(USE_HDF5)
+    if(HDF5_FOUND)
+        include_directories( ${HDF5_INCLUDE_DIRS} )
+        target_link_libraries( kallisto_core ${HDF5_LIBRARIES} )
+        target_link_libraries( kallisto ${HDF5_LIBRARIES} )
+    else()
+        message(FATAL_ERROR "HDF5 not found. Required to output files")
+    endif()
+endif(USE_HDF5)
 
 if(LINK MATCHES static)
     if (UNIX AND NOT APPLE)


=====================================
src/H5Writer.cpp
=====================================
@@ -1,3 +1,4 @@
+#ifdef USE_HDF5
 #include "H5Writer.h"
 
 void H5Writer::init(const std::string& fname, int num_bootstrap, int num_processed,
@@ -201,3 +202,5 @@ void H5Converter::rw_from_counts(hid_t group_id, const std::string& count_name,
 
   plaintext_writer(out_fname, targ_ids_, alpha, eff_lengths_, lengths_);  
 }
+
+#endif // USE_HDF5
\ No newline at end of file


=====================================
src/H5Writer.h
=====================================
@@ -1,3 +1,4 @@
+#ifdef USE_HDF5
 #ifndef KALLISTO_H5WRITER_H
 #define KALLISTO_H5WRITER_H
 
@@ -5,21 +6,22 @@
 
 #include "h5utils.h"
 #include "PlaintextWriter.h"
+#include "Bootstrap.h"
 
-class H5Writer {
+class H5Writer : public BootstrapWriter {
   public:
     H5Writer() : primed_(false) {}
     ~H5Writer();
 
-    void init(const std::string& fname, int num_bootstrap, int num_processed,
+    virtual void init(const std::string& fname, int num_bootstrap, int num_processed,
       const std::vector<int>& fld, const std::vector<int>& preBias, const std::vector<double>& postBias, uint compression, size_t index_version,
       const std::string& shell_call, const std::string& start_time);
 
-    void write_main(const EMAlgorithm& em,
+    virtual void write_main(const EMAlgorithm& em,
         const std::vector<std::string>& targ_ids,
         const std::vector<int>& lengths);
 
-    void write_bootstrap(const EMAlgorithm& em, int bs_id);
+    virtual void write_bootstrap(const EMAlgorithm& em, int bs_id);
 
   private:
     bool primed_;
@@ -76,3 +78,4 @@ class H5Converter {
 };
 
 #endif // KALLISTO_H5WRITER_H
+#endif // USE_HDF5
\ No newline at end of file


=====================================
src/ProcessReads.cpp
=====================================
@@ -167,6 +167,9 @@ int64_t ProcessReads(MasterProcessor& MP, const  ProgramOptions& opt) {
 
   // for each file
   std::cerr << "[quant] finding pseudoalignments for the reads ..."; std::cerr.flush();
+  if (opt.verbose) {
+    std::cerr << std::endl;
+  }
 
   /*if (opt.pseudobam) {
     bam_hdr_t *t = createPseudoBamHeader(index);
@@ -177,7 +180,11 @@ int64_t ProcessReads(MasterProcessor& MP, const  ProgramOptions& opt) {
   MP.processReads();
   numreads = MP.numreads;
   nummapped = MP.nummapped;
-  std::cerr << " done" << std::endl;
+  if (opt.verbose) {
+    std::cerr << std::endl << "[quant] done " << std::endl;
+  } else {
+    std::cerr << " done" << std::endl;
+  }
 
   //std::cout << "betterCount = " << betterCount << ", out of betterCand = " << betterCand << std::endl;
 
@@ -237,16 +244,70 @@ int64_t ProcessBUSReads(MasterProcessor& MP, const  ProgramOptions& opt) {
   // for each file
   std::cerr << "[quant] finding pseudoalignments for the reads ..."; std::cerr.flush();
 
-  /*if (opt.pseudobam) {
-    bam_hdr_t *t = createPseudoBamHeader(index);
-    index.writePseudoBamHeader(std::cout);
-  }*/
+  if (opt.genomebam) {
+    /*
+    // open bam files for writing
+    MP.bamh = createPseudoBamHeaderGenome(MP.model);
+    MP.bamfps = new htsFile*[MP.numSortFiles];
+    for (int i = 0; i < MP.numSortFiles; i++) {
+      MP.bamfps[i] = sam_open((opt.output + "/tmp." + std::to_string(i) + ".bam").c_str(), "wb1");
+      int r = sam_hdr_write(MP.bamfps[i], MP.bamh);
+    }
+
+    // assign breakpoints to chromosomes
+    MP.breakpoints.clear();
+    MP.breakpoints.assign(MP.numSortFiles -1 , (((uint64_t)-1)<<32));
+    std::vector<std::vector<std::pair<uint32_t, uint32_t>>> chrWeights(MP.model.chr.size());
+    for (const auto& t : MP.model.genes) {
+      if (t.chr != -1 && t.stop > 0) {
+        chrWeights[t.chr].push_back({t.stop, 1});
+      }      
+    }
+
+    double sum = 0;
+    for (const auto &chrw : chrWeights) {
+      for (const auto &p : chrw) {
+        sum += p.second;
+      }
+    }
+    double bpLimit = sum / (MP.numSortFiles-1);
+
+    for (auto& chrw : chrWeights) {
+      // sort each by stop point
+      std::sort(chrw.begin(), chrw.end(), [](std::pair<uint32_t, uint32_t> a, std::pair<uint32_t, uint32_t> b) { return a.first < b.first;});
+    }
+
+    double bp = 0.0;
+    int ifile = 0;
+    for (int i = 0; i < chrWeights.size(); i++) {
+      const auto &chrw = chrWeights[i];
+      for (const auto &x : chrw) {
+        bp += x.second;
+        if (bp > bpLimit) {
+          uint64_t pos = ((uint64_t) i) << 32 | ((uint64_t) x.first+1) << 1;
+          MP.breakpoints[ifile] = pos;
+          ifile++;
+          while (bp > bpLimit) {
+            bp -= bpLimit;
+          }
+        }
+      }
+    }
+    */
+  }
+
+
+
 
   
   MP.processReads();
   numreads = MP.numreads;
   nummapped = MP.nummapped;
-  std::cerr << " done" << std::endl;
+  if (opt.verbose) {
+    std::cerr << std::endl << "[quant] done " << std::endl;
+  } else {
+    std::cerr << " done" << std::endl;
+  }
 
   //std::cout << "betterCount = " << betterCount << ", out of betterCand = " << betterCand << std::endl;
 
@@ -865,6 +926,7 @@ ReadProcessor::ReadProcessor(const KmerIndex& index, const ProgramOptions& opt,
    if (opt.batch_mode) {
      assert(id != -1);
      batchSR.files = opt.batch_files[id];
+     batchSR.nfiles = opt.batch_files[id].size();
      batchSR.reserveNfiles(opt.batch_files[id].size());
      if (opt.umi) {
        batchSR.umi_files = {opt.umi_files[id]};
@@ -951,6 +1013,7 @@ void ReadProcessor::processBuffer() {
   std::vector<int> vtmp;
   std::vector<int> u;
 
+
   u.reserve(1000);
   v1.reserve(1000);
   v2.reserve(1000);
@@ -1199,13 +1262,17 @@ void ReadProcessor::processBuffer() {
       */
     }
     
+    if (mp.opt.verbose && numreads > 0 && numreads % 1000000 == 0 ) {   
+      int nmap = mp.nummapped;
+      for (int i = 0; i < counts.size(); i++) {
+        nmap += counts[i];
+      }
+      nmap += newEcs.size();
 
-
-
-    /*
-    if (opt.verbose && numreads % 100000 == 0 ) {
-      std::cerr << "[quant] Processed " << pretty_num(numreads) << std::endl;
-    }*/
+      std::cerr << '\r' << (numreads/1000000) << "M reads processed (" 
+        << std::fixed << std::setw( 3 ) << std::setprecision( 1 ) << ((100.0*nmap)/double(numreads))
+        << "% pseudoaligned)"; std::cerr.flush();
+    }
   }
 
 }
@@ -1223,7 +1290,7 @@ void ReadProcessor::clear() {
 
 
 BUSProcessor::BUSProcessor(const KmerIndex& index, const ProgramOptions& opt, const MinCollector& tc, MasterProcessor& mp, int _id, int _local_id) :
- paired(!opt.single_end), bam(opt.bam), num(opt.num), tc(tc), index(index), mp(mp), id(_id), local_id(_local_id) {
+ paired(!opt.single_end), bam(opt.bam), num(opt.num), tc(tc), index(index), mp(mp), id(_id), local_id(_local_id), numreads(0) {
    // initialize buffer
    bufsize = mp.bufsize;
    buffer = new char[bufsize];  
@@ -1288,17 +1355,16 @@ void BUSProcessor::operator()() {
       // release the reader lock
     }
     // do the same for BUS ?!?
-    /*    
+    
     pseudobatch.aln.clear();
-    pseudobatch.batch_id = readbatch_id;*/
+    pseudobatch.batch_id = readbatch_id;
     // process our sequences
     processBuffer();
 
     // update the results, MP acquires the lock
     std::vector<std::pair<int, std::string>> ec_umi;
     std::vector<std::pair<std::vector<int>, std::string>> new_ec_umi;
-    PseudoAlignmentBatch pseudobatch;
-    mp.update(counts, newEcs, ec_umi, new_ec_umi, paired ? seqs.size()/2 : seqs.size(), flens, bias5, pseudobatch, bv, newB, &bc_len[0], &umi_len[0], id, local_id);
+    mp.update(counts, newEcs, ec_umi, new_ec_umi, seqs.size() / mp.opt.busOptions.nfiles , flens, bias5, pseudobatch, bv, newB, &bc_len[0], &umi_len[0], id, local_id);
     clear();
   }
 }
@@ -1316,28 +1382,35 @@ void BUSProcessor::processBuffer() {
   memset(&bc_len[0], 0, 33);
   memset(&umi_len[0], 0, 33);
 
-  const char* s1 = 0;
-  const char* s2 = 0;
-  const char* s3 = 0;
-  int l1=0,l2=0,l3=0;
-
-  const char *s[3] = {0,0,0};
-  int l[3] = {0,0,0};
+  const BUSOptions& busopt = mp.opt.busOptions;
+ 
   char buffer[100];
   memset(buffer, 0, 100);
   char *umi = &(buffer[50]);
   char *bc  = &(buffer[0]);
+  //char *seqbuffer[1000];
+  std::string seqbuffer;
+  seqbuffer.reserve(1000);
+  
+  // actually process the sequence
   
-  // actually process the sequences
-  const BUSOptions& busopt = mp.opt.busOptions;
   int incf, jmax;
   if (bam) {
     incf = 1;
     jmax = 2;
+
   } else {
     incf = busopt.nfiles-1;
     jmax = busopt.nfiles;
   }
+
+  std::vector<const char*> s(jmax, nullptr);
+  std::vector<int> l(jmax,0);
+
+  bool singleSeq = busopt.seq.size() ==1 ;
+  const BUSOptionSubstr seqopt = busopt.seq.front();
+
+
   //int incf = (bam) ? 1 : busopt.nfiles-1;
   for (int i = 0; i + incf < seqs.size(); i++) {
     for (int j = 0; j < jmax /*(bam) ? 2 : busopt.nfiles*/; j++) {
@@ -1347,8 +1420,24 @@ void BUSProcessor::processBuffer() {
     i += incf;
     
     // find where the sequence is
-    const char *seq = s[busopt.seq.fileno] + busopt.seq.start;
-    int seqlen = l[busopt.seq.fileno];
+    const char *seq = nullptr;
+    
+    size_t seqlen = 0;
+    if (singleSeq) {
+      seq = s[seqopt.fileno] + seqopt.start;
+      seqlen = l[seqopt.fileno];
+    } else {
+      seqbuffer.clear();
+      for (int j = 0; j < busopt.seq.size(); j++) {
+        const auto &sopt = busopt.seq[j];
+        int cplen = (sopt.stop == 0) ? l[sopt.fileno]-sopt.start : sopt.stop - sopt.start;
+        seqbuffer.append(s[sopt.fileno] + sopt.start, cplen);
+        seqbuffer.push_back('N');        
+      }
+      seqbuffer.push_back(0);
+      seq = seqbuffer.c_str();
+      seqlen = seqbuffer.size();
+    }
 
     // copy the umi
     int umilen = (busopt.umi.start == busopt.umi.stop) ? l[busopt.umi.fileno] - busopt.umi.start : busopt.umi.stop - busopt.umi.start;
@@ -1430,8 +1519,43 @@ void BUSProcessor::processBuffer() {
         // push back BUS record
         b.ec = ec;
         bv.push_back(b);
+      }      
+    }
+
+    if (mp.opt.pseudobam) {
+      PseudoAlignmentInfo info;
+      info.id = i;
+      info.paired = false;
+      uint32_t f = 0;
+      info.barcode = stringToBinary(bc, blen, f);
+      info.UMI = stringToBinary(umi, umilen, f);
+      if (!u.empty()) {
+        info.r1empty = v.empty();
+        KmerEntry val;
+        info.k1pos = findFirstMappingKmer(v,val);
+        info.k2pos = -1;
+                
+        if (ec != -1) {
+          info.ec_id = ec;
+        } else {
+          info.ec_id = -1;
+          info.u = u; // copy
+        }
       }
+      pseudobatch.aln.push_back(std::move(info));
     }
+
+    if (mp.opt.verbose && numreads > 0 && numreads % 1000000 == 0 ) {   
+        int nmap = mp.nummapped;
+        for (int i = 0; i < counts.size(); i++) {
+          nmap += counts[i];
+        }
+        nmap += newEcs.size();
+
+        std::cerr << '\r' << (numreads/1000000) << "M reads processed (" 
+          << std::fixed << std::setw( 3 ) << std::setprecision( 1 ) << ((100.0*nmap)/double(numreads))
+          << "% pseudoaligned)"; std::cerr.flush();
+      }
   }
 }
 
@@ -1458,6 +1582,7 @@ AlnProcessor::AlnProcessor(const KmerIndex& index, const ProgramOptions& opt, Ma
      /* need to check this later */
      assert(id != -1);
      batchSR.files = opt.batch_files[id];
+     batchSR.nfiles = opt.batch_files[id].size();
      batchSR.reserveNfiles(opt.batch_files[id].size());
      if (opt.umi) {
        batchSR.umi_files = {opt.umi_files[id]};
@@ -1976,26 +2101,56 @@ void AlnProcessor::processBufferGenome() {
   std::vector<std::pair<int,double>> ua;
   u.reserve(1000);
   ua.reserve(1000);
+
+  int bclen = 0;
+  int umilen = 0;
   
   int genome_auxlen = default_genome_auxlen;
   if (!useEM) {
     genome_auxlen -= 7;
   }
+  if (mp.opt.bus_mode) {
+    //todo replace with what is written to busfile
+    bclen = mp.opt.busOptions.getBCLength();
+    if (bclen == 0) {
+      bclen = 32;
+    }
+    umilen = mp.opt.busOptions.getUMILength();
+    if (umilen == 0) {
+      umilen = 32;
+    }
+
+    genome_auxlen += (bclen + umilen) + 8;
+  }
+  std::string bc, umi;
   
 
   Kmer km1,km2;
   KmerEntry val1, val2;
+  if  (mp.opt.bus_mode) {
+    paired = false;
+  }
+  bool readpaired = paired || (mp.opt.bus_mode && mp.opt.busOptions.nfiles == 2);
+  
+  
 
   for (int i = 0; i < n; i++) {
     bam1_t b1,b2, b1c, b2c;
-    int si1 = (paired) ? 2*i : i;
-    int si2 = (paired) ? 2*i +1 : -1;
+    int si1 = (readpaired) ? 2*i : i;
+    int si2 = (readpaired) ? 2*i +1 : -1;
+    
     int rlen1 = seqs[si1].second;
     int rlen2;
-    if (paired) {
+    if (readpaired) {
       rlen2 = seqs[si2].second;
     }
     
+    if (mp.opt.busOptions.seq.front().fileno == 1) {
+      std::swap(si1,si2);
+      std::swap(rlen1,rlen2);
+    }
+    
+
     // fill in the bam core info
     b1.core.tid = -1;
     b1.core.pos = -1;
@@ -2029,6 +2184,22 @@ void AlnProcessor::processBufferGenome() {
       fillBamRecord(b2, nullptr, seqs[si2].first, names[si2].first,  quals[si2].first, seqs[si2].second, names[si2].second, pi.r2empty, genome_auxlen);
       // b2.id = idnum++;
     }
+    
+    if (mp.opt.bus_mode) {
+      b1.data[b1.l_data] = 'C';
+      b1.data[b1.l_data+1] = 'R';
+      b1.data[b1.l_data+2] = 'Z';
+      bc = binaryToString(pi.barcode, bclen);
+      memcpy(b1.data + b1.l_data + 3, bc.c_str(), bclen+1);
+      b1.l_data += bclen + 4;
+      
+      b1.data[b1.l_data] = 'U';
+      b1.data[b1.l_data+1] = 'R';
+      b1.data[b1.l_data+2] = 'Z';
+      umi = binaryToString(pi.UMI, umilen);
+      memcpy(b1.data + b1.l_data + 3, umi.c_str(), umilen+2);
+      b1.l_data += umilen + 4;
+    }
 
     if (pi.r1empty && pi.r2empty) {
       bv.push_back(b1);
@@ -2111,7 +2282,7 @@ void AlnProcessor::processBufferGenome() {
           
         // set aux
         float zero = 0.0;
-        assert(b1.l_data + genome_auxlen <= b1.m_data);
+        //assert(b1.l_data + genome_auxlen <= b1.m_data);
         
         if (useEM) {
           b1.data[b1.l_data] = 'Z';
@@ -2121,7 +2292,7 @@ void AlnProcessor::processBufferGenome() {
           b1.l_data += 7;
 
           if (paired) {
-            assert(b2.l_data + genome_auxlen <= b2.m_data);
+            //assert(b2.l_data + genome_auxlen <= b2.m_data);
             b2.data[b2.l_data] = 'Z';
             b2.data[b2.l_data+1] = 'W';
             b2.data[b2.l_data+2] = 'f';
@@ -2165,7 +2336,6 @@ void AlnProcessor::processBufferGenome() {
         if (!pi.r1empty) {
           km1 = Kmer(seqs[si1].first + pi.k1pos);
           strInfo1 = strandednessInfo(km1, val1, ua);
-
         }
         if (paired && !pi.r2empty) {
           km2 = Kmer(seqs[si2].first + pi.k2pos);
@@ -2743,7 +2913,8 @@ bool FastqSequenceReader::fetchSequences(char *buf, const int limit, std::vector
           umis.emplace_back(std::move(umi));
         }
 
-        flags.push_back(numreads++);
+        numreads++;
+        flags.push_back(numreads);
       } else {
         return true; // read it next time
       }
@@ -2759,7 +2930,7 @@ bool FastqSequenceReader::fetchSequences(char *buf, const int limit, std::vector
 }
 
 // move constructor
-#if 1
+
 FastqSequenceReader::FastqSequenceReader(FastqSequenceReader&& o) :
   nfiles(o.nfiles),
   numreads(o.numreads),
@@ -2770,34 +2941,16 @@ FastqSequenceReader::FastqSequenceReader(FastqSequenceReader&& o) :
   files(std::move(o.files)),
   umi_files(std::move(o.umi_files)),
   f_umi(std::move(o.f_umi)),
-  current_file(o.current_file) {
+  current_file(o.current_file),
+  seq(std::move(o.seq)) {
 
   o.fp.resize(nfiles);
   o.l.resize(nfiles, 0);
   o.nl.resize(nfiles, 0);
+  o.seq.resize(nfiles, nullptr);
   o.state = false;
 }
-#endif
-#if 0
-FastqSequenceReader::FastqSequenceReader(FastqSequenceReader&& o) {
-  fp          = (std::move(o.fp));
-  l           = (std::move(o.l));
-  nl          = (std::move(o.nl));
-  paired      = (o.paired);
-  nfiles      = (o.nfiles);
-  files       = (std::move(o.files)),
-  umi_files   = (std::move(o.umi_files));
-  f_umi       = (std::move(o.f_umi));
-  current_file= (o.current_file);
-  seq         = (std::move(o.seq));
 
-  o.fp.resize(nfiles);
-  o.l.resize(nfiles, 0);
-  o.nl.resize(nfiles, 0);
-  o.state = false;
-  o.seq.resize(nfiles, nullptr);
-}
-#endif
 const std::string BamSequenceReader::seq_enc = "=ACMGRSVTWYHKDBN";
 
 BamSequenceReader::~BamSequenceReader() {


=====================================
src/ProcessReads.h
=====================================
@@ -161,7 +161,7 @@ public:
   char *umi;
   int l_umi;
   int err;
-  char seq[256]; // Is there a better limit?
+  
   
 private:
   static const std::string seq_enc;
@@ -334,6 +334,7 @@ public:
   int64_t numreads;
   int id;
   int local_id;
+  PseudoAlignmentBatch pseudobatch;
 
   int bc_len[33];
   int umi_len[33];


=====================================
src/PseudoBam.cpp
=====================================
@@ -407,6 +407,8 @@ void writePseudoAlignmentBatch(std::ofstream& of, const PseudoAlignmentBatch& ba
     of.write((char*)&k1, 1);
     of.write((char*)&k2, 1);
     of.write((char*)&x.ec_id,sizeof(int32_t));
+    of.write((char*)&x.barcode, sizeof(uint64_t));
+    of.write((char*)&x.UMI, sizeof(uint64_t));
     if (x.ec_id == -1) {
       // exceptional case, no ec_id, yet, but need to write the v vector
       uint32_t sz = x.u.size();
@@ -445,6 +447,8 @@ void readPseudoAlignmentBatch(std::ifstream& in, PseudoAlignmentBatch& batch) {
     info.k1pos = (k1 == 255) ? -1 : k1;
     info.k2pos = (k2 == 255) ? -1 : k2;
     in.read((char*)&info.ec_id, sizeof(int32_t));
+    in.read((char*)&info.barcode, sizeof(uint64_t));
+    in.read((char*)&info.UMI, sizeof(uint64_t));
     if (info.ec_id == -1) {
       uint32_t sz;
       in.read((char*)&sz, sizeof(uint32_t));


=====================================
src/PseudoBam.h
=====================================
@@ -31,7 +31,10 @@ struct PseudoAlignmentInfo {
   int k2pos;
   int32_t ec_id;
   std::vector<int32_t> u;
-  PseudoAlignmentInfo() : id(-1), r1empty (true), r2empty(true), paired(true), k1pos(-1), k2pos(-1), ec_id(-1) {}
+  uint64_t barcode;
+  uint64_t UMI;
+
+  PseudoAlignmentInfo() : id(-1), r1empty (true), r2empty(true), paired(true), k1pos(-1), k2pos(-1), ec_id(-1), barcode(0), UMI(0) {}
 };
 
 


=====================================
src/common.h
=====================================
@@ -1,7 +1,7 @@
 #ifndef KALLISTO_COMMON_H
 #define KALLISTO_COMMON_H
 
-#define KALLISTO_VERSION "0.46.1"
+#define KALLISTO_VERSION "0.46.2"
 
 #include <string>
 #include <vector>
@@ -24,7 +24,7 @@ struct BUSOptions {
   
   BUSOptionSubstr umi;
   std::vector<BUSOptionSubstr> bc;
-  BUSOptionSubstr seq;
+  std::vector<BUSOptionSubstr> seq;
 
   int getBCLength() const {
     int r =0 ;


=====================================
src/h5utils.cpp
=====================================
@@ -1,3 +1,4 @@
+#ifdef USE_HDF5
 #include "h5utils.h"
 
 // allocate a contiguous block of memory, dependent on the largest string
@@ -138,3 +139,4 @@ void read_vector(
 
   delete [] pool;
 }
+#endif // USE_HDF5
\ No newline at end of file


=====================================
src/h5utils.h
=====================================
@@ -1,3 +1,4 @@
+#ifdef USE_HDF5
 #ifndef KALLISTO_H5_UTILS
 #define KALLISTO_H5_UTILS
 
@@ -144,3 +145,4 @@ void read_dataset(hid_t group_id,
 // end: reading utils
 
 #endif // KALLISTO_H5_UTILS
+#endif // USE_HDF5
\ No newline at end of file


=====================================
src/kseq.h deleted
=====================================
@@ -1,242 +0,0 @@
-/* The MIT License
-
-   Copyright (c) 2008, 2009, 2011 Attractive Chaos <attractor at live.co.uk>
-
-   Permission is hereby granted, free of charge, to any person obtaining
-   a copy of this software and associated documentation files (the
-   "Software"), to deal in the Software without restriction, including
-   without limitation the rights to use, copy, modify, merge, publish,
-   distribute, sublicense, and/or sell copies of the Software, and to
-   permit persons to whom the Software is furnished to do so, subject to
-   the following conditions:
-
-   The above copyright notice and this permission notice shall be
-   included in all copies or substantial portions of the Software.
-
-   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-   SOFTWARE.
-*/
-
-/* Last Modified: 05MAR2012 */
-
-#ifndef AC_KSEQ_H
-#define AC_KSEQ_H
-
-#include <ctype.h>
-#include <string.h>
-#include <stdlib.h>
-
-#define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
-#define KS_SEP_TAB   1 // isspace() && !' '
-#define KS_SEP_LINE  2 // line separator: "\n" (Unix) or "\r\n" (Windows)
-#define KS_SEP_MAX   2
-
-#define __KS_TYPE(type_t)						\
-	typedef struct __kstream_t {				\
-		unsigned char *buf;						\
-		int begin, end, is_eof;					\
-		type_t f;								\
-	} kstream_t;
-
-#define ks_err(ks) ((ks)->end == -1)
-#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
-#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
-
-#define __KS_BASIC(type_t, __bufsize)								\
-	static inline kstream_t *ks_init(type_t f)						\
-	{																\
-		kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t));	\
-		ks->f = f;													\
-		ks->buf = (unsigned char*)malloc(__bufsize);				\
-		return ks;													\
-	}																\
-	static inline void ks_destroy(kstream_t *ks)					\
-	{																\
-		if (ks) {													\
-			free(ks->buf);											\
-			free(ks);												\
-		}															\
-	}
-
-#define __KS_GETC(__read, __bufsize)						\
-	static inline int ks_getc(kstream_t *ks)				\
-	{														\
-		if (ks_err(ks)) return -3;							\
-		if (ks->is_eof && ks->begin >= ks->end) return -1;	\
-		if (ks->begin >= ks->end) {							\
-			ks->begin = 0;									\
-			ks->end = __read(ks->f, ks->buf, __bufsize);	\
-			if (ks->end == 0) { ks->is_eof = 1; return -1;}	\
-			if (ks->end == -1) { ks->is_eof = 1; return -3;}\
-		}													\
-		return (int)ks->buf[ks->begin++];					\
-	}
-
-#ifndef KSTRING_T
-#define KSTRING_T kstring_t
-typedef struct __kstring_t {
-	size_t l, m;
-	char *s;
-} kstring_t;
-#endif
-
-#ifndef kroundup32
-#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
-#endif
-
-#define __KS_GETUNTIL(__read, __bufsize)								\
-	static int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \
-	{																	\
-		int gotany = 0;													\
-		if (dret) *dret = 0;											\
-		str->l = append? str->l : 0;									\
-		for (;;) {														\
-			int i;														\
-			if (ks_err(ks)) return -3;									\
-			if (ks->begin >= ks->end) {									\
-				if (!ks->is_eof) {										\
-					ks->begin = 0;										\
-					ks->end = __read(ks->f, ks->buf, __bufsize);		\
-					if (ks->end == 0) { ks->is_eof = 1; break; }		\
-					if (ks->end == -1) { ks->is_eof = 1; return -3; }	\
-				} else break;											\
-			}															\
-			if (delimiter == KS_SEP_LINE) { \
-				for (i = ks->begin; i < ks->end; ++i) \
-					if (ks->buf[i] == '\n') break; \
-			} else if (delimiter > KS_SEP_MAX) {						\
-				for (i = ks->begin; i < ks->end; ++i)					\
-					if (ks->buf[i] == delimiter) break;					\
-			} else if (delimiter == KS_SEP_SPACE) {						\
-				for (i = ks->begin; i < ks->end; ++i)					\
-					if (isspace(ks->buf[i])) break;						\
-			} else if (delimiter == KS_SEP_TAB) {						\
-				for (i = ks->begin; i < ks->end; ++i)					\
-					if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
-			} else i = 0; /* never come to here! */						\
-			if (str->m - str->l < (size_t)(i - ks->begin + 1)) {		\
-				str->m = str->l + (i - ks->begin) + 1;					\
-				kroundup32(str->m);										\
-				str->s = (char*)realloc(str->s, str->m);				\
-			}															\
-			gotany = 1;													\
-			memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
-			str->l = str->l + (i - ks->begin);							\
-			ks->begin = i + 1;											\
-			if (i < ks->end) {											\
-				if (dret) *dret = ks->buf[i];							\
-				break;													\
-			}															\
-		}																\
-		if (!gotany && ks_eof(ks)) return -1;							\
-		if (str->s == 0) {												\
-			str->m = 1;													\
-			str->s = (char*)calloc(1, 1);								\
-		} else if (delimiter == KS_SEP_LINE && str->l > 1 && str->s[str->l-1] == '\r') --str->l; \
-		str->s[str->l] = '\0';											\
-		return str->l;													\
-	} \
-	static inline int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
-	{ return ks_getuntil2(ks, delimiter, str, dret, 0); }
-
-#define KSTREAM_INIT(type_t, __read, __bufsize) \
-	__KS_TYPE(type_t)							\
-	__KS_BASIC(type_t, __bufsize)				\
-	__KS_GETC(__read, __bufsize)				\
-	__KS_GETUNTIL(__read, __bufsize)
-
-#define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0)
-
-#define __KSEQ_BASIC(SCOPE, type_t)										\
-	SCOPE kseq_t *kseq_init(type_t fd)									\
-	{																	\
-		kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t));					\
-		s->f = ks_init(fd);												\
-		return s;														\
-	}																	\
-	SCOPE void kseq_destroy(kseq_t *ks)									\
-	{																	\
-		if (!ks) return;												\
-		free(ks->name.s); free(ks->comment.s); free(ks->seq.s);	free(ks->qual.s); \
-		ks_destroy(ks->f);												\
-		free(ks);														\
-	}
-
-/* Return value:
-   >=0  length of the sequence (normal)
-   -1   end-of-file
-   -2   truncated quality string
-   -3   error reading stream
- */
-#define __KSEQ_READ(SCOPE) \
-	SCOPE int kseq_read(kseq_t *seq) \
-	{ \
-		int c,r; \
-		kstream_t *ks = seq->f; \
-		if (seq->last_char == 0) { /* then jump to the next header line */ \
-			while ((c = ks_getc(ks)) >= 0 && c != '>' && c != '@'); \
-			if (c < 0) return c; /* end of file or error*/ \
-			seq->last_char = c; \
-		} /* else: the first header char has been read in the previous call */ \
-		seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \
-		if ((r=ks_getuntil(ks, 0, &seq->name, &c)) < 0) return r;  /* normal exit: EOF or error */ \
-		if (c != '\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \
-		if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \
-			seq->seq.m = 256; \
-			seq->seq.s = (char*)malloc(seq->seq.m); \
-		} \
-		while ((c = ks_getc(ks)) >= 0 && c != '>' && c != '+' && c != '@') { \
-			if (c == '\n') continue; /* skip empty lines */ \
-			seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \
-			ks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \
-		} \
-		if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */	\
-		if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \
-			seq->seq.m = seq->seq.l + 2; \
-			kroundup32(seq->seq.m); /* rounded to the next closest 2^k */ \
-			seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
-		} \
-		seq->seq.s[seq->seq.l] = 0;	/* null terminated string */ \
-		if (c != '+') return seq->seq.l; /* FASTA */ \
-		if (seq->qual.m < seq->seq.m) {	/* allocate memory for qual in case insufficient */ \
-			seq->qual.m = seq->seq.m; \
-			seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
-		} \
-		while ((c = ks_getc(ks)) >= 0 && c != '\n'); /* skip the rest of '+' line */ \
-		if (c == -1) return -2; /* error: no quality string */ \
-		while ((c = ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l)); \
-		if (c == -3) return -3; /* stream error */ \
-		seq->last_char = 0;	/* we have not come to the next header line */ \
-		if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \
-		return seq->seq.l; \
-	}
-
-#define __KSEQ_TYPE(type_t)						\
-	typedef struct {							\
-		kstring_t name, comment, seq, qual;		\
-		int last_char;							\
-		kstream_t *f;							\
-	} kseq_t;
-
-#define KSEQ_INIT2(SCOPE, type_t, __read)		\
-	KSTREAM_INIT(type_t, __read, 16384)			\
-	__KSEQ_TYPE(type_t)							\
-	__KSEQ_BASIC(SCOPE, type_t)					\
-	__KSEQ_READ(SCOPE)
-
-#define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read)
-
-#define KSEQ_DECLARE(type_t) \
-	__KS_TYPE(type_t) \
-	__KSEQ_TYPE(type_t) \
-	extern kseq_t *kseq_init(type_t fd); \
-	void kseq_destroy(kseq_t *ks); \
-	int kseq_read(kseq_t *seq);
-
-#endif


=====================================
src/main.cpp
=====================================
@@ -23,6 +23,7 @@
 #include "Inspect.h"
 #include "Bootstrap.h"
 #include "H5Writer.h"
+#include "PlaintextWriter.h"
 #include "GeneModel.h"
 #include "Merge.h"
 
@@ -238,6 +239,7 @@ void ParseOptionsEM(int argc, char **argv, ProgramOptions& opt) {
     }
     case 'c': {
       stringstream(optarg) >> opt.chromFile;
+      break;
     }
     case 'd': {
       stringstream(optarg) >> opt.seed;
@@ -542,8 +544,12 @@ void ListSingleCellTechnologies() {
  }
 
 void ParseOptionsBus(int argc, char **argv, ProgramOptions& opt) {
-  const char *opt_string = "i:o:x:t:lbn";
+  int verbose_flag = 0;
+  int gbam_flag = 0;
+
+  const char *opt_string = "i:o:x:t:lbng:c:";
   static struct option long_options[] = {
+    {"verbose", no_argument, &verbose_flag, 1},
     {"index", required_argument, 0, 'i'},
     {"output-dir", required_argument, 0, 'o'},
     {"technology", required_argument, 0, 'x'},
@@ -551,6 +557,9 @@ void ParseOptionsBus(int argc, char **argv, ProgramOptions& opt) {
     {"threads", required_argument, 0, 't'},
     {"bam", no_argument, 0, 'b'},
     {"num", no_argument, 0, 'n'},
+    {"genomebam", no_argument, &gbam_flag, 1},
+    {"gtf", required_argument, 0, 'g'},
+    {"chromosomes", required_argument, 0, 'c'},
     {0,0,0,0}
   };
 
@@ -596,6 +605,14 @@ void ParseOptionsBus(int argc, char **argv, ProgramOptions& opt) {
       opt.num = true;
       break;
     }
+    case 'g': {
+      stringstream(optarg) >> opt.gtfFile;
+      break;
+    }
+    case 'c': {
+      stringstream(optarg) >> opt.chromFile;
+      break;
+    }
     default: break;
     }
   }
@@ -605,6 +622,16 @@ void ParseOptionsBus(int argc, char **argv, ProgramOptions& opt) {
     exit(1);
   }
   
+  if (verbose_flag) {
+    opt.verbose = true;
+  }
+
+  if (gbam_flag) {
+    opt.pseudobam = true;
+    opt.genomebam = true;    
+  }
+
+  
   // all other arguments are fast[a/q] files to be read
   for (int i = optind; i < argc; i++) {
     opt.files.push_back(argv[i]);
@@ -612,107 +639,115 @@ void ParseOptionsBus(int argc, char **argv, ProgramOptions& opt) {
  
 }
 
-bool ParseTechnology(const std::string &techstr, std::vector<BUSOptionSubstr> &values, std::vector<int> &files, std::vector<std::string> &errorList, std::vector<BUSOptionSubstr> &bcValues) {
-  int lastIndex = 0; //the last place in the string a colon and punctuation was found
-  int currValue = 1; //tells us the index of the three sequences needed barcode, umi, sequence
-  vector<int> numbers; //stores the numbers in a pairs
-  const char punctuationCompare = ',';
-  const char colonCompare = ':';
-  bool colon;
-  bool duplicate = false;
-  std::string stringVal;
-  int val;
-
-  for(int i = 1; i < techstr.length(); i++) {
-    colon = false;
-    if(techstr[i] == colonCompare) {
-      colon = true;
-    }
-
-    if(techstr[i] == punctuationCompare || colon) {
-      stringVal = techstr.substr(lastIndex, i-lastIndex);
+bool ParseTechnology(const std::string &techstr, BUSOptions& busopt, std::vector<std::string> &errorList) {
+  auto i1 = techstr.find(':');
+  if (i1 == std::string::npos) {
+    errorList.push_back("Error: technology string must contain two colons (:), none found: \"" + techstr + "\"");    
+    return false;
+  }
+  auto i2 = techstr.find(':', i1+1);
+  if (i2 == std::string::npos) {
+    errorList.push_back("Error: technology string must contain two colons (:), only one found: \"" + techstr + "\"");    
+    return false;
+  }
+  auto ip = techstr.find(':', i2+1);
+  if (ip != std::string::npos) {
+    errorList.push_back("Error: technology string must contain two colons (:), three found: \"" + techstr + "\"");    
+    return false;
+  }
+  auto bcstr = techstr.substr(0, i1);
+  auto umistr = techstr.substr(i1+1,i2-i1-1);
+  auto seqstr = techstr.substr(i2+1);
+  
+
+
+  int maxnf = 0;
+
+  auto convert_commas_to_vector = [&](const std::string &s, std::vector<BUSOptionSubstr> &v) -> bool {
+    std::vector<int> vv;
+    v.clear();
+    std::stringstream ss(s);
+    std::string t;
+    while (std::getline(ss, t, ',')) {
       try {
-        val = stoi(stringVal);
-        numbers.push_back(val);
-        if(numbers.size() == 1) {
-          if(!files.empty()) {
-            for(int j = 0; j < files.size(); j++) {
-              if(val == files[j]) {
-                duplicate = true;
-                break;
-              }
-            }
-            if(!duplicate) {
-              files.push_back(val);
-            } else {
-              duplicate = false;
-            }
-          } else {
-            files.push_back(val);
-          }
-        }
-        lastIndex = i + 1;
-      } catch(const std:: invalid_argument& ia) {
-        errorList.push_back("Error: Invalid argument");
-        return true;
+        int i = stoi(t);
+        vv.push_back(i);
+      } catch (std::invalid_argument &e) {
+        errorList.push_back("Error: converting to int: \"" + t + "\"");
+        return false;
       }
+    }
 
-      if(colon) {
-        if(numbers.size() != 3) {
-          errorList.push_back("Error: Wrong number of pairs provided");
-          return true;
+    int nv = vv.size();
+    if (nv % 3 == 0) {
+      for (int i = 0; i+2 < nv; i+=3) {
+        int f = vv[i];
+        int a = vv[i+1];
+        int b = vv[i+2];
+        if (f < 0) {
+          errorList.push_back("Error: invalid file number (" + to_string(f) + ")  " + s);
+        }
+        if (a <  0) {
+          errorList.push_back("Error: invalid start (" + to_string(a) + ")  " + s);
+        }
+        if (b != 0 && b <= a) {
+          errorList.push_back("Error: invalid stop (" + to_string(b) + ") has to be after start (" + to_string(a) + ")  " + s);
+        }
+        v.push_back(BUSOptionSubstr(f,a,b));
+        if (f > maxnf) {
+          maxnf = f;
         }
       }
+    } else {
+      errorList.push_back("Error: number of values has to be multiple of 3 " + s);
+      return false;
     }
-    if(numbers.size() == 3) {
-      if(currValue == 1) {
-        bcValues.push_back(BUSOptionSubstr(numbers[0], numbers[1], numbers[2]));
-      } else {
-        values.push_back(BUSOptionSubstr(numbers[0], numbers[1], numbers[2]));
-      }
-      numbers.clear();
-    }
-    if(colon) {
-      currValue++;
-    }
-  }
 
-  if (files.empty()) {
-    errorList.push_back(std::string("Error: parsing technology string \"") + techstr + "\"");
+    busopt.nfiles = maxnf+1;
     return true;
+  };
+
+  
+
+  std::vector<BUSOptionSubstr> v;
+  if (!convert_commas_to_vector(bcstr,v)) {
+    return false;
   }
+  if (v.empty()) {
+    errorList.push_back("Error: empty barcode list " + bcstr);
+    return false;
+  }
+  busopt.bc = std::move(v);
 
-  std::sort(files.begin(), files.end());
-  for(int k = 0; k < files.size()-1; k++) {
-    if(files[k]+1 != files[k+1]) {
-      errorList.push_back("Error: files aren't correctly referenced");
-      return true;
-    }
+  if (!convert_commas_to_vector(umistr, v)) {
+    return false;
   }
+  if (v.empty()) {
+    errorList.push_back("Error: empty UMI list " + umistr);
+    return false;
+  }
+  if (v.size() != 1) {
+    errorList.push_back("Error: only a single UMI list allow " + umistr);
+    return false;
+  }
+  busopt.umi = std::move(v.front());
 
-  stringVal = techstr.substr(lastIndex, techstr.length()-lastIndex);
-  if(numbers.size() == 2) {
-    try {
-      val = stoi(stringVal);
-      numbers.push_back(val);
-      values.push_back(BUSOptionSubstr(numbers[0], numbers[1], numbers[2]));
-      numbers.clear();  
-    } catch(const std:: invalid_argument& ia) {
-      errorList.push_back("Error: Invalid argument");
-      return true;
-    }
-  } else {
-    errorList.push_back("Error: Wrong number of pairs provided");
-    return true;
+
+  if (!convert_commas_to_vector(seqstr, v)) {
+    return false;
   }
-  if(currValue != 3) {
-    errorList.push_back("Error: Wrong number of substrings provided");
-    return true;
+  if (v.empty()) {
+    errorList.push_back("Error: empty sequence list " + bcstr);
+    return false;
   }
-  return false;
+
+  busopt.seq = std::move(v);
+
+  return true;
 }
 
 
+
 void ParseOptionsH5Dump(int argc, char **argv, ProgramOptions& opt) {
   int peek_flag = 0;
   const char *opt_string = "o:";
@@ -828,55 +863,55 @@ bool CheckOptionsBus(ProgramOptions& opt) {
     if (opt.bam) { // Note: only 10xV2 has been tested
       busopt.nfiles = 1;
       if (opt.technology == "10XV2") {
-        busopt.seq = BUSOptionSubstr(1,0,0); // second file, entire string
+        busopt.seq.push_back(BUSOptionSubstr(1,0,0)); // second file, entire string
         busopt.umi = BUSOptionSubstr(0,16,26); // first file [16:26]
         busopt.bc.push_back(BUSOptionSubstr(0,0,16));
       } else if (opt.technology == "10XV3") {
-        busopt.seq = BUSOptionSubstr(1,0,0);
+        busopt.seq.push_back(BUSOptionSubstr(1,0,0));
         busopt.umi = BUSOptionSubstr(0,16,28);
         busopt.bc.push_back(BUSOptionSubstr(0,0,16));
 //      } else if (opt.technology == "10XV1") {
 
       } else if (opt.technology == "SURECELL") {
-        busopt.seq = BUSOptionSubstr(1,0,0);
+        busopt.seq.push_back(BUSOptionSubstr(1,0,0));
         busopt.umi = BUSOptionSubstr(0,18,26);
         busopt.bc.push_back(BUSOptionSubstr(0,0,18));
       } else if (opt.technology == "DROPSEQ") {
-        busopt.seq = BUSOptionSubstr(1,0,0);
+        busopt.seq.push_back(BUSOptionSubstr(1,0,0));
         busopt.umi = BUSOptionSubstr(0,12,20);
         busopt.bc.push_back(BUSOptionSubstr(0,0,12));
       } else if (opt.technology == "INDROPSV1") {
         busopt.nfiles = 2;
-        busopt.seq = BUSOptionSubstr(1,0,0);
+        busopt.seq.push_back(BUSOptionSubstr(1,0,0));
         busopt.umi = BUSOptionSubstr(0,42,48);
         busopt.bc.push_back(BUSOptionSubstr(0,0,11));
         busopt.bc.push_back(BUSOptionSubstr(0,30,38));  
       } else if (opt.technology == "INDROPSV2") {
         busopt.nfiles = 2;
-        busopt.seq = BUSOptionSubstr(0,0,0);
+        busopt.seq.push_back(BUSOptionSubstr(0,0,0));
         busopt.umi = BUSOptionSubstr(1,42,48);
         busopt.bc.push_back(BUSOptionSubstr(1,0,11));
         busopt.bc.push_back(BUSOptionSubstr(1,30,38));  
       } else if (opt.technology == "INDROPSV3") {
         busopt.nfiles = 3;
-        busopt.seq = BUSOptionSubstr(2,0,0);
+        busopt.seq.push_back(BUSOptionSubstr(2,0,0));
         busopt.umi = BUSOptionSubstr(1,8,14);
         busopt.bc.push_back(BUSOptionSubstr(0,0,8));
         busopt.bc.push_back(BUSOptionSubstr(1,0,8));
       } else if (opt.technology == "CELSEQ") {
-        busopt.seq = BUSOptionSubstr(1,0,0);
+        busopt.seq.push_back(BUSOptionSubstr(1,0,0));
         busopt.umi = BUSOptionSubstr(0,8,12);
         busopt.bc.push_back(BUSOptionSubstr(0,0,8));
       } else if (opt.technology == "CELSEQ2") {
-        busopt.seq = BUSOptionSubstr(1,0,0);
+        busopt.seq.push_back(BUSOptionSubstr(1,0,0));
         busopt.umi = BUSOptionSubstr(0,0,6);
         busopt.bc.push_back(BUSOptionSubstr(0,6,12));
       } else if (opt.technology == "SCRBSEQ") {
-        busopt.seq = BUSOptionSubstr(1,0,0);
+        busopt.seq.push_back(BUSOptionSubstr(1,0,0));
         busopt.umi = BUSOptionSubstr(0,6,16);
         busopt.bc.push_back(BUSOptionSubstr(0,0,6));
       } else if (opt.technology == "INDROPSV3") {
-        busopt.seq = BUSOptionSubstr(1,0,0);
+        busopt.seq.push_back(BUSOptionSubstr(1,0,0));
         busopt.umi = BUSOptionSubstr(0,0,6);
         busopt.bc.push_back(BUSOptionSubstr(0,6,16));
       } else {
@@ -884,14 +919,18 @@ bool CheckOptionsBus(ProgramOptions& opt) {
         vector<BUSOptionSubstr> values;
         vector<BUSOptionSubstr> bcValues;
         vector<std::string> errorList;
-        bool invalid = ParseTechnology(opt.technology, values, files, errorList, bcValues);
-        if(!invalid) {
+        //bool invalid = ParseTechnology(opt.technology, values, files, errorList, bcValues);
+        bool valid = ParseTechnology(opt.technology, busopt, errorList);
+        
+        if(!valid) {
+          /*
           busopt.nfiles = files.size(); 
           for(int i = 0; i < bcValues.size(); i++) {
             busopt.bc.push_back(bcValues[i]);
           }
           busopt.umi = values[0];
-          busopt.seq = values[1];
+          busopt.seq.push_back(values[1]);
+          */
         } else {
           for(int j = 0; j < errorList.size(); j++) {
             cerr << errorList[j] << endl;
@@ -903,67 +942,67 @@ bool CheckOptionsBus(ProgramOptions& opt) {
     } else {
       if (opt.technology == "10XV2") {
         busopt.nfiles = 2;
-        busopt.seq = BUSOptionSubstr(1,0,0); // second file, entire string
+        busopt.seq.push_back(BUSOptionSubstr(1,0,0)); // second file, entire string
         busopt.umi = BUSOptionSubstr(0,16,26); // first file [16:26]
         busopt.bc.push_back(BUSOptionSubstr(0,0,16));
       } else if (opt.technology == "10XV3") {
         busopt.nfiles = 2;
-        busopt.seq = BUSOptionSubstr(1,0,0);
+        busopt.seq.push_back(BUSOptionSubstr(1,0,0));
         busopt.umi = BUSOptionSubstr(0,16,28);
         busopt.bc.push_back(BUSOptionSubstr(0,0,16));
       } else if (opt.technology == "10XV1") {
         busopt.nfiles = 3;
-        busopt.seq = BUSOptionSubstr(0,0,0);
-        busopt.umi = BUSOptionSubstr(1,0,0);
-        busopt.bc.push_back(BUSOptionSubstr(2,0,0));
+        busopt.seq.push_back(BUSOptionSubstr(2,0,0));
+        busopt.umi = BUSOptionSubstr(1,0,10);
+        busopt.bc.push_back(BUSOptionSubstr(0,0,14));
       } else if (opt.technology == "SURECELL") {
         busopt.nfiles = 2;
-        busopt.seq = BUSOptionSubstr(1,0,0);
+        busopt.seq.push_back(BUSOptionSubstr(1,0,0));
         busopt.umi = BUSOptionSubstr(0,51,59);
         busopt.bc.push_back(BUSOptionSubstr(0,0,6));
         busopt.bc.push_back(BUSOptionSubstr(0,21,27));
         busopt.bc.push_back(BUSOptionSubstr(0,42,48));
       } else if (opt.technology == "DROPSEQ") {
         busopt.nfiles = 2;
-        busopt.seq = BUSOptionSubstr(1,0,0);
+        busopt.seq.push_back(BUSOptionSubstr(1,0,0));
         busopt.umi = BUSOptionSubstr(0,12,20);
         busopt.bc.push_back(BUSOptionSubstr(0,0,12));
       } else if (opt.technology == "INDROPSV1") {
         busopt.nfiles = 2;
-        busopt.seq = BUSOptionSubstr(1,0,0);
+        busopt.seq.push_back(BUSOptionSubstr(1,0,0));
         busopt.umi = BUSOptionSubstr(0,42,48);
         busopt.bc.push_back(BUSOptionSubstr(0,0,11));
         busopt.bc.push_back(BUSOptionSubstr(0,30,38));  
       } else if (opt.technology == "INDROPSV2") {
         busopt.nfiles = 2;
-        busopt.seq = BUSOptionSubstr(0,0,0);
+        busopt.seq.push_back(BUSOptionSubstr(0,0,0));
         busopt.umi = BUSOptionSubstr(1,42,48);
         busopt.bc.push_back(BUSOptionSubstr(1,0,11));
         busopt.bc.push_back(BUSOptionSubstr(1,30,38));  
       } else if (opt.technology == "INDROPSV3") {
         busopt.nfiles = 3;
-        busopt.seq = BUSOptionSubstr(2,0,0);
+        busopt.seq.push_back(BUSOptionSubstr(2,0,0));
         busopt.umi = BUSOptionSubstr(1,8,14);
         busopt.bc.push_back(BUSOptionSubstr(0,0,8));
         busopt.bc.push_back(BUSOptionSubstr(1,0,8));
       } else if (opt.technology == "CELSEQ") {
         busopt.nfiles = 2;
-        busopt.seq = BUSOptionSubstr(1,0,0);
+        busopt.seq.push_back(BUSOptionSubstr(1,0,0));
         busopt.umi = BUSOptionSubstr(0,8,12);
         busopt.bc.push_back(BUSOptionSubstr(0,0,8));
       } else if (opt.technology == "CELSEQ2") {
         busopt.nfiles = 2;
-        busopt.seq = BUSOptionSubstr(1,0,0);
+        busopt.seq.push_back(BUSOptionSubstr(1,0,0));
         busopt.umi = BUSOptionSubstr(0,0,6);
         busopt.bc.push_back(BUSOptionSubstr(0,6,12));
       } else if (opt.technology == "SCRBSEQ") {
         busopt.nfiles = 2;
-        busopt.seq = BUSOptionSubstr(1,0,0);
+        busopt.seq.push_back(BUSOptionSubstr(1,0,0));
         busopt.umi = BUSOptionSubstr(0,6,16);
         busopt.bc.push_back(BUSOptionSubstr(0,0,6));
       } else if (opt.technology == "INDROPSV3") {
         busopt.nfiles = 3;
-        busopt.seq = BUSOptionSubstr(2,0,0);
+        busopt.seq.push_back(BUSOptionSubstr(2,0,0));
         busopt.umi = BUSOptionSubstr(1,8,14);
         busopt.bc.push_back(BUSOptionSubstr(0,0,8));
         busopt.bc.push_back(BUSOptionSubstr(1,0,8));
@@ -971,15 +1010,20 @@ bool CheckOptionsBus(ProgramOptions& opt) {
         vector<int> files;
         vector<BUSOptionSubstr> values;
         vector<BUSOptionSubstr> bcValues;
-        vector<std::string> errorList;
-        bool invalid = ParseTechnology(opt.technology, values, files, errorList, bcValues);
-        if(!invalid) {
+        vector<std::string> errorList;        
+        //bool invalid = ParseTechnology(opt.technology, values, files, errorList, bcValues);
+        bool valid = ParseTechnology(opt.technology, busopt, errorList);
+        
+        
+        if(valid) {
+          /*
           busopt.nfiles = files.size(); 
           for(int i = 0; i < bcValues.size(); i++) {
             busopt.bc.push_back(bcValues[i]);
           }
           busopt.umi = values[0];
-          busopt.seq = values[1];
+          busopt.seq.push_back(values[1]);
+          */
         } else {
           for(int j = 0; j < errorList.size(); j++) {
             cerr << errorList[j] << endl;
@@ -991,6 +1035,30 @@ bool CheckOptionsBus(ProgramOptions& opt) {
     }
   }
 
+  if (opt.genomebam) {
+    if (opt.busOptions.seq.size() != 1) {
+      cerr << "Error: BAM output is currently only supported for technologies with a single CDNA read file" << endl;
+      ret = false;
+    }
+    if (!opt.gtfFile.empty()) {
+      if (!checkFileExists(opt.gtfFile)) {
+        cerr << "Error: GTF file " << opt.gtfFile << " does not exist" << endl;
+        ret = false;
+      }
+    } else {
+      cerr << "Error: need GTF file for genome alignment" << endl;
+      ret = false;
+    }
+    if (!opt.chromFile.empty()) {
+      if (!checkFileExists(opt.chromFile)) {
+        cerr << "Error: Chromosome file not found: " << opt.chromFile << endl;
+        ret = false;
+      }
+    }
+  }
+
+  
+
   if (ret && !opt.bam && opt.files.size() %  opt.busOptions.nfiles != 0) {
     cerr << "Error: Number of files (" << opt.files.size() << ") does not match number of input files required by "
     << "technology " << opt.technology << " (" << opt.busOptions.nfiles << ")" << endl;
@@ -1223,6 +1291,14 @@ bool CheckOptionsEM(ProgramOptions& opt, bool emonly = false) {
     ret = false;
   }
 
+  #ifndef USE_HDF5  
+  if (opt.bootstrap > 0 && !opt.plaintext) {
+    cerr << "Warning: kallisto was not compiled with HDF5 support so no bootstrapping" << endl
+         << "will be performed. Run quant with --plaintext option or recompile with" << endl
+         << "HDF5 support to obtain bootstrap estimates." << endl;
+    opt.bootstrap = 0;
+  }
+  #endif
   return ret;
 }
 
@@ -1648,7 +1724,8 @@ void usageBus() {
        << "-l, --list                    List all single-cell technologies supported" << endl
        << "-t, --threads=INT             Number of threads to use (default: 1)" << endl
        << "-b, --bam                     Input file is a BAM file" << endl
-       << "-n, --num                     Output number of read in flag column (incompatible with --bam)" << endl;
+       << "-n, --num                     Output number of read in flag column (incompatible with --bam)" << endl
+       << "    --verbose                 Print out progress information every 1M proccessed reads" << endl;
 }
 
 void usageIndex() {
@@ -1715,7 +1792,8 @@ void usageEM(bool valid_input = true) {
        << "-g, --gtf                     GTF file for transcriptome information" << endl
        << "                              (required for --genomebam)" << endl
        << "-c, --chromosomes             Tab separated file with chromosome names and lengths" << endl
-       << "                              (optional for --genomebam, but recommended)" << endl;
+       << "                              (optional for --genomebam, but recommended)" << endl
+       << "    --verbose                 Print out progress information every 1M proccessed reads" << endl;
 
 }
 
@@ -1853,12 +1931,28 @@ int main(int argc, char *argv[]) {
         exit(1);
       } else {
         int num_trans, index_version;
-        int64_t num_processed, num_pseudoaligned, num_unique;
+        int64_t num_processed = 0;
+        int64_t num_pseudoaligned = 0;
+        int64_t num_unique = 0;
+
         opt.bus_mode = true;
         opt.single_end = false;
         KmerIndex index(opt);
         index.load(opt);
+
+
+        bool guessChromosomes = false;
         Transcriptome model; // empty
+        if (opt.genomebam) {
+          if (!opt.chromFile.empty()) {
+            model.loadChromosomes(opt.chromFile);
+          } else {
+            guessChromosomes = true;
+          }
+          model.parseGTF(opt.gtfFile, index, opt, guessChromosomes);
+        }
+
+
         MinCollector collection(index, opt); 
         MasterProcessor MP(index, opt, collection, model);
         num_processed = ProcessBUSReads(MP, opt);
@@ -1939,6 +2033,18 @@ int main(int argc, char *argv[]) {
             std::string(std::to_string(index_version)),
             start_time,
             call);
+
+        if (opt.pseudobam) {
+          std::vector<double> fl_means(index.target_lens_.size(),0.0);
+          EMAlgorithm em(collection.counts, index, collection, fl_means, opt);
+          MP.processAln(em, false);
+        }
+
+
+        cerr << endl;
+        if (num_pseudoaligned == 0) {
+          exit(1); // exit with error
+        }
       }
     } else if (cmd == "merge") {
       if (argc == 2) {
@@ -2053,12 +2159,15 @@ int main(int argc, char *argv[]) {
 
         std::string call = argv_to_string(argc, argv);
 
+
+        #ifdef USE_HDF5
         H5Writer writer;
         if (!opt.plaintext) {
           writer.init(opt.output + "/abundance.h5", opt.bootstrap, num_processed, fld, preBias, em.post_bias_, 6,
               index.INDEX_VERSION, call, start_time);
           writer.write_main(em, index.target_names_, index.target_lens_);
         }
+        #endif // USE_HDF5
 
         for (int i = 0; i < index.num_trans; i++) {
           num_unique += collection.counts[i];          
@@ -2090,7 +2199,9 @@ int main(int argc, char *argv[]) {
           // this happens if nothing aligns, then we write an empty bootstrap file
           for (int b = 0; b < opt.bootstrap; b++) {
             if (!opt.plaintext) {
+              #ifdef USE_HDF5
               writer.write_bootstrap(em, b); // em is empty
+              #endif
             } else {
               plaintext_writer(opt.output + "/bs_abundance_" + std::to_string(b) + ".tsv",
                     em.target_names_, em.alpha_, em.eff_lens_, index.target_lens_); // re-use empty input
@@ -2116,9 +2227,10 @@ int main(int argc, char *argv[]) {
                 << opt.bootstrap << endl;
               n_threads = opt.bootstrap;
             }
-
+            #ifdef USE_HDF5            
             BootstrapThreadPool pool(opt.threads, seeds, collection.counts, index,
-                collection, em.eff_lens_, opt, writer, fl_means);
+                collection, em.eff_lens_, opt, &writer, fl_means);
+            #endif
           } else {
             for (auto b = 0; b < B; ++b) {
               Bootstrap bs(collection.counts, index, collection, em.eff_lens_, seeds[b], fl_means, opt);
@@ -2126,7 +2238,9 @@ int main(int argc, char *argv[]) {
               auto res = bs.run_em();
 
               if (!opt.plaintext) {
+                #ifdef USE_HDF5
                 writer.write_bootstrap(res, b);
+                #endif
               } else {
                 plaintext_writer(opt.output + "/bs_abundance_" + std::to_string(b) + ".tsv",
                     em.target_names_, res.alpha_, em.eff_lens_, index.target_lens_);
@@ -2193,13 +2307,13 @@ int main(int argc, char *argv[]) {
         em.run(10000, 50, true, opt.bias);
 
         std::string call = argv_to_string(argc, argv);
-        H5Writer writer;
+        //H5Writer writer;
 
         if (!opt.plaintext) {
           // setting num_processed to 0 because quant-only is for debugging/special ops
-          writer.init(opt.output + "/abundance.h5", opt.bootstrap, 0, fld, preBias, em.post_bias_, 6,
+          /* writer.init(opt.output + "/abundance.h5", opt.bootstrap, 0, fld, preBias, em.post_bias_, 6,
               index.INDEX_VERSION, call, start_time);
-          writer.write_main(em, index.target_names_, index.target_lens_);
+          writer.write_main(em, index.target_names_, index.target_lens_);*/
         } else {
           plaintext_aux(
               opt.output + "/run_info.json",
@@ -2231,7 +2345,7 @@ int main(int argc, char *argv[]) {
           // this happens if nothing aligns, then we write an empty bootstrap file
           for (int b = 0; b < opt.bootstrap; b++) {
             if (!opt.plaintext) {
-              writer.write_bootstrap(em, b); // em is empty
+              //writer.write_bootstrap(em, b); // em is empty
             } else {
               plaintext_writer(opt.output + "/bs_abundance_" + std::to_string(b) + ".tsv",
                     em.target_names_, em.alpha_, em.eff_lens_, index.target_lens_); // re-use empty input
@@ -2258,8 +2372,8 @@ int main(int argc, char *argv[]) {
               n_threads = opt.bootstrap;
             }
 
-            BootstrapThreadPool pool(n_threads, seeds, collection.counts, index,
-                collection, em.eff_lens_, opt, writer, fl_means);
+            /*BootstrapThreadPool pool(n_threads, seeds, collection.counts, index,
+                collection, em.eff_lens_, opt, writer, fl_means);*/
           } else {
             for (auto b = 0; b < B; ++b) {
               Bootstrap bs(collection.counts, index, collection, em.eff_lens_, seeds[b], fl_means, opt);
@@ -2267,7 +2381,7 @@ int main(int argc, char *argv[]) {
               auto res = bs.run_em();
 
               if (!opt.plaintext) {
-                writer.write_bootstrap(res, b);
+                //writer.write_bootstrap(res, b);
               } else {
                 plaintext_writer(opt.output + "/bs_abundance_" + std::to_string(b) + ".tsv",
                     em.target_names_, res.alpha_, em.eff_lens_, index.target_lens_);
@@ -2326,6 +2440,11 @@ int main(int argc, char *argv[]) {
           }
         }
         
+        std::ofstream transout_f((opt.output + "/transcripts.txt"));
+        for (const auto &t : index.target_names_) {
+          transout_f << t << "\n";
+        }
+        transout_f.close();
 
         plaintext_aux(
             opt.output + "/run_info.json",
@@ -2511,12 +2630,13 @@ int main(int argc, char *argv[]) {
         usageh5dump();
         exit(1);
       }
-
+      #ifdef USE_HDF5
       H5Converter h5conv(opt.files[0], opt.output);
       if (!opt.peek) {
         h5conv.write_aux();
         h5conv.convert();
       }
+      #endif
     }  else {
       cerr << "Error: invalid command " << cmd << endl;
       usage();


=====================================
test/Snakefile
=====================================
@@ -5,13 +5,19 @@ INDEX = "{0}.kidx".format(PRE)
 
 rule all:
     input:
-        "kallisto_out/abundance.h5"
+        "quant_out/abundance.tsv",
+        "bus_out/output.bus"
 
 rule index:
     input: FASTA
     output: INDEX
     shell:
-        "kallisto index -i {output} {input}"
+        """
+        kallisto index -i {output} {input}
+
+        mkdir quant_out
+        mkdir bus_out
+        """
 
 rule kallisto_quant:
     input:
@@ -19,15 +25,36 @@ rule kallisto_quant:
         "reads_2.fastq.gz",
         INDEX
     output:
-        "kallisto_out",
-        "kallisto_out/abundance.h5",
-        "kallisto_out/abundance.tsv",
-        "kallisto_out/run_info.json"
+        "quant_out/abundance.tsv",
+        "quant_out/run_info.json"
+    shell:
+        """
+        kallisto quant \
+        -i {INDEX} \
+        -b 30 \
+        -o quant_out/ \
+        --genomebam \
+        --gtf {GTF} \
+        --chromosomes chrom.txt \
+        {input[0]} {input[1]}
+        """
+
+rule kallisto_bus:
+    input:
+        "sc_reads_1.fastq.gz",
+        "sc_reads_2.fastq.gz",
+        INDEX
+    output:
+        "bus_out/matrix.ec",
+        "bus_out/transcripts.txt",
+        "bus_out/output.bus",
+        "bus_out/run_info.json"
     shell:
-        "kallisto quant "
-        "-i {INDEX} "
-        "-b 30 "
-        "-o {output[0]} "
-        "--genomebam --gtf {GTF} "
-        "--chromosomes chrom.txt "
-        "{input[0]} {input[1]}"
+        """
+        kallisto bus \
+        -i {INDEX} \
+        -x 10xv2 \
+        -o bus_out \
+        -t 4 \
+        {input[0]} {input[1]}
+        """


=====================================
test/sc_reads_1.fastq.gz
=====================================
Binary files /dev/null and b/test/sc_reads_1.fastq.gz differ


=====================================
test/sc_reads_2.fastq.gz
=====================================
Binary files /dev/null and b/test/sc_reads_2.fastq.gz differ



View it on GitLab: https://salsa.debian.org/med-team/kallisto/-/commit/53c35c620a0df2920adba9b0cfbd8ea07fcdd7b2

-- 
View it on GitLab: https://salsa.debian.org/med-team/kallisto/-/commit/53c35c620a0df2920adba9b0cfbd8ea07fcdd7b2
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200515/80ca58d2/attachment-0001.html>


More information about the debian-med-commit mailing list