[med-svn] [hilive] 02/06: Imported Upstream version 0.3

Andreas Tille tille at debian.org
Fri Nov 25 14:43:10 UTC 2016


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository hilive.

commit a0a71e26f71cfa39b56376ba0078776c0a165e58
Author: Andreas Tille <tille at debian.org>
Date:   Fri Nov 25 15:19:07 2016 +0100

    Imported Upstream version 0.3
---
 CMakeLists.txt          |  102 +++--
 CONTRIBUTORS            |    8 +
 LICENSE                 |    4 +-
 README                  |   86 ----
 README.md               |  143 +++++++
 lib/alnblock.cpp        |  359 ----------------
 lib/alnblock.h          |   81 ----
 lib/alnread.cpp         | 1077 +++++++++++++++++++++++------------------------
 lib/alnread.h           |   98 +++--
 lib/alnstream.cpp       |  268 ++++++++----
 lib/alnstream.h         |   10 +-
 lib/argument_parser.cpp |  231 ++++++++++
 lib/argument_parser.h   |    9 +
 lib/config.h.in         |    2 +-
 lib/definitions.h       |  124 +++---
 lib/headers.h           |    2 +
 lib/kindex.cpp          |  294 +++++--------
 lib/kindex.h            |   31 +-
 lib/tools.cpp           |  110 +++--
 lib/tools.h             |   22 +-
 tools/.ann.location     |    1 -
 tools/build_index.cpp   |   25 +-
 tools/hilive.cpp        |  590 ++++++++------------------
 23 files changed, 1655 insertions(+), 2022 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0b2562d..4c6b203 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,16 +1,21 @@
-# Header
+##############
+### Header ###
+
 cmake_minimum_required (VERSION 2.8)
 project (HiLive)
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -g -pthread -W -Wall -std=gnu++11 -O2")
+# Set the version number 
+set (HiLive_VERSION_MAJOR 0)
+set (HiLive_VERSION_MINOR 3)
 
 # Set the k-mer length
 set (HiLive_K 15 CACHE INT "Set the k-mer length for index and mapper")
 
-# Set the version number 
-set (HiLive_VERSION_MAJOR 0)
-set (HiLive_VERSION_MINOR 1)
+# Set flags for compilation
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -g -pthread -W -Wall -std=gnu++11 -O0")
 
+
+# configure a header file to pass some of the CMake settings to the source code
 configure_file (
   "${PROJECT_SOURCE_DIR}/lib/config.h.in"
   "${PROJECT_BINARY_DIR}/config.h"  )
@@ -20,38 +25,81 @@ include_directories("${PROJECT_BINARY_DIR}")
 
 
 
-include_directories(lib)
+#############################
+### setup seqan libraries ###
 
-set(LIBS tools
-         alnblock
-         alnread
-	 alnstream
-	 illumina_parsers
-	 kindex
-	 parallel)
+set (CMAKE_MODULE_PATH "/home/schulzeja/masterthesis/tools/seqan-src/util/cmake") # adjust this to [pathToSeqanCode]/util/cmake
+set (SEQAN_INCLUDE_PATH "/home/schulzeja/masterthesis/tools/seqan-src/include/") # adjust this to [pathToSeqanCode]/include
+
+# Configure SeqAn, enabling features for libbz2 and zlib.
+#set (SEQAN_FIND_DEPENDENCIES ZLIB BZip2) # original version from seqan tutorial
+set (SEQAN_FIND_DEPENDENCIES BZip2)
+find_package (SeqAn REQUIRED)
+
+# Add include directories, defines, and flags for SeqAn (and its dependencies).
+include_directories (${SEQAN_INCLUDE_DIRS})
+add_definitions (${SEQAN_DEFINITIONS})
+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SEQAN_CXX_FLAGS}")
 
-set(LIB_FILES "")
-foreach (lib ${LIBS})
-    list(APPEND LIB_FILES "lib/${lib}.cpp")
-endforeach()
-add_library(hilivelib ${LIB_FILES})
+
+
+#############################
+### setup Boost libraries ###
 
 set(Boost_USE_STATIC_LIBS ON) 
 set(Boost_USE_MULTITHREADED ON)  
 set(Boost_USE_STATIC_RUNTIME ON) 
-FIND_PACKAGE( Boost COMPONENTS system filesystem program_options REQUIRED )
-INCLUDE_DIRECTORIES( ${Boost_INCLUDE_DIR} )
+find_package( Boost COMPONENTS system filesystem program_options REQUIRED )
+include_directories( ${Boost_INCLUDE_DIR} )
+
+
+
+############################
+### setup Zlib and lz4 libraries ###
 
 find_package( ZLIB REQUIRED )
+set (LZ4_PATH /usr/local/lib) # possibly adjust this to [pathToLz4]/lib if using downloaded lz4 source code
+include_directories(${LZ4_PATH})
+link_directories(${LZ4_PATH})
+set(CompressionLibs "${ZLIB_LIBRARIES};lz4")
+
+
+#############################################
+### download & build the Samtools library ###
+file(DOWNLOAD "https://sourceforge.net/projects/samtools/files/samtools/1.2/samtools-1.2.tar.bz2/download" ${CMAKE_CURRENT_BINARY_DIR}/samtools-1.2.tar.bz2)
+execute_process(COMMAND tar xjf ${CMAKE_CURRENT_BINARY_DIR}/samtools-1.2.tar.bz2 )
+execute_process(COMMAND make -s -C samtools-1.2 )
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/samtools-1.2 )
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/samtools-1.2/htslib-1.2.1 )
+link_directories( ${CMAKE_CURRENT_BINARY_DIR}/samtools-1.2 ${CMAKE_CURRENT_BINARY_DIR}/samtools-1.2/htslib-1.2.1 )
+
+
+##############################
+### setup HiLive libraries ###
+include_directories("${PROJECT_SOURCE_DIR}/lib")
+# make a list of HiLive libraries
+set (LIB_NAMES tools alnread alnstream illumina_parsers kindex parallel argument_parser)
+set(LIB_LIST "")
+foreach (x ${LIB_NAMES})
+	list(APPEND LIB_LIST "lib/${x}.cpp")
+endforeach()
+add_library(HiLiveLibs ${LIB_LIST})
 
-# manually set the path for lz4 library
-set (LZ4_PATH /usr/local/lib CACHE PATH "Manually set the path to the lz4 library")
-LINK_DIRECTORIES(${LZ4_PATH})
 
+#############################
+### Build the executables ###
 
-# create the executables
-add_executable(hilive tools/hilive.cpp )
-target_link_libraries(hilive hilivelib ${ZLIB_LIBRARIES} ${Boost_LIBRARIES} lz4)
+add_executable (hilive tools/hilive.cpp)
+target_link_libraries (hilive HiLiveLibs ${CompressionLibs} ${Boost_LIBRARIES} ${SEQAN_LIBRARIES} hts)
 
 add_executable(hilive-build tools/build_index.cpp )
-target_link_libraries(hilive-build hilivelib ${ZLIB_LIBRARIES} ${Boost_LIBRARIES} lz4)
+target_link_libraries(hilive-build HiLiveLibs ${CompressionLibs} ${Boost_LIBRARIES} ${SEQAN_LIBRARIES})
+
+
+#####################
+### for debugging ###
+
+#get_cmake_property(_variableNames VARIABLES)
+#foreach (_variableName ${_variableNames})
+	#message(STATUS "${_variableName}=${${_variableName}}")
+#endforeach()
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
new file mode 100644
index 0000000..a9e3fc2
--- /dev/null
+++ b/CONTRIBUTORS
@@ -0,0 +1,8 @@
+Bernhard Y. Renard <renardb (at) rki.de>
+ * Project head
+
+Martin S. Lindner <martin (at) mail-linder.de>
+ * Project founder, implemented versions 0.1 and 0.2
+
+Jakob M. Schulze <jakobschulze (at) arcor.de>
+ * developed version 0.2 to 0.3
diff --git a/LICENSE b/LICENSE
index 734932c..dfe02f6 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2015, Martin S. Lindner, marzin at mail-lindner.de
+Copyright (c) 2015-2016, Martin S. Lindner and the HiLive contributors. See CONTRIBUTORS for more info.
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
@@ -9,4 +9,4 @@ Redistribution and use in source and binary forms, with or without modification,
 
 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROF [...]
\ No newline at end of file
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROF [...]
diff --git a/README b/README
deleted file mode 100644
index f7c9a15..0000000
--- a/README
+++ /dev/null
@@ -1,86 +0,0 @@
-HiLive - Live Mapping of Illumina reads
-
-1. Description
---------------
-HiLive is a read mapping tool that maps Illumina HiSeq (or comparable) 
-reads right in the moment when they are produced. This means, read mapping 
-is finished as soon as the sequencer is finished.
-
-
-2. Website
-----------
-
-The HiLive project website is https://sourceforge.net/projects/hilive/
-
-There you can find the latest version of HiLive, source code, documentation,
-and examples.
-
-
-3. Installation
----------------
-
-If the binary distribution from the project website does not work for you,
-you can still compile HiLive from source.
-
-Make sure that the following dependences are installed:
- - cmake (>= 2.8)
- - boost (system, filesystem, program_options)
- - zlib
- - lz4
-
-Check out the source code from the project website. Compile HiLive with:
-  cd [hilive-code]
-  mkdir build && cd build
-  cmake ..
-  make
-
-To compile HiLive with a different k-mer size than k=15 make the following
-adjustment (here: k=10):
-  cmake -DHiLive_K 10 ..
-  make
-
-
-4. Usage
---------
-
-HiLive has two components:
- - hilive-build: build the k-mer index of the reference genome
- - hilive: the read mapper itself
-
-Using hilive-build:
-
-Building a k-mer index from FASTA file input.fa:
-  hilive-build input.fa
-The index is written to input.fa.kix
-
-Building an index from a large reference genome. Here is makes sense to use trimming,
-i.e. removing k-mers from the index that occurr more than 1000 times (for example) in
-the index. The index is written into the file trimmed.kix
-  hilive-build -t 1000 -o trimmed.kix input.fa
-
-Using hilive:
-
-To map reads in a 100bp run using default settings:
-  hilive /path/to/BaseCalls /path/to/index.kix 100
-
-We recommend to adjust the numbers of threads used by HiLive with -n. If possible,
-make use of all threads on the machine!
-
-Please consult the project website for more details on the parameters!
-
-
-5. License
-----------
-
-See the file LICENSE for licensing information.
-
-
-6. Contact
-----------
-
-Please consult the HiLive project website for questions!
-
-If this does not help, please feel free to consult:
-
-Martin S. Lindner <martin (at) mail-lindner.de> (technical contact)
-Bernhard Y. Renard <renardb (at) rki.de> (project head)
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..6b47601
--- /dev/null
+++ b/README.md
@@ -0,0 +1,143 @@
+HiLive - Live Mapping of Illumina reads
+=======================================
+
+Description
+-----------
+
+HiLive is a read mapping tool that maps Illumina HiSeq (or comparable) 
+reads right in the moment when they are produced. This means, read mapping 
+is finished as soon as the sequencer is finished.
+
+
+Website
+-------
+
+The HiLive project website is https://gitlab.com/SimonHTausch/HiLive
+
+There you can find the latest version of HiLive, source code, documentation,
+and examples.
+
+
+Installation
+------------
+
+If you are using a Debian based system you can directly install the Debian 
+package from the official [Debian repository](https://packages.debian.org/sid/hilive "HiLive Debian package")
+
+If this does not work for you, you can still compile HiLive from source.
+
+Make sure that the following dependencies are installed:
+
+ * cmake (>= 2.8)
+ * boost (system, filesystem, program\_options)
+ * zlib
+ * lz4
+
+If using a local version of lz4 then adjust path in CMakeLists.txt line 61.
+
+---
+
+You also need to download a specific seqan build. Navigate to a folder where
+you want to store the seqan library and run:
+
+    git clone https://github.com/seqan/seqan.git
+    cd seqan && git checkout 9119aa6
+
+---
+
+Check out the HiLive source code from the project website and adjust the paths of the
+seqan module in the file CMakeLists.txt in the hilive folder (line 31 and 32).
+Then, compile HiLive with:
+
+    cd [hilive-code]
+    mkdir build && cd build
+    cmake ..
+    make
+
+---
+
+To compile HiLive with a different k-mer size than k=15 make the following
+adjustment (here: k=10):
+
+    cmake -DHiLive\_K 10 ..
+    make
+
+
+Usage
+-----
+
+HiLive has two components:
+
+ * ``hilive-build``  builds the k-mer index of the reference genome
+ * ``hilive``        the read mapper itself
+
+---
+
+#### Using hilive-build:
+
+Building a k-mer index from FASTA file input.fa to output file input.fa.kix:
+
+    hilive-build input.fa
+
+Building an index from a large reference genome. Here is makes sense to use trimming,
+i.e. removing k-mers from the index that occurr more than 1000 times (for example) in
+the index. The index is written into the file trimmed.kix
+
+    hilive-build -t 1000 -o trimmed.kix input.fa
+
+---
+
+#### Using hilive:
+
+To map reads in a 100bp run using default settings:
+
+    hilive /path/to/BaseCalls /path/to/index.kix 100 /path/to/outputFolder
+
+Hint: to concatenate the resulting sam files of a lane to a single file using samtools you could use the following commands:
+
+    for file in /path/to/outputFolder/L001/s_1_*.sam; do samtools view -bS $file > ${file%.sam}.bam; done
+    samtools cat /path/to/outputFolder/L001/s_1_*.bam > /path/to/outputFolder/L001/allAlignments.bam
+    samtools view -SH /path/to/outputFolder/L001/s_1_1101.sam > /path/to/outputFolder/L001/allAlignments.sam
+    samtools view /path/to/outputFolder/L001/allAlignments.bam >> /path/to/outputFolder/L001/allAlignments.sam
+
+---
+
+#### Demultiplexing:
+
+To map reads from multiplexed sequencing runs, you can provide HiLive with the barcode sequences from your Sample Sheet.
+In default cases, barcode sequences are read after the (first) read, such that demultiplexing is carried out after the mapping is completed. 
+If you use double indexing, please concatenate both indices in the correct order and provide them as one sequence. Please take care that the number of cycles is exactly the read length from your Sample Sheet plus that of your complete barcode sequence. All entered indices must be of the same length. To provide multiple indices, enter the -XXX argument for every barcode or barcode combination, e.g.:
+
+	hilive /path/to/BaseCalls /path/to/index.kix 107 /path/to/outputFolder -b barcode1 -b barcode2 ...
+
+Reads containing one of the given barcodes will be written to the resulting samfiles. The corresponding barcode sequence is stored in the BC-field of the samfile.
+
+Hint: You can write the results of each sample into a separate files using the following command:
+
+	for barcode in barcode1 barcode2 ... ; do grep "^@" allAlignments.sam > allAlignments_$barcode.sam && grep "BC:Z:$barcode" allAlignments.sam >> allAlignments_$barcode.sam; done
+
+---
+
+We recommend to adjust the numbers of threads used by HiLive with -n. If possible,
+make use of all threads on the machine!
+
+Please consult the project website for more details on the parameters!
+
+
+License
+-------
+
+See the file LICENSE for licensing information.
+
+
+Contact
+-------
+
+Please consult the HiLive project website for questions!
+
+If this does not help, please feel free to consult:
+ * [Martin S. Lindner](mailto:martin at mail-lindner.de "Mail Martin S. Lindner") (technical contact)
+ * [Bernhard Y. Renard](mailto:renardb at rki.de "Mail Bernhard Y. Renard") (project head)
+
+
+also see CONTRIBUTORS for a complete list of contributors and their contact information
diff --git a/lib/alnblock.cpp b/lib/alnblock.cpp
deleted file mode 100644
index be4e979..0000000
--- a/lib/alnblock.cpp
+++ /dev/null
@@ -1,359 +0,0 @@
-#include "alnblock.h"
-
-
-uint16_t DatasetAlignment::get_cycle() {
-  return cycle;
-}
-
-uint16_t DatasetAlignment::get_lane() {
-  return lane;
-}
-
-uint16_t DatasetAlignment::get_tile() {
-  return tile;
-}
-
-std::string DatasetAlignment::get_root() {
-  return root;
-}
-  
-CountType DatasetAlignment::get_read_length() {
-  return rlen;
-}
-
-std::string DatasetAlignment::get_bcl_file(uint16_t cl /* = 0 */) {
-  std::ostringstream path_stream;
-  uint16_t thiscycle = cl;
-  if ( cl == 0 ) {
-    // only allow cycle numbers > 0
-    assert(cycle > 0);
-    thiscycle = cycle;
-  }
-  path_stream << root << "/L00" << int(lane) << "/C" << int(thiscycle) << ".1/s_"<< int(lane) <<"_" << int(tile) << ".bcl";
-  return path_stream.str();
-}
-
-
-std::string DatasetAlignment::get_alignment_file() {
-  std::ostringstream path_stream;
-  path_stream << root << "/L00" << int(lane) << "/s_"<< int(lane) << "_" << int(tile) << ".align";
-  return path_stream.str();
-}
-
-
-std::string DatasetAlignment::get_filter_file() {
-  std::ostringstream path_stream;
-  path_stream << root << "/L00" << int(lane) << "/s_"<< int(lane) << "_" << int(tile) << ".filter";
-  return path_stream.str();
-}
-
-
-std::vector<char> DatasetAlignment::serialize() {
-  // calculate total size first
-  unsigned long int total_size = 0;
-
-  total_size += sizeof(uint16_t); // lane
-  total_size += sizeof(uint16_t); // tile
-  total_size += sizeof(CountType); // cycle
-
-  // root directory name
-  uint16_t root_size = root.size();
-  total_size += sizeof(uint16_t);
-  total_size += root_size;
-
-  // number of reads
-  unsigned long int num_reads = reads.size();
-  total_size += sizeof(uint64_t);
-
-  // read length
-  total_size += sizeof(CountType);
-
-  // sum up the sizes of the single alignments
-  for (auto it = reads.begin(); it != reads.end(); ++it) {
-    total_size += sizeof(uint32_t);
-    total_size += (*it).serialize_size();
-  }
-
-  // create the vector to store the data
-  std::vector<char> data (total_size);
-  char* d = data.data();
-
-  // write the lane
-  memcpy(d,&lane,sizeof(uint16_t));
-  d += sizeof(uint16_t);
-
-  // write the tile
-  memcpy(d,&tile,sizeof(uint16_t));
-  d += sizeof(uint16_t);
-
-  // write the cycle
-  memcpy(d,&cycle,sizeof(CountType));
-  d += sizeof(CountType);
-
-  // root directory name
-  memcpy(d,&root_size,sizeof(uint16_t));
-  d += sizeof(uint16_t);
-
-  memcpy(d,root.c_str(),root_size);
-  d += root_size;
-
-  // write the read length
-  memcpy(d,&rlen,sizeof(CountType));
-  d += sizeof(CountType);
-
-  // write the number of reads
-  memcpy(d,&num_reads,sizeof(uint64_t));
-  d += sizeof(uint64_t);
-
-  // write the reads
-  int i = 0;
-  for (auto it = reads.begin(); it != reads.end(); ++it,++i) {
-
-    std::vector<char> sread = it->serialize();
-    uint32_t size = sread.size();
-
-    memcpy(d,&size,sizeof(uint32_t));
-    d += sizeof(uint32_t);
-    memcpy(d,sread.data(),size);
-    d += size;
-    
-  }
-
-  return data;
-}
-
-
-uint64_t DatasetAlignment::serialize_file(std::string f /* = "" */) {
-  std::string fname = get_alignment_file();  
-  if ( f != "" ) {
-    fname = f;
-  }
-
-  // serialize data
-  std::vector<char> sdata = serialize();
-
-  // write data
-  return write_binary_file(fname,sdata);
-
-}
-
-
-
-uint64_t DatasetAlignment::deserialize(char* d) {
-  // the total number of bytes read
-  uint64_t bytes = 0; 
-
-  // read the lane
-  memcpy(&lane,d+bytes,sizeof(uint16_t));
-  bytes += sizeof(uint16_t);
-
-  // read the tile
-  memcpy(&tile,d+bytes,sizeof(uint16_t));
-  bytes += sizeof(uint16_t);
-
-  // read the cycle
-  memcpy(&cycle,d+bytes,sizeof(unsigned short));
-  bytes += sizeof(unsigned short);
-
-  // read the root directory name
-  uint16_t root_size;
-  memcpy(&root_size,d+bytes,sizeof(uint16_t));
-  bytes += sizeof(uint16_t);
-
-  char tmp[root_size+1];
-  memcpy(tmp,d+bytes,root_size);
-  tmp[root_size] = 0; // make the string null-terminated
-  root = tmp;
-  bytes += root_size;
-
-  // read the read length
-  memcpy(&rlen,d+bytes,sizeof(CountType));
-  bytes += sizeof(CountType);
-
-  // read the number of reads
-  unsigned long int num_reads;
-  memcpy(&num_reads,d+bytes,sizeof(uint64_t));
-  bytes += sizeof(uint64_t);
-
-  // reading the reads
-  reads.clear();
-  reads.reserve(num_reads);
-  for (unsigned int i = 0; i < num_reads; ++i) {
-    // get the size of the read
-    uint32_t size;
-    memcpy(&size,d+bytes,sizeof(uint32_t));
-    bytes += sizeof(uint32_t);
-
-    // read as a chunk and deserialize
-    reads.emplace_back(ReadAlignment(rlen));
-    //if (flags[i]) {
-    std::vector<char> sread (size);
-    memcpy(sread.data(),d+bytes,size);
-    reads.back().deserialize(sread.data());
-    //}
-
-    bytes += size;
-  }
-
-  return bytes;
-}
-
-
-unsigned long int DatasetAlignment::deserialize_file(std::string f /* = "" */) {
-  std::string fname = get_alignment_file();
-  if ( f != "" ) {
-    fname = f;
-  }
-
-  // read raw data from file
-  std::vector<char> sdata = read_binary_file(fname);
-  
-  // deserialize data
-  deserialize(sdata.data());
-
-  return sdata.size();
-}
-
-
-void DatasetAlignment::initialize(std::string rt, int ln, int tl, int rl) {
-  // set the basic stuff
-  root = rt;
-  lane = ln;
-  tile = tl;
-  cycle = 0;
-  rlen = rl;
-
-  // get the number of reads in this tile by looking in the first bcl file
-  std::string fname = get_bcl_file(1);
-
-  // extract the number of reads
-  uint32_t num_reads = num_reads_from_bcl(fname);
-
-  // create vector for read alignments
-  reads.clear();
-  reads.reserve(num_reads);
-  for (uint32_t i = 0; i < num_reads; i++){
-    reads.emplace_back(ReadAlignment(rlen));
-  }
-}
-
-
-
-void DatasetAlignment::add_nucleotide(KixRun* index, AlignmentSettings* settings) {
-
-  // TODO: check the prerequisites: bcl file must be available
-
-  // ----------------------------------------
-  // read the BCL file
-  // ----------------------------------------
-  std::string fname = get_bcl_file(cycle+1);
-  
-  std::vector<char> bcl_data = read_binary_file(fname);
-
-  // extract the number of reads from the BCL file
-  uint64_t num_reads;
-  memcpy(&num_reads,bcl_data.data(),4);
-
-  if (num_reads != reads.size()){
-    std::cerr << "Input Error: Number of base calls (" << num_reads << ") does not match the number of reads in this dataset (" << reads.size() << ")." << std::endl;
-    return;
-  }
-  
-  // pointer to the beginning of the base call data block
-  char* base_calls = bcl_data.data() + 4;
-
-  // ----------------------------------------
-  // load the filter flags
-  // ----------------------------------------
-  std::string filter_fname = get_filter_file();
-
-  std::vector<char> filterdata = read_binary_file(filter_fname);
-
-  // extract the number of reads from the filter file
-  unsigned int num_reads_filter;
-  memcpy(&num_reads_filter,filterdata.data()+8,4);
-
-  if (num_reads != num_reads_filter){
-    std::cerr << "Input Error: Number of reads in filter file (" << num_reads_filter << ") does not match the number of reads in the BCL file (" << num_reads << ")." << std::endl;
-    return;
-  }
-
-  // pointer to the beginning of the base call data block
-  char* filter_flags = filterdata.data() + 12;
-
-
-  // ----------------------------------------  
-  // update the reads
-  // ----------------------------------------
-
-  // timing stuff
-  std::chrono::high_resolution_clock::duration d_vec = std::chrono::high_resolution_clock::duration::zero();
-  std::chrono::high_resolution_clock::duration d_seed = std::chrono::high_resolution_clock::duration::zero();
-  std::chrono::high_resolution_clock::duration d_add = std::chrono::high_resolution_clock::duration::zero();
-  std::chrono::high_resolution_clock::duration d_rem = std::chrono::high_resolution_clock::duration::zero();
-  std::chrono::high_resolution_clock::duration d_sort = std::chrono::high_resolution_clock::duration::zero();
-
-
-  
-  auto rit = reads.begin();
-  for (uint64_t i = 0; i < num_reads; ++i, /*++fit,*/ ++rit) {
-
-    // extend the alignment by one nucleotide, if flags are ok
-    if (*(filter_flags+i) > 0) {
-      std::vector<std::chrono::high_resolution_clock::duration> t = (*rit).extend_alignment(*(base_calls+i), index, settings);
-	d_vec += t[0];
-	d_seed += t[1];
-	d_add += t[2];
-	d_rem += t[3];
-	d_sort += t[4];
-      }else {
-      (*rit).seeds.clear();
-    }
-  }
-  
-  cycle++;
-  
-  std::cout << "Time gather seeds:  " << d_vec.count()/1000 << std::endl;
-  std::cout << "Time extend seeds:  " << d_seed.count()/1000 << std::endl;
-  std::cout << "Time add new seeds: " << d_add.count()/1000 << std::endl;
-  std::cout << "Time filter seeds:  " << d_rem.count()/1000 << std::endl;
-  std::cout << "Time sorting seeds: " << d_sort.count()/1000 << std::endl;
-  
-
-  return;
-}
-
-
-
-void DatasetAlignment::report_alignments_spam(std::string fname, KixRun* idx) {
-
-  // open output file for writing
-  FILE* ofile;
-  ofile = fopen(fname.c_str(), "w");
-  if (!ofile) {
-    std::cerr << "Error opening output file " << fname << std::endl;
-    return;
-  }
-
-  // write out all seeds of each read
-  uint64_t rd = 0;
-  for (auto rit = reads.begin(); rit != reads.end(); ++rit, ++rd) {
-    // compose a read name
-    std::string rname = std::string("read_") + std::to_string(rd);
-
-    for (auto sit = rit->seeds.begin(); sit != rit->seeds.end(); ++sit) {
-      PositionType pos = (*sit)->start_pos;
-      if (pos < 0) {
-	pos = -pos - rlen + K;
-      }
-      std::stringstream out;
-      out << rname << "\t" << idx->get_name((*sit)->gid)  << "\t" << pos  << "\t" << CIGAR(**sit) << std::endl;
-      fwrite(out.str().c_str(), 1, out.str().size(), ofile);
-    }
-
-  }
-
-  fclose(ofile);
-}
-
-
diff --git a/lib/alnblock.h b/lib/alnblock.h
deleted file mode 100644
index 9e06963..0000000
--- a/lib/alnblock.h
+++ /dev/null
@@ -1,81 +0,0 @@
-#ifndef ALNBLOCK_H
-#define ALNBLOCK_H
-
-#include "headers.h"
-#include "definitions.h"
-#include "kindex.h"
-#include "tools.h"
-#include "alnread.h"
-
-
-//-------------------------------------------------------------------//
-//------  The Dataset-Alignment class  ------------------------------//
-//-------------------------------------------------------------------//
-
-class DatasetAlignment {
-
-  // dataset information
-  uint16_t cycle;
-  uint16_t lane;
-  uint16_t tile;
-  std::string root; // the BaseCalls directory
-  CountType rlen;
-
- public:
-
-  // getter functions for the dataset information
-  uint16_t get_cycle();
-  uint16_t get_lane();
-  uint16_t get_tile();
-  std::string get_root();
-  CountType get_read_length();
-
-  void set_cycle(uint16_t c) {cycle=c;}
-  
-  // get the path to the bcl file of a given cycle. cl = 0 denotes the current cycle.
-  std::string get_bcl_file(uint16_t cl = 0);
-
-  // get the path to the alignment file. The alignment file is located in
-  // <root>/L00<lane>/s_<lane>_<tile>.align
-  std::string get_alignment_file();
-
-  // get the path to the filter file. The illumina filter information is located in
-  // <root>/L00<lane>/s_<lane>_<tile>.filter
-  std::string get_filter_file();
-
-  // illumina validity flags for all reads
-  /* std::vector<bool> flags; */
-
-  // all read alignments
-  std::vector<ReadAlignment> reads;
-
-  // serialize the object into a char vector
-  std::vector<char> serialize();
-
-  // serialize the object into a binary file
-  uint64_t serialize_file(std::string f  = "");
-
-  // deserialize (read) data from a char vector
-  uint64_t deserialize(char* d);
-
-  // deserialize data from a binary file
-  uint64_t deserialize_file(std::string f = "");
-
-  // initialize the dataset alignment with
-  // 1. BaseCalls directory
-  // 2. Lane number
-  // 3. Tile number
-  // 4. Read length of Read 1
-  void initialize(std::string rt, int ln, int tl, int rl);
-
-  // extend alignment by one nucleotide
-  void add_nucleotide(KixRun* index, AlignmentSettings* settings);
-
-  // report alignments in SPAM format
-  void report_alignments_spam(std::string fname, KixRun* idx);
-
-}; // END class DatasetAlignment 
-
-
-
-#endif /* ALNBLOCK_H */
diff --git a/lib/alnread.cpp b/lib/alnread.cpp
index ed892a4..06ebea8 100644
--- a/lib/alnread.cpp
+++ b/lib/alnread.cpp
@@ -1,19 +1,74 @@
-#include <time.h>
-#include <chrono>
-
 #include "alnread.h"
 
 
-bool seed_compare (Seed i,Seed j) { 
-  return (i.start_pos < j.start_pos); 
-}
-
-bool seed_compare_pos (const USeed & i, const USeed & j) { 
-  return (i->start_pos < j->start_pos); 
-}
+seqan::String<seqan::CigarElement<> > Seed::returnSeqanCigarString() {
+	typedef seqan::String<seqan::CigarElement<> > TSeqanCigarString;
+	TSeqanCigarString seqanCigarString;
+	seqan::CigarElement<> cigarElem;
+	int last_offset = 0;
+	for (CigarVector::const_iterator it = cigar_data.begin(); it != cigar_data.end(); ++it) { // for every element in cigar_data
+		if (it == cigar_data.begin() && (*it).offset==NO_MATCH) { // Alignment begins with NO_MATCH region => Softclipped start 
+			cigarElem.operation='S';
+			cigarElem.count=(*it).length;
+			seqan::appendValue(seqanCigarString, cigarElem);
+			continue;
+		}
+		if (it == --cigar_data.end() && (*it).offset==NO_MATCH) { // Alignment ends with NO_MATCH region => Softclipped end 
+			cigarElem.operation='S';
+			cigarElem.count=(*it).length;
+			seqan::appendValue(seqanCigarString, cigarElem);
+			continue;
+		}
+		if ((*it).offset==NO_MATCH) { // ??? -> Mismatch
+			cigarElem.operation='M';
+			cigarElem.count=(*it).length;
+			seqan::appendValue(seqanCigarString, cigarElem);
+			continue;
+		}
+		if ((*it).offset!=NO_MATCH) { // ??? -> Match
+            if (last_offset == (*it).offset) { // no offset change
+                cigarElem.operation='M';
+                cigarElem.count=(*it).length;
+                seqan::appendValue(seqanCigarString, cigarElem);
+                last_offset = (*it).offset;
+                continue;
+			}
+			if (last_offset < (*it).offset) { // offset gets bigger => reference in alignment is longer than read => deletion in read (and thereby cigar string)
+				cigarElem.operation='D';
+				cigarElem.count=(*it).offset - last_offset;
+				seqan::appendValue(seqanCigarString, cigarElem);
+				cigarElem.operation='M';
+				cigarElem.count=(*it).length;
+				seqan::appendValue(seqanCigarString, cigarElem);
+                last_offset = (*it).offset;
+				continue;
+			}
+			if (last_offset > (*it).offset) { // offset gets smaller => reference in alignment is smaller than read => insertion in read (and thereby cigar string)
+				cigarElem.operation='I';
+				cigarElem.count=last_offset - (*it).offset;
+				seqan::appendValue(seqanCigarString, cigarElem);
+				cigarElem.operation='M';
+				cigarElem.count=(*it).length;
+				seqan::appendValue(seqanCigarString, cigarElem);
+                last_offset = (*it).offset;
+				continue;
+			}
+		}
+	}
 
-bool seed_compare_num_matches (const USeed & i, const USeed & j) { 
-  return (i->num_matches < j->num_matches); 
+    // collapse Neighboring match regions
+	for (unsigned k = 1; k<length(seqanCigarString); k++)
+		if ((seqanCigarString[k-1].operation == 'M') && (seqanCigarString[k].operation == 'M')) {
+			unsigned temp = seqanCigarString[k].count;
+			erase(seqanCigarString, k);
+			seqanCigarString[k-1].count += temp;
+			k--;
+		}
+	
+	// reverse Cigar String if seed maps reverse
+    if (start_pos < 0)
+        seqan::reverse(seqanCigarString);
+    return seqanCigarString;
 }
 
 uint16_t Seed::serialize_size() {
@@ -29,7 +84,6 @@ uint16_t Seed::serialize_size() {
   uint8_t cigar_len = cigar_data.size();
   total_size += sizeof(uint8_t); // the size of the cigar information 
   total_size += cigar_len*(sizeof(CountType) + sizeof(DiffType)); // the cigar information itself
-  
   return total_size;
 }
 
@@ -96,7 +150,6 @@ uint16_t Seed::deserialize(char* d) {
 
   // read the cigar elements
   cigar_data.clear();
-  cigar_data.reserve(cigar_len);
   for (uint8_t i = 0; i < cigar_len; ++i) {
     CigarElement cig;
     memcpy(&(cig.length),d+bytes,sizeof(CountType));
@@ -105,24 +158,27 @@ uint16_t Seed::deserialize(char* d) {
     memcpy(&(cig.offset),d+bytes,sizeof(DiffType));
     bytes += sizeof(DiffType);
 
-    cigar_data.push_back(cig);
+    cigar_data.emplace_back(cig);
   }
 
   return bytes;  
 }
 
 
-
-
+void ReadAlignment::set_rlen(CountType r) {
+    rlen = r;
+}
 
 
 uint64_t ReadAlignment::serialize_size() {
   // calculate total size first
   uint64_t total_size = 0;
   total_size += 1; // the flag
-  total_size += sizeof(HashIntoType); // the last k-mer
   total_size += sizeof(CountType); // the cycle number
   total_size += sizeof(CountType); // the last_invalid cycle
+  
+  total_size += sizeof(CountType); // the sequence length
+  total_size += sequenceStoreVector.size()*(sizeof(uint8_t)); // the sequence information itself
 
   // total number of seeds
   total_size += sizeof(uint32_t);
@@ -135,6 +191,7 @@ uint64_t ReadAlignment::serialize_size() {
   return total_size;
 }
 
+
 std::vector<char> ReadAlignment::serialize() {
   // get the total size of the serialization
   uint64_t total_size = serialize_size();
@@ -148,10 +205,6 @@ std::vector<char> ReadAlignment::serialize() {
   memcpy(d,&flags,1);
   d++;
 
-  // write the last k-mer
-  memcpy(d,&last_kmer,sizeof(HashIntoType));
-  d += sizeof(HashIntoType);
-
   // write the cycle
   memcpy(d,&cycle,sizeof(CountType));
   d += sizeof(CountType);
@@ -160,6 +213,16 @@ std::vector<char> ReadAlignment::serialize() {
   memcpy(d,&last_invalid,sizeof(CountType));
   d += sizeof(CountType);
 
+  // write the sequence length
+  memcpy(d,&sequenceLen,sizeof(CountType));
+  d += sizeof(CountType);
+
+  // write the sequenceStoreVector
+  for (auto it = sequenceStoreVector.begin(); it != sequenceStoreVector.end(); ++it) {
+    memcpy(d,&(*it),sizeof(uint8_t));
+    d += sizeof(uint8_t);
+  }
+
   // write the number of seeds
   memcpy(d,&num_seeds,sizeof(uint32_t));
   d += sizeof(uint32_t);
@@ -189,10 +252,6 @@ uint64_t ReadAlignment::deserialize(char* d) {
   memcpy(&flags,d,1);
   bytes++;
 
-  // read the last k-mer
-  memcpy(&last_kmer,d+bytes,sizeof(HashIntoType));
-  bytes += sizeof(HashIntoType);
-
   // read the cycle
   memcpy(&cycle,d+bytes,sizeof(CountType));
   bytes += sizeof(CountType);
@@ -201,6 +260,22 @@ uint64_t ReadAlignment::deserialize(char* d) {
   memcpy(&last_invalid,d+bytes,sizeof(CountType));
   bytes += sizeof(CountType);
 
+  // read the sequence length
+  sequenceLen = 0;
+  memcpy(&sequenceLen,d+bytes,sizeof(CountType));
+  bytes += sizeof(CountType);
+
+  // read the sequence
+  unsigned seqVec_size = (unsigned) std::ceil((float) sequenceLen / 4.0);
+  sequenceStoreVector.clear();
+  sequenceStoreVector.reserve(seqVec_size);
+  for (unsigned i = 0; i <seqVec_size; ++i) {
+    uint8_t elem;
+    memcpy(&(elem),d+bytes,sizeof(uint8_t));
+    bytes += sizeof(uint8_t);
+    sequenceStoreVector.push_back(elem);
+  }
+
   // read the number of seeds
   uint32_t num_seeds = 0;
   memcpy(&num_seeds,d+bytes,sizeof(uint32_t));
@@ -208,7 +283,6 @@ uint64_t ReadAlignment::deserialize(char* d) {
 
   // read the seeds
   seeds.clear();
-  seeds.reserve(num_seeds);
   for (uint32_t i = 0; i < num_seeds; ++i) {
     uint16_t seed_size = 0;
     memcpy(&seed_size,d+bytes,sizeof(uint16_t));
@@ -221,6 +295,8 @@ uint64_t ReadAlignment::deserialize(char* d) {
     USeed s (new Seed);
     s->deserialize(seed_data.data());
 
+    // insert into sorted list. The data read in is already sorted!!!
+    // therefore I only push back
     seeds.push_back(std::move(s));
   }
 
@@ -228,596 +304,487 @@ uint64_t ReadAlignment::deserialize(char* d) {
 }
 
 
+// convert and return sequence of the seed as string
+std::string ReadAlignment::getSequenceString(AlignmentSettings & settings) {
+    std::string seq = "";
+    // append one 4 base block at a time
+    for (unsigned i = 0; i<sequenceStoreVector.size(); i++)
+        seq.append(unhash(sequenceStoreVector[i], 4)); // 4 because 4 nucleotides fit in one element of sequenceStoreVector, namely a uint8_t
 
-// Create new seeds from a list of kmer positions and add to current seeds
-void ReadAlignment::add_new_seeds(GenomePosListType& pos) {
+    // delete last few bases, because sequence is only len long
+    for (unsigned i = sequenceLen; i<4*sequenceStoreVector.size(); ++i)
+        seq.pop_back();
 
-  seeds.reserve(seeds.size() + pos.size());
-  
-  for(GenomePosListIt it = pos.begin(); it != pos.end(); ++it) {
-
-    USeed s (new Seed);
+    // return sequence without barcode
+    return seq.substr(0,settings.seqlen);
+}
 
-    s->gid = it->gid;
-    s->start_pos = it->pos - (cycle-K);
-    s->num_matches = 1;
-    s->cigar_data.clear();
-    if (cycle-K > 0)
-      s->cigar_data.emplace_back(cycle-K,NO_MATCH);
-    s->cigar_data.emplace_back(K,0);
 
-    seeds.push_back(std::move(s));
+// convert and return barcode
+std::string ReadAlignment::getBarcodeString(AlignmentSettings & settings) {
+    std::string seq = "";
+    // append one 4 base block at a time
+    for (unsigned i = 0; i<sequenceStoreVector.size(); i++)
+        seq.append(unhash(sequenceStoreVector[i], 4)); // 4 because 4 nucleotides fit in one element of sequenceStoreVector, namely a uint8_t
 
-  }
+    // delete last few bases, because sequence is only len long
+    for (unsigned i = sequenceLen; i<4*sequenceStoreVector.size(); ++i)
+        seq.pop_back();
 
+    // return only barcode of sequence
+    return seq.substr(settings.seqlen);
 }
 
 
+// append one nucleotide to sequenceStoreVector
+void ReadAlignment::appendNucleotideToSequenceStoreVector(char nuc) {
+    // check if all bits from sequenceStoreVector are used
+    if (sequenceLen % 4 == 0) { // yes, all used => new 8 Bit block needs to be created
+        uint8_t newBlock = twobit_repr(nuc);
+        newBlock = newBlock << 6; // 'empty' bits are on the right side
+        sequenceStoreVector.push_back(newBlock);
+        ++sequenceLen;
+    }
+    else { // not all bits are used. At least the last two are unoccupied
+        // append new 2 bits to the right of the old bits
+        if (sequenceLen % 4 == 1) // 6 bits empty
+            sequenceStoreVector.back() = sequenceStoreVector.back() >> 4;
+        if (sequenceLen % 4 == 2) // 4 bits empty
+            sequenceStoreVector.back() = sequenceStoreVector.back() >> 2;
+        sequenceStoreVector.back() = sequenceStoreVector.back() | twobit_repr(nuc);
+        ++sequenceLen;
+        // shift used bits until they are left aligned
+        if (sequenceLen % 4 == 2) // 4 bits empty
+            sequenceStoreVector.back() = sequenceStoreVector.back() << 4;
+        if (sequenceLen % 4 == 3) // 2 bits empty
+            sequenceStoreVector.back() = sequenceStoreVector.back() << 2;
+    }
+}
 
-std::vector<std::chrono::high_resolution_clock::duration> ReadAlignment::extend_alignment(char bc, KixRun* index, AlignmentSettings* settings) {
-  // time stuff
-  std::chrono::high_resolution_clock::duration d_vec = std::chrono::high_resolution_clock::duration::zero();
-  std::chrono::high_resolution_clock::duration d_seed = std::chrono::high_resolution_clock::duration::zero();
-  std::chrono::high_resolution_clock::duration d_add = std::chrono::high_resolution_clock::duration::zero();
-  std::chrono::high_resolution_clock::duration d_rem = std::chrono::high_resolution_clock::duration::zero();
-  std::chrono::high_resolution_clock::duration d_sort = std::chrono::high_resolution_clock::duration::zero();
-
-  // q-gram-lemma: seeds added after last_new_seed have at least more than min_errors errors
-  CountType last_new_seed = K*(settings->min_errors+1); 
 
-  // move to the next cycle
-  cycle += 1;
+// helper function for add_new_seeds
+bool seed_compare_pos (const USeed & i, const USeed & j) { 
+  return (i->start_pos < j->start_pos); 
+}
 
-  // update the last k-mer
-  uint8_t qual = ((bc >> 2) & 63); // get bits 3-8
-  if ( (bc == 0) || (qual < settings->min_qual) ){ // no call if all 0 bits or quality below threshold 
-    last_kmer = 0;
-    last_invalid = cycle;
-  }
-  else{
-    // update the current k-mer of the read using the basecall
-    update_kmer(last_kmer, bc);
-  }
 
-  // update the alignments
-  if ( last_invalid+K-1 < cycle ) {
+// Create new seeds from a list of kmer positions and add to current seeds
+void ReadAlignment::add_new_seeds(GenomePosListType& pos, std::vector<bool> & posWasUsedForExtension, AlignmentSettings & settings) {
+  SeedVecIt sit = seeds.begin();
 
-    std::chrono::high_resolution_clock::time_point tv1 = std::chrono::high_resolution_clock::now();
-    // get all occurrences of last_kmer (fwd & rc) from index
-    GenomePosListType pos = index->retrieve_positions(last_kmer);
+  for(GenomePosListIt it = pos.begin(); it != pos.end(); ++it) {
+    if (posWasUsedForExtension[it - pos.begin()]) // if current reference hit was used at least once for seed extension
+        continue;
+    USeed s (new Seed);
 
-    // pos MUST be sorted. However, pos is sorted as long as the index is sorted (should be by default)
-    if (settings->sort_positions) {
-      std::sort(pos.begin(),pos.end(),gp_compare);
+    s->gid = it->gid;
+    s->start_pos = it->pos - (cycle-settings.kmer_span);
+    s->num_matches = K_HiLive;
+    s->cigar_data.clear();
+    if (cycle-settings.kmer_span > 0)
+      s->cigar_data.emplace_back(cycle-settings.kmer_span,NO_MATCH);
+
+    // set correct matches and mismatches depending on kmer mask
+    std::vector<unsigned> gapVec = settings.kmer_gaps;
+    gapVec.push_back(settings.kmer_span+1);
+    unsigned lastProcessedGapPosition = 0;
+    for (unsigned gapIndex = 0, nextGap; gapIndex < gapVec.size(); ++gapIndex) {
+        nextGap = gapVec[gapIndex];
+        if (nextGap - lastProcessedGapPosition - 1 > 0)
+            s->cigar_data.emplace_back(nextGap - lastProcessedGapPosition - 1,0);
+        lastProcessedGapPosition = nextGap;
+        s->cigar_data.emplace_back(1,NO_MATCH);
     }
-    
-    std::chrono::high_resolution_clock::time_point tv2 = std::chrono::high_resolution_clock::now();
-    d_vec = tv2 - tv1;
+    s->cigar_data.pop_back(); // remove tailing NO_MATCH from the for-loop
 
+    // insert seed into sorted list of seeds
+    while (sit != seeds.end() && seed_compare_pos(*sit, s)) // if seed exists and elem has larger starting position than (*sit)
+        ++sit;
+    sit = seeds.insert(sit, std::move(s));
+  }
+}
 
-    std::chrono::high_resolution_clock::time_point tl1 = std::chrono::high_resolution_clock::now();
 
-    CountType max_num_matches = 0;
-    
-    // check if the current k-mer was trimmed in the index
-    if ( (pos.size() == 1) && (pos[0].gid == TRIMMED) ) {
-      // clear the pos list such that nothing bad happens in the next steps
-      pos.clear();
-      
-      // pretend that all existing seeds could be extended
-      for (auto sd = seeds.begin() ; sd!=seeds.end(); ++sd ) {
-	(*sd)->cigar_data.back().length += 1;
-	if ((*sd)->cigar_data.back().offset != NO_MATCH) {
-	  (*sd)->num_matches += 1;
-	  max_num_matches = std::max(max_num_matches, (*sd)->num_matches);
-	}
-      }
-    }
-    // not trimmed in the index --> try to extend existing seeds
-    else {
-      // find support for each candidate: iterate over seed candidates and positions simultaneously
-      auto cPos1 = pos.begin(); // beginning of the sliding window [cPos1, cPos2)
-      auto cPos2 = pos.begin(); // end of the sliding window
-      auto wSeed1 = seeds.begin(); // beginning of the sliding window in the seeds [wSeed1, wSeed2)
-      auto wSeed2 = seeds.begin(); // end of the sliding window
-      
-      for (auto cSeed = seeds.begin(); cSeed!=seeds.end(); ++cSeed ) {
-	PositionType seed_pos = (*cSeed)->start_pos + cycle -K;
-	
-	// adjust the window in the position list
-	while( (cPos1!=pos.end()) && (cPos1->pos < seed_pos - settings->window) ){
-	  ++cPos1;
-	}
-	while( (cPos2!=pos.end()) && (cPos2->pos < seed_pos + settings->window) ){
-	  ++cPos2;
-	}
-	
-	// adjust the neighboring seeds window
-	while( (wSeed1!=seeds.end()) && ((*wSeed1)->start_pos < (*cSeed)->start_pos - 2*settings->window) ){
-	  ++wSeed1;
-	}
-	while( (wSeed2!=seeds.end()) && ((*wSeed2)->start_pos < (*cSeed)->start_pos + 2*settings->window) ){
-	  ++wSeed2;
-	}
-	
-	// search all positions in the window for the best matching extension of the seed
-	DiffType best_distance = settings->window+1;  // set larger than search window
-	GenomePosListIt best_match = cPos2; // set behind the last element of the window
-	for(GenomePosListIt win=cPos1; win!=cPos2; ++win){
-	  if (win->gid == (*cSeed)->gid){
-	    int dist = seed_pos - win->pos; 
-	    if ((best_match==cPos2)||(abs(dist) < abs(best_distance))) {
-	      best_match = win;
-	      best_distance = dist;
-	    }
-	  }
-	}
-	
-	// assign best position to the seed
-	if (best_match != cPos2) {
-	  // find the best seed from the perspective of best_match
-	  DiffType best_sdist = 2*settings->window+1;  // set larger than search window
-	  auto best_seed = wSeed2;   // set behind the last element of the window
-	  for(auto win=wSeed1; win!=wSeed2; ++win){
-	    if ((*win)->gid == best_match->gid){
-	      int dist = best_match->pos - ((*win)->start_pos+cycle-K); 
-	      if ((best_seed==wSeed2)||(abs(dist) < abs(best_sdist))) {
-		best_seed = win;
-		best_sdist = dist;
-	      }
-	    }
-	  }
-	  
-	  if (best_seed == cSeed) {
-	    //(*cSeed)->matches.push_back(best_distance);
-	    (*cSeed)->num_matches += 1;
-	    if ( (*cSeed)->cigar_data.back().offset == NO_MATCH ) {
-	      // start a new match area. 1 matching k-mer = K matches
-	      (*cSeed)->cigar_data.emplace_back(K,best_distance);
-	    }
-	    else {
-	      // continue existing match area
-	      (*cSeed)->cigar_data.back().length += 1;
-	    }
-	    max_num_matches = std::max(max_num_matches, (*cSeed)->num_matches);
-	    // remove assigned position from the list
-	    if(best_match == cPos1){
-	      cPos1 = pos.erase(best_match);
-	    }
-	    else {
-	      pos.erase(best_match);
-	    }
-	    --cPos2; // 
-	  }
-	  else{
-	    // best match has another favourite
-	    if ( (*cSeed)->cigar_data.back().offset == NO_MATCH ) {
-	      // continue existing mismatch area
-	      (*cSeed)->cigar_data.back().length += 1;
-	    }
-	    else {
-	      // start new mismatch area
-	      (*cSeed)->cigar_data.emplace_back(1,NO_MATCH);
-	    }
-	    //(*cSeed)->matches.push_back(NO_MATCH);
-	  }
+// Extend or create a placeholder seed for read with only trimmed matches
+void ReadAlignment::create_placeholder_seed(AlignmentSettings & settings) {
+	// if no seed exist, create one (only in the correct cycle and if parameter is true).
+	if(seeds.size()==0){
+		USeed s (new Seed);
+		s->gid = TRIMMED;
+		s->num_matches = 1;
+		s->cigar_data.clear();
+		s->cigar_data.emplace_back(settings.kmer_span,TRIMMED_MATCH);
+
+        // this is always sorted, because there is only one seed now
+        seeds.push_back(std::move(s));
 	}
-	else{
-	  // no position found to extend the current seed
-	  if ( (*cSeed)->cigar_data.back().offset == NO_MATCH ) {
-	    // continue existing mismatch area
-	    (*cSeed)->cigar_data.back().length += 1;
-	  }
-	  else {
-	    // start new mismatch area
-	    (*cSeed)->cigar_data.emplace_back(1,NO_MATCH);
-	  }
-	  //(*cSeed)->matches.push_back(NO_MATCH);
+}
+
+
+// convert a placeholder seed to a set of normal seeds
+void ReadAlignment::convertPlaceholder(GenomePosListType& pos, AlignmentSettings & settings){
+	if(seeds.size()!=1)
+		return;
+	auto s = seeds.begin();
+	auto matches = (*s)->num_matches;
+	if(cycle - settings.kmer_span > 0 && (*s)->gid==TRIMMED){
+		seeds.clear();
+		for(auto p = pos.begin(); p != pos.end(); ++p){
+			USeed newSeed (new Seed());
+		    newSeed->gid = p->gid;
+		    newSeed->start_pos = p->pos - (cycle - settings.kmer_span);
+		    newSeed->num_matches = matches;
+		    newSeed->cigar_data.clear();
+            newSeed->cigar_data.emplace_back(cycle - settings.kmer_span,NO_MATCH);
+
+            // set correct matches and mismatches depending on kmer mask
+            std::vector<unsigned> gapVec = settings.kmer_gaps;
+            gapVec.push_back(settings.kmer_span); // without +1 because the last match will be inserted in extend_alignment
+            unsigned lastProcessedGapPosition = 0;
+            for (unsigned gapIndex = 0, nextGap; gapIndex < gapVec.size(); ++gapIndex) {
+                nextGap = gapVec[gapIndex];
+                if (nextGap - lastProcessedGapPosition - 1 > 0)
+                    newSeed->cigar_data.emplace_back(nextGap - lastProcessedGapPosition - 1,0);
+                lastProcessedGapPosition = nextGap;
+                newSeed->cigar_data.emplace_back(1,NO_MATCH);
+            }
+            newSeed->cigar_data.pop_back(); // remove tailing NO_MATCH from the for-loop
+
+            // insert into sorted list. The list is empty and pos vector is already sorted.
+            // therefore I only push back
+            seeds.push_back(std::move(newSeed));
+		}
 	}
-      } // END: for(seeds...)
-    } // END: not trimmed
-    std::chrono::high_resolution_clock::time_point tl2 = std::chrono::high_resolution_clock::now();
-    d_seed = tl2 - tl1;
+}
 
 
-    std::chrono::high_resolution_clock::time_point ta1 = std::chrono::high_resolution_clock::now();
-    // set the last_new_seed cycle according to the mapping mode
-    if ( settings->best_hit_mode ) {
-      last_new_seed = std::min(CountType(rlen-max_num_matches+1),last_new_seed);
+// filter seeds based on filtering mode and q gram lemma. Also calls add_new_seeds.
+void ReadAlignment::filterAndCreateNewSeeds(AlignmentSettings & settings, GenomePosListType & pos, std::vector<bool> & posWasUsedForExtension) {
+    // TODO this function misbehaves when using demultiplexing. Reads might be preferred, kicking others only to get discarded due to barcode in the end.
+    // compute possible remaining matches
+    int possibleRemainingMatches = settings.seqlen - cycle + settings.kmer_span - 1;
+    if (cycle == settings.seqlen)
+        possibleRemainingMatches = 0;
+
+    // compute threshold for number of matching bases a seed must already have to have a chance of getting printed in the end
+    // adjust threshold via adapted q-gram-lemma
+    int num_matches_threshold = settings.seqlen - settings.min_errors*(settings.kmer_span) - possibleRemainingMatches;
+
+    if ((settings.all_best_hit_mode) || (settings.any_best_hit_mode)) {
+        int max_num_matches = 0;
+        for(SeedVecIt sd = seeds.begin() ; sd !=seeds.end(); ++sd)
+            max_num_matches = std::max(max_num_matches, (int) (*sd)->num_matches);
+        num_matches_threshold = std::max(num_matches_threshold, (int) max_num_matches - possibleRemainingMatches);
     }
-    if ( settings->best_n_mode ) {
-      //TODO: last_new_seed = std::min(CountType(rlen-max_num_matches+1),last_new_seed);
+    if (settings.all_best_n_scores_mode && settings.best_n > 0) {
+        std::vector<CountType> num_matches_vector;
+        for (SeedVecIt it=seeds.begin(); it!=seeds.end();++it)
+            num_matches_vector.push_back((*it)->num_matches);
+        std::sort(num_matches_vector.begin(), num_matches_vector.end(), std::greater<CountType>()); // sort in decreasing order
+        unsigned numberOfUniques = std::unique(num_matches_vector.begin(), num_matches_vector.end()) - num_matches_vector.begin(); // uniques are in front of vector now
+        if (numberOfUniques > settings.best_n) {
+            CountType nth_best_num_matches = num_matches_vector[settings.best_n - 1];
+            num_matches_threshold = std::max(num_matches_threshold, (int) nth_best_num_matches - possibleRemainingMatches);
+        }
     }
+    // else nothing has to be done for all-hit-mode
+    
 
-    // create new seed candidates for each k-mer match that was not used to extend a seed
-    if ( cycle <= last_new_seed ) {
+    // delete all seeds which do not reach threshold
+    SeedVecIt it=seeds.begin();
+    bool foundHit = false;
+    while ( it!=seeds.end()) {
+        if ((*it)->num_matches < num_matches_threshold)
+            it = seeds.erase(it);
+        else if (settings.discard_ohw && (cycle>settings.start_ohw) && ((*it)->num_matches <= K_HiLive)) // remove one-hit-wonders
+            it = seeds.erase(it);
+        else if (cycle == settings.seqlen && settings.any_best_hit_mode && foundHit)
+            it = seeds.erase(it);
+        else
+            ++it;
+        foundHit = true;
+    }
 
-      add_new_seeds(pos);
+    // if a new seed would have a chance then create it
+    if (num_matches_threshold <= 1) // new seed would have 1 more match than expected by possibleRemainingMatches
+        add_new_seeds(pos, posWasUsedForExtension, settings);
+}
 
-      if (pos.begin() != pos.end()) {
-	max_num_matches = std::max(max_num_matches, (CountType)1);
-      }
 
+// updates cigar_data accordingly to a new matching kmer
+void ReadAlignment::addMatchingKmer(USeed & s, DiffType offset, AlignmentSettings & settings) {
+
+    s->cigar_data.emplace_back(1,offset);
+    s->num_matches += 1;
+
+    ////////////////////////////////////////////////////////////
+    //// determine last occurred offset ////////////////////////
+    int last_offset = 0;
+    if ((*prev(prev(s->cigar_data.end()))).offset != NO_MATCH) // if last CigarElement is Match
+        last_offset = (*prev(prev(s->cigar_data.end()))).offset;
+    else // then the one before has to be a match
+        last_offset = (*prev(prev(prev(s->cigar_data.end())))).offset;
+    assert(last_offset != NO_MATCH);
+
+    ////////////////////////////////////////////////////////////
+    //// split last kmer-span bases in single CigarElements ////
+    CigarVector::iterator it = --(s->cigar_data.end());
+    unsigned summedLength = 1;
+    while (summedLength < settings.kmer_span) {
+        ++summedLength;
+        --it;
+        if ((*it).length > 1) {
+            CigarElement elem1(1, (*it).offset);
+            CigarElement elem2((*it).length-1, (*it).offset);
+            s->cigar_data.insert(it, elem2);
+            s->cigar_data.insert(it, elem1);
+            it = s->cigar_data.erase(it); // points now at element after former it
+            --it; // points now at elem1
+        }
+    }
+    // it points now at kmer-spans-th last element of cigar_data list
+
+    ////////////////////////////////////////////////////////////
+    //// remove possibly inserted bases ////////////////////////
+    if (offset < last_offset) {
+        --it;
+        for (int i=0; i<last_offset-offset; ++i) {
+            assert((*it).offset == NO_MATCH);
+            if ((*it).length > 1)
+                --((*it).length);
+            else
+                it = --(s->cigar_data.erase(it));
+        }
+        ++it; // now again points at kmer-spans-th last element of cigar_data list
     }
-    std::chrono::high_resolution_clock::time_point ta2 = std::chrono::high_resolution_clock::now();
-    d_add = ta2 - ta1;
 
-    
-    std::chrono::high_resolution_clock::time_point tr1 = std::chrono::high_resolution_clock::now();
-
-    // get the num_matches of the N'th best seed
-    // seed list gets partially sorted (and sorted back later). Noone ever said it will be fast...
-    CountType nth_best_match = 0;
-    if ( settings->best_n_mode && (settings->best_n > 0) && (seeds.size() >= settings->best_n) ) {
-      std::nth_element(seeds.begin(), seeds.begin()+seeds.size()-settings->best_n , seeds.end(), seed_compare_num_matches);
-      nth_best_match = (*(seeds.begin()+seeds.size()-settings->best_n))->num_matches;
+    ////////////////////////////////////////////////////////////
+    //// set matched bases as match ////////////////////////////
+
+    CigarVector::iterator it_save = it; // for joining
+    unsigned positionInKmer = 1;
+    while (it != s->cigar_data.end()) {
+        // if positionInKmer is not in kmer_gaps and cigar element was match
+        if ((*it).offset == NO_MATCH && std::find(settings.kmer_gaps.begin(), settings.kmer_gaps.end(), positionInKmer) == settings.kmer_gaps.end()) {
+            (*it).offset = offset;
+            s->num_matches += 1;
+        }
+        ++it;
+        ++positionInKmer;
     }
 
-    // define a lambda function implementing all discard criteria.
-    // all criteria must be fulfilled to keep the seed. Go from stronger to weaker criteria.
-    auto crit = [&] (USeed & s) {
-      // don't filter seeds that were extended in this cycle
-      if (s->cigar_data.back().offset != NO_MATCH) {
-	return false;
-      }
-
-      // 1. remove one-hit-wonders
-      if ( settings->discard_ohw && (cycle>settings->start_ohw)&&(s->num_matches<=1) ) {
-	return true;
-      }
-
-      // 2. remove according to q-gram lemma
-      if ( cycle > (K*(settings->min_errors+1) + s->num_matches) ) {
-	return true;
-      }
-
-      // 3. remove according Best-Hit-criteria
-      if ( settings->best_hit_mode ) {
-	if (cycle > (rlen - max_num_matches + s->num_matches)) {
-	  return true;
-	}
-      }
-      // 4. remove according Best-N-criteria
-      else if ( settings->best_n_mode ) {
-	if ( cycle > (rlen - nth_best_match + s->num_matches) ) {
-	  return true;
-	}
-      }
+    ////////////////////////////////////////////////////////////
+    //// join last kmer-span+1 CigarElements ///////////////////
+    it = it_save;
+    if (it == s->cigar_data.begin())
+        ++it;
+    while (it != s->cigar_data.end()) {
+        if ((*prev(it)).offset == (*it).offset) {
+            (*prev(it)).length += 1;
+            it = s->cigar_data.erase(it);
+        }
+        else
+            ++it;
+    }
+}
 
-      // get the number of mismatches since the last match
-      int since_last_match = s->cigar_data.back().length;
 
-      // 5. heuristic criterium
-      if ((since_last_match >= K+10)&&(s->num_matches < (int)(std::sqrt(cycle-K+1)))){
-	return true;
-      }
+// Extend an existing seed (to be precise, extend the CIGAR vector / data).
+bool ReadAlignment::extendSeed(USeed & s, DiffType offset, AlignmentSettings & settings){
+	// Extend CIGAR for TRIMMED k-mers
+	if ((offset == TRIMMED_MATCH) || ((offset == NO_MATCH) && (s->cigar_data.back().offset == TRIMMED_MATCH))){
+		s->cigar_data.back().length += 1;
+		if (s->cigar_data.back().offset != NO_MATCH){
+			s->num_matches += 1;
+			return true;
+		}
+		return false;
+	}
+    
+    // if last region was match region
+	if(s->cigar_data.back().offset != NO_MATCH){ 
+        // MATCH -> NO_MATCH
+        if(offset == NO_MATCH){
+            // new mismatch region
+            s->cigar_data.emplace_back(1,NO_MATCH);
+            return false;
+        }
+        // MATCH -> MATCH  Extend match region with new match.
+        else {
+            int offset_change = offset - s->cigar_data.rbegin()->offset;
+            // without any mismatch in between there cannot be a valid offset_change other than 0
+            if (offset_change!=0) {
+                // new mismatch region
+                s->cigar_data.emplace_back(1,NO_MATCH);
+                return false;
+            }
+            // else: extend current match region
+            addMatchingKmer(s, offset, settings);
+            return true;
+        }
+    }
+    // so last region was mismatch region
+    else {
+        // NO_MATCH -> NO_MATCH  Extend mismatch region with new mismatch.
+        if(offset == NO_MATCH){
+            s->cigar_data.back().length += 1;
+            return false;
+        }
+        // NO_MATCH -> MATCH
+        else {
+            assert((++(s->cigar_data.rbegin()))->offset != TRIMMED_MATCH && (++(s->cigar_data.rbegin()))->offset != NO_MATCH);
+            int offset_change = offset - (++(s->cigar_data.rbegin()))->offset;
+            // If there is an offset change, I need to have seen the appropriate mismatches before.
+            if ( offset_change == 0
+            || ((offset_change < 0) && (s->cigar_data.back().length >= -offset_change + settings.kmer_span - 1)) // Insertion in read
+            || ((offset_change > 0) && (s->cigar_data.back().length >= settings.kmer_span - 1 )) ) { // Deletion in read
+                addMatchingKmer(s, offset, settings);
+                return true;
+            } else {
+                // criteria not fulfilled: extend existing mismatch area.
+                s->cigar_data.back().length += 1;
+                return false;
+            }
+        }
+    }
+}
 
-      return false;
-    };
 
-    seeds.erase(std::remove_if(seeds.begin(),seeds.end(),crit) , seeds.end());
-    std::chrono::high_resolution_clock::time_point tr2 = std::chrono::high_resolution_clock::now();
-    d_rem = tr2 - tr1;
 
+void ReadAlignment::extend_alignment(char bc, KixRun* index, AlignmentSettings* settings) {
 
+	// move to the next cycle
+	cycle += 1;
 
-    std::chrono::high_resolution_clock::time_point tso1 = std::chrono::high_resolution_clock::now();
-    std::sort(seeds.begin(), seeds.end(), seed_compare_pos);
-    std::chrono::high_resolution_clock::time_point tso2 = std::chrono::high_resolution_clock::now();
-    d_sort = tso2 - tso1;
+	// update the last k-mer
+	uint8_t qual = ((bc >> 2) & 63); // get bits 3-8
+	if ( (bc == 0) || (qual < settings->min_qual) ){ // no call if all 0 bits or quality below threshold
+		last_invalid = cycle; // TODO append an N as basecall? Could be a bad idea
+	}
+    unsigned mask = 3;
+    if (flags != 0) // if read is valid
+        appendNucleotideToSequenceStoreVector(revtwobit_repr(bc & mask)); // get the nucleotide as an actual character, disregarding the quality
+
+    // do not update the alignments when reading barcode
+    if (cycle > settings->seqlen) {
+        if (cycle == rlen)
+            if (std::find(settings->barcodeVector.begin(), settings->barcodeVector.end(), getBarcodeString(*settings)) == settings->barcodeVector.end())
+                this->disable(*settings);
+        return;
+    }
 
-  } // END: if ( last_invalid+K-1 < cycle ) ...
-  else {
-    
-    // write a NO_MATCH if cycle > K-1
-    if ( cycle > K-1 ) {
-      for (auto sit = seeds.begin(); sit != seeds.end(); ++sit){
-	//(*sit)->matches.push_back(NO_MATCH);
-	if ( (*sit)->cigar_data.back().offset == NO_MATCH ) {
-	  // continue existing mismatch area
-	  (*sit)->cigar_data.back().length += 1;
+    // do not update the alignments when reading the first kmer_span-1 cycles
+    if (cycle < settings->kmer_span)
+        return;
+
+	// update the alignments
+	GenomePosListType pos;
+    std::vector<bool> posWasUsedForExtension;
+	// if last kmer of read is not valid
+	if (!( last_invalid+settings->kmer_span-1 < cycle )) {
+		// write a NO_MATCH
+        for (auto sit = seeds.begin(); sit != seeds.end(); ++sit)
+            extendSeed(*sit, NO_MATCH, *settings);
 	}
 	else {
-	  // start new mismatch area
-	  (*sit)->cigar_data.emplace_back(1,NO_MATCH);
-	}
-      }
-    }
-  
-  }
 
-  std::vector<std::chrono::high_resolution_clock::duration> dur;
-  dur.push_back(d_vec);
-  dur.push_back(d_seed);
-  dur.push_back(d_add);
-  dur.push_back(d_rem);
-  dur.push_back(d_sort);
-  return dur;
+		// get all occurrences of last_kmer (fwd & rc) from index
+        const std::string sequence = getSequenceString(*settings);
+        std::string::const_iterator it_lastKmer = sequence.end() - settings->kmer_span;
+        HashIntoType last_kmer = 0;
+        hash_fw(it_lastKmer, sequence.end(), last_kmer, *settings);
+		pos = index->retrieve_positions(sequence.substr(sequence.length()-settings->kmer_span), *settings);
+        posWasUsedForExtension.resize(pos.size(), false);
+
+		// check if the current k-mer was trimmed in the index
+		if ( (pos.size() == 1) && ((*pos.begin()).gid == TRIMMED) ) {
+			if(seeds.size()==0 && cycle==settings->kmer_span ){	// Create placeholder seed if first k-mer is trimmed
+				create_placeholder_seed(*settings);
+			} else
+				// pretend that all existing seeds could be extended
+				for(auto sd = seeds.begin() ; sd !=seeds.end(); ++sd)
+					extendSeed(*sd, TRIMMED_MATCH, *settings);
+			// clear the pos list so nothing bad happens in the next steps
+			pos.clear();
+		}
+
+		// not trimmed in the index --> try to extend existing seeds
+		else {
+			// Convert placeholder seeds (if any) to "real" seeds
+			if(seeds.size()==1 && (*(seeds.begin()))->gid==TRIMMED){
+				convertPlaceholder(pos, *settings);
+			}
+			// find support for each candidate: iterate over seed candidates and positions simultaneously
+			auto cPos1 = pos.begin(), cPos2 = pos.begin(); // sliding window [cPos1, cPos2)
+      
+			for (auto cSeed = seeds.begin(); cSeed!=seeds.end(); ++cSeed ) {
+				PositionType seed_pos = (*cSeed)->start_pos + cycle -settings->kmer_span;
+
+                // adjust the window in the position list
+                while( (cPos1!=pos.end()) && (cPos1->pos < seed_pos - settings->window) )
+                    ++cPos1;
+                while( (cPos2!=pos.end()) && (cPos2->pos <= seed_pos + settings->window) )
+                    ++cPos2;
+        
+				// search all positions in the window for the best matching extension of the seed
+				DiffType best_offset = settings->window+1;  // set larger than search window
+				GenomePosListIt best_match = cPos2; // set behind the last element of the window
+				for(GenomePosListIt kmerHitIt = cPos1; kmerHitIt!=cPos2; ++kmerHitIt)
+					if (kmerHitIt->gid == (*cSeed)->gid){
+					    // if offset gets bigger => Deletion in read
+						int offset = kmerHitIt->pos - seed_pos;
+						if ((best_match==cPos2)||(abs(offset) < abs(best_offset))) {
+							best_match = kmerHitIt;
+							best_offset = offset;
+						}
+					}
+        
+				// check if a best match was found for this seed
+				if (best_match != cPos2) {
+						if(extendSeed(*cSeed, best_offset, *settings))
+		            		// if pos was used as match, mark it so that later it does not get converted into a new seed
+		            		posWasUsedForExtension[best_match-pos.begin()] = true;
+                }
+				else{
+					// no position found to extend the current seed
+					extendSeed(*cSeed, NO_MATCH, *settings);
+				}
+			} // END: for(seeds...)
+		} // END: not trimmed
+	} // END: if last kmer is valid
+
+    filterAndCreateNewSeeds(*settings, pos, posWasUsedForExtension);
+
+	return;
 }
 
 
 
 // disable this alignment, i.e. delete all seeds and set the last_invalid indicator to the
 // end of the read. --> This read will not be aligned and consumes almost no space.
-void ReadAlignment::disable() {
-  last_invalid = rlen;
+void ReadAlignment::disable(AlignmentSettings & settings) {
+  last_invalid = settings.seqlen;
   seeds.clear();
   flags = 0;
-}
-
-
-// generate the SAM flags for a seed
-uint32_t ReadAlignment::get_SAM_flags(uint32_t sd) {
-  if ( sd < seeds.size() ) {
-    uint32_t flags = 0;
-    // flag for the reverse strand alignment
-    if (seeds[sd]->start_pos < 0)
-      flags += 16;
-
-    // Primary/Secondary alignment
-    // the "Primary" alignment is the alignment with the highest number of matches
-    // in case of a tie, the left-most alignment wins (including negative positions)
-    bool primary = true;
-    for(uint32_t i = 0; i < seeds.size(); i++) {
-      if(seeds[i]->num_matches > seeds[sd]->num_matches || (seeds[i]->num_matches == seeds[sd]->num_matches && i < sd)) {
-	primary = false;
-      }
-    }
-    if (!primary)
-      flags += 256;
-    return flags;
-  }
-  else {
-    throw std::length_error(std::string("Error generating SAM flags: requested alignment ID (")+std::to_string(sd)+std::string(") exceeds number of alignments (")+std::to_string(seeds.size())+std::string(")!"));
-  }
+  sequenceLen=0;
+  sequenceStoreVector.clear();
 }
 
 
 // obtain start position of a seed according to SAM (leftmost) 
-PositionType ReadAlignment::get_SAM_start_pos(uint32_t sd) {
-  PositionType pos = seeds[sd]->start_pos;
+PositionType ReadAlignment::get_SAM_start_pos(USeed & sd, AlignmentSettings & settings) {
+  PositionType pos = sd->start_pos;
   if (pos < 0) {
-    pos = -pos - rlen + K;
+    if (sd->cigar_data.back().offset == NO_MATCH)
+        pos = -pos - settings.seqlen + settings.kmer_span - (++sd->cigar_data.rbegin())->offset;
+    else
+        pos = -pos - settings.seqlen + settings.kmer_span - (sd->cigar_data.rbegin())->offset;
   }
   return pos;
 }
 
 
-// calculate a quality score according to SAM
-uint16_t ReadAlignment::get_SAM_quality(uint32_t sd) {
-  CountType best_match = 0;
-  for (uint32_t s = 0; s < seeds.size(); s++) {
-    best_match = std::max(seeds[s]->num_matches, best_match);
-  }
-  double total_weighted_matches = 0.;
-  for (uint32_t s = 0; s < seeds.size(); s++) {
-    total_weighted_matches = std::pow(4., double(seeds[s]->num_matches - best_match));
-  }
-
-  double prob = std::pow(4., double(seeds[sd]->num_matches - best_match)) / total_weighted_matches;
-  int score = -10 * std::log10(prob);
-  if (score > 255 || score < 0)
-    score = 255;
-  return score;
-}
-
-
-
-
-
-
-
-
-
-
-/* Calculate the mapping quality for all alignments of the read based on the other alignments
-   and the number of matching positions.
- */
-
+// Calculate the mapping quality for all alignments of the read based on the other alignments and the number of matching positions.
 int16_t MAPQ(const SeedVec &sv){
   return sv.size();
   }
-
-
-/* Helper function; checks if the position at 'sit' is sane. only applicable for reads matching 
-   to the backwards genome sequence (forward sanity check is MUCH easier) */
-bool is_sane(std::vector<DiffType>::iterator sit,
-	     const std::vector<DiffType> &matches){
-  DiffType dist = 0;
-  DiffType this_offset = *sit;
-  // find the OFFSET of the next matching region (after the next NOMATCH region)
-  bool passed_NOMATCH = false;
-  while(!((*sit!=NO_MATCH)&&passed_NOMATCH) && (sit!=matches.end())){
-    if(*sit == NO_MATCH) {passed_NOMATCH = true;}
-    ++sit;
-    ++dist;
-  }
-  
-  if (sit == matches.end()) {
-    return true;
-  }
-  else {
-    int offset_change = *sit - this_offset;
-    return ((dist-K+1 - offset_change*(offset_change>0)) > 0);
-  }
-}
-
-/* Construct a CIGAR string from a seed */
-std::string CIGAR(const Seed &seed){
-  // Alignments are always reported in forward direction wrt the reference sequence --> take care
-  bool fw = (seed.start_pos >= 0); // Read was mapped to the forward sequence
-  CigarVector cig;
-  if (fw) {
-    cig = seed.cigar_data;
-  }
-  else {
-    // need to adjust the offsets --> find last offset != NO_MATCH
-    auto rit = seed.cigar_data.rbegin();
-    while((rit != seed.cigar_data.rend()) && (rit->offset == NO_MATCH)) {
-      ++rit;
-    }
-    int loffset;
-    if (rit != seed.cigar_data.rend())
-      loffset = rit->offset;
-    else
-      loffset = 0;
-    cig.reserve(seed.cigar_data.size());
-    // reverse direction of 'cigar_data' and adjust offsets
-    for(rit = seed.cigar_data.rbegin(); rit != seed.cigar_data.rend(); ++rit) {
-      if (rit->offset != NO_MATCH)
-	cig.emplace_back(rit->length,loffset - rit->offset);
-      else
-	cig.emplace_back(rit->length,NO_MATCH);
-    }
-  }
-  
-  // Now, calculate the CIGAR string
-  std::string cigar;
-  if (cig.size() > 0) {
-    int previous_offset = 0; // the offset in the previous matching region
-    for (uint32_t i = 0; i < cig.size(); i++) {
-      // extend the cigar string accordingly
-      if ( cig[i].offset != NO_MATCH ) {
-	cigar.append(std::to_string(cig[i].length)+std::string("M"));
-	previous_offset = cig[i].offset;
-      }
-      else {
-	int offset_change = 0;
-	int mm_len = cig[i].length -K+1;
-	// correct the length of the mismatch region if there are insane positions
-	if ( i+1 < cig.size() ) {
-	  offset_change = cig[i+1].offset - previous_offset;
-	  if (offset_change > mm_len) {
-	    // offset change cannot be larger than the mismatch area
-	    cig[i].length += (offset_change - mm_len);
-	    cig[i+1].length -= (offset_change - mm_len);
-	    mm_len = cig[i].length -K+1;
-	  }
-	}
-
-	if ( offset_change == 0 ) {
-	  // Sequence mismatch(es)
-	  if ( i > 0 && i < cig.size()-1 ) {
-	    cigar.append(std::to_string(cig[i].length -K+1)+std::string("X"));
-	  }
-	  else {
-	    // report as soft-clipped at the beginning/end of the read
-	    cigar.append(std::to_string(cig[i].length)+std::string("S"));
-	  }
-	}
-	else {
-	  // Number of insertions
-	  int insertions = cig[i].length -K+1;
-	  // Number of deletions
-	  int deletions = - (offset_change - insertions);
-	  // append to cigar string
-	  if ( insertions > 0 )
-	    cigar.append(std::to_string(insertions)+std::string("I"));
-	  if ( deletions > 0 )
-	    cigar.append(std::to_string(deletions)+std::string("D"));
-	}
-      }
-    }
-  }
-
-  /*int count = K-1; // number of matches/mismatches in a row
-    bool previous_match = true; // was the previous k-mer a match? -> recognize changes
-    // change initial values if the read starts with a mismatch
-    if(*(matches.begin()) == NO_MATCH) {
-      count = K-1; 
-      previous_match = false; 
-    }
-    int previous_offset = 0; // the offset in the previous matching region
-    int pos = 0; // current position in the read
-    for(auto it = matches.begin(); it != matches.end(); ++it){
-      if ((*it) != NO_MATCH) {
-	// The k-mer could be matched
-	if (previous_match) {
-	  // Check the sanity of this match, if found on backward seq
-	  bool sane = true;
-	  if (!fw){
-	    sane = is_sane(it, matches);
-	  }
-	  if (sane) {
-	    // Continue a match
-	    count += 1;
-	    previous_offset = (*it);
-	  }
-	  else {
-	    // end the match here, treat this position as mismatch
-	    //std::cout << "INSANE position!!!" << std::endl;
-	    // start a new mismatch area here. Don't care about countinuing, this happens below
-	    // Finish a match region
-	    cigar.append(std::to_string(count)+std::string("M"));
-	    // Start a new mismatch region
-	    count = 1; 
-	    previous_match = false;
-	  }
-	}
-	// the previous k-mer was not matched
-	else {
-	  int offset_change = (*it) - previous_offset;
-	  // check the sanity of the match
-	  bool sane = ((count-K+1 - offset_change*(offset_change>0)) >= 0) && fw;
-	  // add the criterium for backward matches here (as above)
-	  sane = sane || (!fw && is_sane(it, matches));
-	  if (sane) {
-	    // Finish a mismatch region
-	    if (offset_change == 0) {
-	      if(count > K-1){
-		// Sequence mismatch(es)
-		cigar.append(std::to_string(count-K+1)+std::string("X"));
-	      }
-	    }
-	    else {
-	      // cure only on forward mapping reads
-	      if (count > K-1){
-		// Insertion compared to reference
-		cigar.append(std::to_string(count-K+1)+std::string("I"));
-	      }
-	      if (count-offset_change > K-1){ 
-		// In this case, a string of length count-offset_change was deleted from reference
-		cigar.append(std::to_string(count-offset_change-K+1)+std::string("D"));
-	      }
-	    }
-	    // Start a new match region. The first k-mer represents K matching positions
-	    count = K;
-	    previous_match = true;
-	    previous_offset = (*it);
-	  }
-	  else {
-	    // insane match --> treat as mismatch
-	    count += 1;
-	    previous_match = false;
-	  }
-	}
-      }
-      else {
-	// The k-mer could NOT be matched
-	if (!previous_match) {
-	  // Continue a mismatch
-	  count += 1;
-	} else {
-	  // Finish a match region
-	  cigar.append(std::to_string(count)+std::string("M"));
-	  // Start a new mismatch region
-	  count = 1; 
-	  previous_match = false;
-	}
-      }
-      // increase the position counter
-      pos++;
-    }
-    // Complete the cigar string
-    if (previous_match) {
-      // Fill cigar string with matches
-      cigar.append(std::to_string(count)+std::string("M"));
-    }
-    else {
-      // Fill cigar string with mismatches
-      // TODO: probably report this as "soft-clipping"
-      cigar.append(std::to_string(count)+std::string("X"));
-    }
-    }*/
-  return cigar;
-}
diff --git a/lib/alnread.h b/lib/alnread.h
index e16bede..040a213 100644
--- a/lib/alnread.h
+++ b/lib/alnread.h
@@ -1,42 +1,37 @@
 #ifndef ALNREAD_H
 #define ALNREAD_H
 
-#include <chrono>
-
 #include "headers.h"
 #include "definitions.h"
 #include "kindex.h"
 #include "tools.h"
+#include <seqan/basic.h>
+#include <seqan/bam_io.h>
 
 
 //-------------------------------------------------------------------//
 //------  The Seed data structure  ----------------------------------//
 //-------------------------------------------------------------------//
 
-struct CigarElement {
-  CountType length;
-  DiffType offset;
-  CigarElement (CountType l, DiffType o): length(l), offset(o) {};
-  CigarElement (): length(0), offset(NO_MATCH) {};
-};
-
-
-typedef std::vector<CigarElement> CigarVector;
 
 // a Seed stores the alignment of a read to a target genome
 struct Seed {
+
   // internal sequence ID of taget genome
   GenomeIdType gid;
+
   // (estimated) start position of the read on the target
   PositionType start_pos;
-  // number of matches
+
+  // number of matching bases
   CountType num_matches;
 
-  // list of all observed matches
-  //std::vector<DiffType> matches;
   // Information about matches/mismatches (similar to CIGAR). The last element is the current one
   CigarVector cigar_data;
 
+  // return Seqans String of CigarElement
+  seqan::String<seqan::CigarElement<> > returnSeqanCigarString();
+
   // get the size of the serialized object
   uint16_t serialize_size();
 
@@ -47,15 +42,12 @@ struct Seed {
   uint16_t deserialize(char* d);
 };
 
-typedef std::unique_ptr<Seed> USeed;
 
+typedef std::unique_ptr<Seed> USeed;
 // compare function to sort Seed objects by position
-bool seed_compare (Seed i,Seed j);
 bool seed_compare_pos (const USeed & i, const USeed & j);
-bool seed_compare_num_matches (const USeed & i, const USeed & j);
-
-// std::vector of Seed pointers is much faster
-typedef std::vector<USeed> SeedVec;
+// std::list of Seed pointers is much faster
+typedef std::list<USeed> SeedVec;
 // a SeedVec Iterator
 typedef SeedVec::iterator SeedVecIt;
 
@@ -65,22 +57,39 @@ typedef SeedVec::iterator SeedVecIt;
 //------  The Read-Alignment class  ---------------------------------//
 //-------------------------------------------------------------------//
 
+
 class ReadAlignment {
+
  private:
   // read length
-  const CountType rlen;
+  CountType rlen;
+
+  // sequence of the read so far, saved as vector<uint8_t> so interpretation is not that trivial. Contains barcode
+  CountType sequenceLen=0;
+  std::vector<uint8_t> sequenceStoreVector;
+
+  // Extend or create a placeholder seed for read with only trimmed matches
+  void create_placeholder_seed(AlignmentSettings & settings);
+
+  // convert a placeholder seed to a set of normal seeds
+  void convertPlaceholder(GenomePosListType& pos, AlignmentSettings & settings);
 
   // Create new seeds from a list of kmer positions and add to current seeds
-  void add_new_seeds(GenomePosListType& pos);
+  void add_new_seeds(GenomePosListType& pos, std::vector<bool> & posWasUsedForExtension, AlignmentSettings & settings);
 
+  // filter seeds based on filtering mode and q gram lemma. Also calls add_new_seeds.
+  void filterAndCreateNewSeeds(AlignmentSettings & settings, GenomePosListType & pos, std::vector<bool> & posWasUsedForExtension);
+
+  // updates cigar_data accordingly to a new matching kmer
+  void addMatchingKmer(USeed & s, DiffType offset, AlignmentSettings & settings);
+
+  // Extend an existing CIGAR string for a seed based on a new basecall. return false if last CIGAR element after extension is mismatch area (NO_MATCH), true otherwise.
+  bool extendSeed(USeed & s, DiffType offset, AlignmentSettings & settings);
 
  public: // have everything public until the apropriate access functions are available
 
   // Flags for this read; 1 = read is valid (illumina flag)
-  unsigned char flags;
-
-  // the k-mer value observed in the last cycle
-  HashIntoType last_kmer;
+  unsigned char flags = 1;
 
   // the last invalid cycle
   CountType last_invalid;
@@ -91,9 +100,12 @@ class ReadAlignment {
   // a list of all found seeds
   SeedVec seeds;
 
-  // Create a new read alignment given a certain read length
-  ReadAlignment(CountType rl): rlen(rl), last_kmer(0), last_invalid(0), cycle(0) {seeds.clear();};
+  // max number of matches for this read
+  CountType max_num_matches;
 
+  // set the read_length
+  void set_rlen(CountType r);
+  
   // get the size of the serialized object
   uint64_t serialize_size();
 
@@ -103,33 +115,31 @@ class ReadAlignment {
   // deserialize (read) data from a char vector
   uint64_t deserialize(char* d);
 
+  // convert and return sequence of the read as string (without barcode)
+  std::string getSequenceString(AlignmentSettings & settings);
+
+  // convert and return sequence of the barcode
+  std::string getBarcodeString(AlignmentSettings & settings);
+
+  // append one nucleotide to sequenceStoreVector
+  void appendNucleotideToSequenceStoreVector(char nuc);
+
   // extend the alignment by one basecall using reference database index
-  std::vector<std::chrono::high_resolution_clock::duration> extend_alignment(char bc, KixRun* index, AlignmentSettings* settings);
+  void extend_alignment(char bc, KixRun* index, AlignmentSettings* settings);
 
   // disable this alignment
-  void disable();
-
-  // generate the SAM flags for a seed
-  uint32_t get_SAM_flags(uint32_t sd);
+  void disable(AlignmentSettings & settings);
 
   // obtain start position of a seed according to SAM (leftmost) 
-  PositionType get_SAM_start_pos(uint32_t sd);
-
-  // calculate a quality score according to SAM
-  uint16_t get_SAM_quality(uint32_t sd);
+  PositionType get_SAM_start_pos(USeed & sd, AlignmentSettings & settings);
 
 }; // END class ReadAlignment 
 
 
 
-
-
-
+//-------------------------------------------------------------------//
 //------  Other helper functions  -----------------------------------//
-//CountType num_matches(const std::vector<DiffType> &matches);
+//-------------------------------------------------------------------//
 int16_t MAPQ(const SeedVec &sv);
-std::string CIGAR(const Seed &seed);
-
-
 
 #endif /* ALNREAD_H */
diff --git a/lib/alnstream.cpp b/lib/alnstream.cpp
index 0f969ba..88dc524 100644
--- a/lib/alnstream.cpp
+++ b/lib/alnstream.cpp
@@ -23,7 +23,7 @@ uint64_t oAlnStream::lz4write(const char* source, uint64_t size) {
   // write the block size
   if ( !fwrite(&compressed_size, 1, sizeof(uint32_t), ofile) )
     throw std::runtime_error("Error writing block size to file while compressing data with LZ4.");
-  
+
   // write the data chunk
   if ( !fwrite(buf.data(), 1, compressed_size, ofile) )
     throw std::runtime_error("Error writing data to file while compressing with LZ4.");
@@ -117,7 +117,7 @@ uint64_t oAlnStream::open(std::string fname) {
 
 // writes a read alignment to the output Alignment file. 
 // Buffering is handled internally
-uint64_t oAlnStream::write_alignment(ReadAlignment & al) {
+uint64_t oAlnStream::write_alignment(ReadAlignment * al) {
   if ( (!ofile && (format == 0 || format == 2)) || (ozfile == Z_NULL && format == 1) ){
     throw std::runtime_error("Could not write alignment to file. File handle not valid.");
   }
@@ -125,7 +125,7 @@ uint64_t oAlnStream::write_alignment(ReadAlignment & al) {
     throw std::length_error("Could not write alignment to file. All alignments were already written.");
   }
 
-  std::vector<char> data = al.serialize();
+  std::vector<char> data = al->serialize();
   uint64_t al_size = data.size();
 
   // first, write the size of the serialized alignment (uint32_t = 4 bytes)
@@ -286,10 +286,11 @@ uint64_t iAlnStream::open(std::string fname) {
       // root directory name size
       bytes += fread(&root_size,sizeof(uint16_t),1,ifile);
       // root name
-      char tmp[root_size+1];
+      char * tmp = new char[root_size+1];
       bytes += fread(tmp,1,root_size,ifile);
       tmp[root_size] = 0; // make the string null-terminated
       root = tmp;
+      delete tmp;
       // read the read length
       bytes += fread(&rlen,sizeof(CountType),1,ifile);
       // read the number of reads
@@ -307,10 +308,11 @@ uint64_t iAlnStream::open(std::string fname) {
       // root directory name size
       bytes += gzread(izfile,&root_size,sizeof(uint16_t));
       // root name
-      char tmp[root_size+1];
+      char * tmp = new char[root_size+1];
       bytes += gzread(izfile,tmp,root_size);
       tmp[root_size] = 0; // make the string null-terminated
       root = tmp;
+      delete tmp;
       // read the read length
       bytes += gzread(izfile,&rlen,sizeof(CountType));
       // read the number of reads
@@ -324,7 +326,7 @@ uint64_t iAlnStream::open(std::string fname) {
 
 
 
-ReadAlignment iAlnStream::get_alignment() {
+ReadAlignment* iAlnStream::get_alignment() {
 
   if ( (format==0 && !ifile) || (format==1 && izfile == Z_NULL) ){
     throw std::runtime_error("Could not load alignment from file. File handle not valid.");
@@ -400,8 +402,9 @@ ReadAlignment iAlnStream::get_alignment() {
   }
 
   // finally, deserialize the alignment
-  ReadAlignment ra (rlen);
-  ra.deserialize(data.data());
+  ReadAlignment* ra = new ReadAlignment();
+  ra->set_rlen(rlen);
+  ra->deserialize(data.data());
   
   num_loaded++;
 
@@ -436,6 +439,18 @@ bool iAlnStream::close() {
 //------  The StreamedAlignment class  ------------------------------//
 //-------------------------------------------------------------------//
 
+StreamedAlignment& StreamedAlignment::operator=(const StreamedAlignment& other) {
+  if(&other == this)
+    return *this;
+  
+  lane = other.lane;
+  tile = other.tile;
+  root = other.root;
+  rlen = other.rlen;
+  
+  return *this;
+}
+
 std::string StreamedAlignment::get_bcl_file(uint16_t cycle) {
   std::ostringstream path_stream;
 
@@ -477,13 +492,11 @@ void StreamedAlignment::create_directories(AlignmentSettings* settings) {
 
   boost::filesystem::create_directories(path_stream.str());
 
-  if (settings->sam_dir != "") {
-    std::ostringstream sam_stream;
-    sam_stream << settings->sam_dir;
-    sam_stream << "/L00" << lane;
-    
-    boost::filesystem::create_directories(sam_stream.str());
-  }
+  std::ostringstream sam_stream;
+  sam_stream << settings->out_dir;
+  sam_stream << "/L00" << lane;
+  
+  boost::filesystem::create_directories(sam_stream.str());
 }
 
 
@@ -503,8 +516,10 @@ void StreamedAlignment::init_alignment(AlignmentSettings* settings) {
 
   // write empty read alignments for each read
   for (uint32_t i = 0; i < num_reads; ++i) {
-    ReadAlignment ra (rlen);
+    ReadAlignment * ra = new ReadAlignment();
+    ra->set_rlen(rlen);
     output.write_alignment(ra);
+    delete ra;
   }
 
   if(!output.close()) {
@@ -520,7 +535,6 @@ uint64_t StreamedAlignment::extend_alignment(uint16_t cycle, KixRun* index, Alig
   // 1. Open the input file
   //-----------------------
   std::string in_fname = get_alignment_file(cycle-1, settings->temp_dir);
-  std::string out_fname = get_alignment_file(cycle, settings->temp_dir);
   std::string bcl_fname = get_bcl_file(cycle);
   std::string filter_fname = get_filter_file();
 
@@ -537,6 +551,7 @@ uint64_t StreamedAlignment::extend_alignment(uint16_t cycle, KixRun* index, Alig
 
   // 2. Open output stream
   //----------------------------------------------------------
+  std::string out_fname = get_alignment_file(cycle, settings->temp_dir);
   oAlnStream output (lane, tile, cycle, root, rlen, num_reads, settings->block_size, settings->compression_format);
   output.open(out_fname);
 
@@ -570,25 +585,26 @@ uint64_t StreamedAlignment::extend_alignment(uint16_t cycle, KixRun* index, Alig
   //-------------------------------------------------
   uint64_t num_seeds = 0;
   for (uint64_t i = 0; i < num_reads; ++i) {
-    ReadAlignment ra = input.get_alignment();
+    ReadAlignment* ra = input.get_alignment();
     if (filters.size() > 0 && filters.has_next()) {
       // filter file was found -> apply filter
       if(filters.next()) {
-	ra.extend_alignment(basecalls.next(), index, settings);
-	num_seeds += ra.seeds.size();
+        ra->extend_alignment(basecalls.next(), index, settings);
+        num_seeds += ra->seeds.size();
       }
       else {
-	basecalls.next();
-	ra.disable();
+        basecalls.next();
+        ra->disable(*settings);
       }
     }
     // filter file was not found -> treat every alignment as valid
     else {
-      ra.extend_alignment(basecalls.next(), index, settings);
-      num_seeds += ra.seeds.size();
+      ra->extend_alignment(basecalls.next(), index, settings);
+      num_seeds += ra->seeds.size();
     }
 
     output.write_alignment(ra);
+    delete ra;
   }
 
   // 6. Close files
@@ -606,10 +622,6 @@ uint64_t StreamedAlignment::extend_alignment(uint16_t cycle, KixRun* index, Alig
   return num_seeds;
 }
 
-
-
-
-
 //-------------------------------------------------------------------//
 //------  Streamed SAM generation -----------------------------------//
 //-------------------------------------------------------------------//
@@ -617,32 +629,21 @@ uint64_t StreamedAlignment::extend_alignment(uint16_t cycle, KixRun* index, Alig
 uint64_t alignments_to_sam(uint16_t ln, uint16_t tl, std::string rt, CountType rl, KixRun* index, AlignmentSettings* settings) {
   // set the file names
   std::string temp;
-  if (settings->temp_dir == "") {
+  if (settings->temp_dir == "")
     temp = rt;
-  }
-  else {
+  else
     temp = settings->temp_dir;
-  }
-  std::string sam_dir;
-  if (settings->sam_dir == "") {
-    sam_dir = temp;
-  }
-  else {
-    sam_dir = settings->sam_dir;
-  }
+
+  std::string sam_dir = settings->out_dir;
   std::string filter_fname = filter_name(rt, ln, tl);
-  std::string position_fname = position_name(rt, ln, tl);
   std::string alignment_fname = alignment_name(temp, ln, tl, rl);
-  std::string sam_fname = sam_tile_name(sam_dir, ln, tl);
+  std::string sam_fname = sam_tile_name(sam_dir, ln, tl, settings->write_bam);
 
   // check if files exist
-  if ( !file_exists(alignment_fname) ) {
+  if ( !file_exists(alignment_fname) )
     throw std::runtime_error(std::string("Could not create SAM file. Alignment file not found: ")+ alignment_fname);
-  }
-  if ( !file_exists(filter_fname) ) {
+  if ( !file_exists(filter_fname) )
     std::cerr << "Could not find .filter file: " <<  filter_fname << std::endl;
-  }
-
 
   // open the alignment file
   iAlnStream input ( settings->block_size, settings->compression_format );
@@ -663,59 +664,145 @@ uint64_t alignments_to_sam(uint16_t ln, uint16_t tl, std::string rt, CountType r
   }
 
 
-  // open the positions file, if applicable
- 
+  /////////////////////////////////////////////
+  // generate sam file
 
-  // open the SAM file and write header
-  std::ofstream samfile;
-  samfile.open(sam_fname);
-  samfile << index->get_SAM_header();
+  // set BamFileOut object
+  seqan::CharString samFileName = sam_fname;
+  seqan::BamFileOut samFileOut(seqan::toCString(samFileName));
+  seqan::StringSet<seqan::CharString> referenceNames;
+  for (unsigned i=0; i<seqan::length(index->seq_names); i++) {
+      seqan::appendValue(referenceNames, index->seq_names[i]);
+  }
+  seqan::NameStoreCache<seqan::StringSet<seqan::CharString> > referenceNamesCache(referenceNames);
+  seqan::BamIOContext<seqan::StringSet<seqan::CharString> > bamIOContext(referenceNames, referenceNamesCache);
+  samFileOut.context = bamIOContext;
 
+
+  /////////////////
+  // set SAM header
+  seqan::BamHeaderRecord headerRecord;
+  
+  // @HD header.
+  seqan::clear(headerRecord);
+  headerRecord.type = seqan::BAM_HEADER_FIRST;
+  seqan::resize(headerRecord.tags, 2);
+  headerRecord.tags[0].i1 = "VN";
+  headerRecord.tags[0].i2 = "1.5";
+  headerRecord.tags[1].i1 = "GO";
+  headerRecord.tags[1].i2 = "query";
+  seqan::writeHeader(samFileOut, headerRecord);
+  
+  // @SQ header.
+  std::stringstream ss;
+  for ( uint64_t i = 0; i < index->seq_names.size(); i++  ) {
+    ss.str(std::string()); // clear string stream
+    seqan::clear(headerRecord);
+    headerRecord.type = seqan::BAM_HEADER_REFERENCE;
+    seqan::resize(headerRecord.tags, 2);
+  	headerRecord.tags[0].i1 = "SN";
+  	headerRecord.tags[0].i2 = index->seq_names[i];
+
+  	headerRecord.tags[1].i1 = "LN";
+  	ss << index->seq_lengths[i];
+  	headerRecord.tags[1].i2 = ss.str();
+
+    seqan::writeHeader(samFileOut, headerRecord);
+  }
+  
+  // @PG header.
+  seqan::clear(headerRecord);
+  headerRecord.type = seqan::BAM_HEADER_PROGRAM;
+  seqan::resize(headerRecord.tags, 3);
+  headerRecord.tags[0].i1 = "ID";
+  headerRecord.tags[0].i2 = "hilive";
+  headerRecord.tags[1].i1 = "PN";
+  headerRecord.tags[1].i2 = "HiLive";
+  headerRecord.tags[2].i1 = "VN";
+  ss.str(std::string());
+  ss << HiLive_VERSION_MAJOR << "." << HiLive_VERSION_MINOR;
+  headerRecord.tags[2].i2 = ss.str();
+  seqan::writeHeader(samFileOut, headerRecord);
+  
+
+  /////////////////
+  // prepare sam entries
   uint64_t num_alignments = 0;
 
-  // read alignments and write to SAM
   for (uint64_t i = 0; i < num_reads; ++i) {
-    ReadAlignment ra = input.get_alignment();
+    ReadAlignment * ra = input.get_alignment();
+	
     // if either: filter file is open and all filter flags were loaded and the filter flag is > 0
     //    or: the filter file is not available
-    if ((filters.size()>0 && filters.next()) || filters.size() == 0) {
-      num_alignments += ra.seeds.size();
-      std::stringstream readname;
-	// Read name format <instrument‐name>:<run ID>:<flowcell ID>:<lane‐number>:<tile‐number>:<x‐pos>:<y‐pos>
-      //readname << "<instrument>:<run-ID>:<flowcell-ID>:" << ln << ":" << tl << ":<xpos>:<ypos>:" << i;
-      readname << "lane." << ln << "|tile." << tl << "|read." << i;
-      for (uint64_t s = 0; s < ra.seeds.size(); s++) {
-	std::stringstream aln;
-	// write the SAM compliant alignment information
-	// Read name
-	aln << readname.str() << "\t";
-	// Flag
-	aln << ra.get_SAM_flags(s) << "\t";
-	// Reference name
-	aln << index->get_name(ra.seeds[s]->gid) << "\t";
-	// Position
-	aln << ra.get_SAM_start_pos(s) << "\t";
-	// Mapping Quality
-	aln << ra.get_SAM_quality(s) << "\t";
-	// CIGAR string
-	Seed sd = *(ra.seeds[s]);
-	aln << CIGAR(sd) << "\t";
-	// RNEXT field is unavailable
-	aln << "*\t";
-	// PNEXT field is unavailable
-	aln << "0\t";
-	// TLEN field is unavailable
-	aln << "0\t";
-	// Sequence is only added on request
-	aln << "*\t";
-	// Qualities are only added on request
-	aln << "*\n";
-	samfile << aln.str();
-      }
+    // then proceed else skip
+    if (!((filters.size()>0 && filters.next()) || filters.size() == 0))
+        continue;
+
+    num_alignments += ra->seeds.size();
+
+    // Read name format <instrument‐name>:<run ID>:<flowcell ID>:<lane‐number>:<tile‐number>:<x‐pos>:<y‐pos>
+    //readname << "<instrument>:<run-ID>:<flowcell-ID>:" << ln << ":" << tl << ":<xpos>:<ypos>:" << i;
+    std::stringstream readname;
+    readname << "lane." << ln << "|tile." << tl << "|read." << i;
+
+    /////////////////
+    // set sam entries
+    seqan::BamAlignmentRecord record;
+    for (SeedVecIt it = ra->seeds.begin(); it != ra->seeds.end(); ++it) {
+        seqan::clear(record);
+
+        record.qName = readname.str();
+
+        record.rID = (*it)->gid;
+
+        record.beginPos = ra->get_SAM_start_pos(*it, *settings)-1; // seqan expects 0-based positions, but in HiLive we use 1-based
+        if (record.beginPos < 0)
+            continue;
+
+        record.cigar = (*it)->returnSeqanCigarString();
+
+        // flag and seq
+        record.flag = 0;
+        record.seq = ra->getSequenceString(*settings);
+        if ((*it)->start_pos < 0) { // if read matched reverse complementary
+            seqan::reverseComplement(record.seq);
+            record.flag |= 16;
+        }
+        if (it != ra->seeds.begin()) { // if current seed is secondary alignment
+            record.flag |= 256;
+            seqan::clear(record.seq);
+            record.qual = "*";
+        }
+
+
+        // check if cigar string sums up to read length
+        unsigned cigarElemSum = 0;
+        unsigned deletionSum = 0;
+        for (seqan::Iterator<seqan::String<seqan::CigarElement<> > >::Type elem = seqan::begin(record.cigar); elem != end(record.cigar); ++elem) {
+            if ((elem->operation == 'M') || (elem->operation == 'I') || (elem->operation == 'S') || (elem->operation == '=') || (elem->operation == 'X')) 
+                cigarElemSum += elem->count;
+            if (elem->operation == 'D')
+                deletionSum += elem->count;
+        }
+        if (cigarElemSum != settings->seqlen) {
+            std::cerr << "WARNING: Excluded an alignment of read " << record.qName << " at position " << ra->get_SAM_start_pos(*it, *settings) << " because its cigar vector had length " << cigarElemSum << std::endl;
+            continue;
+        }
+
+        // tags
+        seqan::BamTagsDict dict;
+        seqan::appendTagValue(dict, "AS", (*it)->num_matches);
+        if (settings->seqlen < settings->rlen) // if demultiplexing is on
+            seqan::appendTagValue(dict, "BC", ra->getBarcodeString(*settings));
+        seqan::appendTagValue(dict, "NM", deletionSum + settings->seqlen - (*it)->num_matches);
+        record.tags = seqan::host(dict);
+
+
+        // write record to disk
+        seqan::writeRecord(samFileOut, record);
     }
   }
-  samfile.close();
-
+  
   std::ofstream statsfile;
   statsfile.open(sam_fname+".stats");
   statsfile << "Number of reads\t" << num_reads << std::endl;
@@ -724,4 +811,3 @@ uint64_t alignments_to_sam(uint16_t ln, uint16_t tl, std::string rt, CountType r
 
   return 0;
 }
-
diff --git a/lib/alnstream.h b/lib/alnstream.h
index 5afd108..613feff 100644
--- a/lib/alnstream.h
+++ b/lib/alnstream.h
@@ -7,6 +7,9 @@
 #include "tools.h"
 #include "alnread.h"
 #include "illumina_parsers.h"
+#include <seqan/basic.h>
+#include <seqan/sequence.h>
+#include <seqan/bam_io.h>
 
 // Output alignment stream: write alignments to file one by one
 class oAlnStream {
@@ -52,7 +55,7 @@ class oAlnStream {
 
   // writes a read alignment to the output Alignment file. 
   // Buffering is handled internally
-  uint64_t write_alignment(ReadAlignment & al);
+  uint64_t write_alignment(ReadAlignment * al);
   
   // checks if the correct number of alignments was written and closes the Alignment file
   bool close();
@@ -104,7 +107,7 @@ class iAlnStream {
 
   // loads a read alignment from the input Alignment file. 
   // Buffering is handled internally
-  ReadAlignment get_alignment();
+  ReadAlignment* get_alignment();
   
   // checks if the correct number of alignments was loaded and closes the Alignment file
   bool close();
@@ -153,6 +156,8 @@ class StreamedAlignment {
  public:
   StreamedAlignment(uint16_t ln, uint16_t tl, std::string rt, CountType rl): lane(ln), tile(tl), root(rt), rlen(rl) {};  
 
+  StreamedAlignment& operator=(const StreamedAlignment& other);
+  
   // create directories required to store the alignment files (only if not stored in root)
   void create_directories(AlignmentSettings* settings);
 
@@ -161,7 +166,6 @@ class StreamedAlignment {
   
   // extend an existing alignment from cycle <cycle-1> to <cycle>
   uint64_t extend_alignment(uint16_t cycle, KixRun* index, AlignmentSettings* settings);
-
 }; /* END class StreamedAlignment */
 
 
diff --git a/lib/argument_parser.cpp b/lib/argument_parser.cpp
new file mode 100644
index 0000000..499e631
--- /dev/null
+++ b/lib/argument_parser.cpp
@@ -0,0 +1,231 @@
+#include "argument_parser.h"
+namespace po = boost::program_options;
+
+
+int parseCommandLineArguments(AlignmentSettings & settings, std::string license, int argc, char const ** argv)
+{
+    po::options_description general("General");
+    general.add_options()
+        ("help,h", "Print this help message and exit")
+        ("license", "Print licensing information and exit");
+
+    po::options_description parameters("Parameters");
+    parameters.add_options()
+        ("BC_DIR", po::value<std::string>(&settings.root)->required(), "Illumina BaseCalls directory")
+        ("INDEX", po::value<std::string>(&settings.index_fname)->required(), "Path to k-mer index")
+        ("CYCLES", po::value<CountType>(&settings.rlen)->required(), "Number of cycles")
+        ("OUTDIR", po::value<std::string>(&settings.out_dir), "Directory to store sam files in [Default: temporary or BaseCalls directory");
+
+    po::options_description io_settings("IO settings");
+    io_settings.add_options()
+        ("temp", po::value<std::string>(&settings.temp_dir)->default_value(""), "Temporary directory for the alignment files [Default: use BaseCalls directory]")
+        //("bam,B", po::bool_switch(&settings.write_bam)->default_value(false), "Create BAM files instead of SAM files [Default: false]")
+        ("keep-files,k", po::bool_switch(&settings.keep_aln_files)->default_value(false), "Keep intermediate alignment files [Default: false]")
+        ("lanes,l", po::value< std::vector<uint16_t> >()->multitoken()->composing(), "Select lane [Default: all lanes]")
+        ("tiles,t", po::value< std::vector<uint16_t> >()->multitoken()->composing(), "Select tile numbers [Default: all tiles]")
+        ("barcodes,b", po::value< std::vector<std::string> >()->multitoken()->composing(), "Enumerate barcodes (must have same length) for demultiplexing, i.e. -b AGGATC -b CCCTTT [Default: no demultiplexing]");
+
+    po::options_description alignment("Alignment settings");
+    alignment.add_options()
+        ("min-errors,e", po::value<CountType>(&settings.min_errors)->default_value(2), "Number of errors tolerated in read alignment [Default: 2]")
+        ("all-best-hit,H", po::bool_switch()->default_value(false), "Report all of the best alignments for each read")
+        ("any-best-hit", po::bool_switch(), "Report one of the best alignments for each read (default)")
+        ("all-best-n-scores,N", po::value<CountType>(&settings.best_n), "Report all alignments of the N best alignment scores for each read")
+        ("all-hits,A", po::bool_switch()->default_value(false), "Report all valid alignments for each read")
+        ("disable-ohw-filter", po::bool_switch(&settings.discard_ohw)->default_value(true), "disable the One-Hit Wonder filter [Default: false]")
+        ("start-ohw", po::value<CountType>(&settings.start_ohw)->default_value(K_HiLive+5), "First cycle to apply One-Hit Wonder filter [Default: K+5]")
+        ("window,w", po::value<DiffType>(&settings.window)->default_value(5), "Set the window size to search for alignment extension, i.e. maximum total insertion/deletion size [Default: 5]")
+        ("min-quality", po::value<CountType>(&settings.min_qual)->default_value(1), "Minimum allowed basecall quality [Default: 1]");
+
+    po::options_description technical("Technical settings");
+    technical.add_options()
+        ("block-size", po::value<uint64_t>()->default_value(64*1024*1024), "Block size for the alignment input/output stream in Bytes. Use -K or -M to specify in Kilobytes or Megabytes")
+        (",K", po::bool_switch()->default_value(false), "Interpret the block-size argument as Kilobytes instead of Bytes")
+        (",M", po::bool_switch()->default_value(false), "Interpret the block-size argument as Megabytes instead of Bytes")
+        ("compression,c", po::value<uint8_t>(&settings.compression_format)->default_value(2), "Compress alignment files. 0: no compression (default) 1: Deflate (smaller) 2: LZ4 (faster)")
+        ("num-threads,n", po::value<int>(), "Number of threads to spawn [Default: all available]");
+
+    po::options_description cmdline_options;
+    cmdline_options.add(general).add(parameters).add(io_settings).add(alignment).add(technical);
+
+    po::options_description visible_options;
+    visible_options.add(general).add(io_settings).add(alignment).add(technical);
+
+    std::stringstream help_message;
+    help_message << "HiLive v"<< HiLive_VERSION_MAJOR << "." << HiLive_VERSION_MINOR << " - Realtime Alignment of Illumina Reads" << std::endl;
+    help_message << "Copyright (c) 2015, Martin S. Lindner" << std::endl;
+    help_message << "HiLive is open-source software. Check with --license for details." << std::endl << std::endl;
+    help_message << "Fixed k-mer size: " << K_HiLive << std::endl << std::endl;
+    help_message << "Usage: " << std::string(argv[0]) << " [options] BC_DIR INDEX CYCLES OUTDIR" << std::endl;
+    help_message << "  BC_DIR       Illumina BaseCalls directory of the sequencing run to analyze" << std::endl;
+    help_message << "  INDEX        Path to k-mer index file (*.kix)" << std::endl;
+    help_message << "  CYCLES       Total number of cycles for read 1" << std::endl;
+    help_message << "  OUTDIR       Directory to store sam files in" << std::endl;
+    help_message << visible_options << std::endl;
+
+    po::positional_options_description p;
+    p.add("BC_DIR", 1);
+    p.add("INDEX", 1);
+    p.add("CYCLES", 1);
+    p.add("OUTDIR", 1);
+
+    
+    // parse the arguments
+
+    po::variables_map vm;
+    try {
+        // parse arguments
+        po::store(po::command_line_parser(argc, argv).options(cmdline_options).positional(p).run(), vm);
+        // first check if -h or --help was called
+        if (vm.count("help")) {
+            std::cout << help_message.str();
+            return 1;
+        }
+        // first check if --license was called
+        if (vm.count("license")) {
+            std::cout << license << std::endl;
+            return 1;
+        }
+
+        // then check arguments
+        po::notify(vm);  
+    }
+    catch ( boost::program_options::required_option& e ) {
+        std::cerr << "Missing Parameter: " << e.what() << std::endl << std::endl;
+        std::cout << help_message.str();
+        return -1;  
+    }
+    catch( boost::program_options::error& e) { 
+        std::cerr << "Error while parsing command line options: " << e.what() << std::endl << std::endl; 
+        std::cout << help_message.str();
+        return -1;  
+    } 
+
+    if (vm.count("OUTDIR"))
+        settings.out_dir = vm["OUTDIR"].as<std::string>();
+    else {
+        if (settings.temp_dir == "") 
+            settings.out_dir = settings.root;
+        else
+            settings.out_dir = settings.temp_dir;
+    }
+
+    if (vm.count("lanes"))
+        settings.lanes = vm["lanes"].as< std::vector<uint16_t> >();
+    else
+        settings.lanes = all_lanes();
+
+    if (vm.count("tiles"))
+        settings.tiles = vm["tiles"].as< std::vector<uint16_t> >();
+    else
+        settings.tiles = all_tiles();
+
+    settings.barcodeVector.clear();
+    settings.seqlen = settings.rlen;
+    if (vm.count("barcodes")) {
+        settings.barcodeVector = vm["barcodes"].as< std::vector<std::string> >();
+        settings.seqlen = settings.rlen - settings.barcodeVector[0].size();
+    }
+
+    if (vm["all-hits"].as<bool>()) {
+        // all hits: disable other modes
+        settings.any_best_hit_mode = false;
+        settings.all_best_hit_mode = false;
+        settings.all_best_n_scores_mode = false;
+    } else if (vm["all-best-hit"].as<bool>()) {
+        // enable all-best-hit mode and disable others
+        settings.any_best_hit_mode = false;
+        settings.all_best_hit_mode = true;
+        settings.all_best_n_scores_mode = false;
+    } else if (vm.count("all-best-n-scores")) {
+        // enable any-best-n mode and get parameter
+        settings.any_best_hit_mode = false;
+        settings.all_best_hit_mode = false;
+        settings.all_best_n_scores_mode = true;
+        settings.best_n = vm["all-best-n-scores"].as<CountType>();
+    } else { // the default behaviour
+        // enable any-best-hit mode and disable others
+        settings.any_best_hit_mode = true;
+        settings.all_best_hit_mode = false;
+        settings.all_best_n_scores_mode = false;
+    }
+        
+    if (vm["-M"].as<bool>())
+        settings.block_size = vm["block-size"].as<uint64_t>()*1024*1024;
+    else if (vm["-K"].as<bool>())
+        settings.block_size = vm["block-size"].as<uint64_t>()*1024;
+    else
+        settings.block_size = vm["block-size"].as<uint64_t>();
+
+    if (vm.count("num-threads")) 
+        settings.num_threads = vm["num-threads"].as<int>();
+    else { // the default case, meaning as much as physically useful
+        uint32_t n_cpu = std::thread::hardware_concurrency();
+        if (n_cpu > 1)
+            settings.num_threads = n_cpu;
+        else
+            settings.num_threads = 1;
+    }
+
+
+    // check paths and file names
+    if (!file_exists(settings.index_fname)){
+        std::cerr << "Input error: Could not find k-mer index file " << settings.index_fname << std::endl;
+        return -1;
+    }
+
+    std::size_t found = settings.root.find("BaseCalls");
+    if (!(found != std::string::npos && found >= settings.root.size()-10)) {
+        std::cerr << "Warning: BaseCalls directory seems to be invalid: " << settings.root << std::endl;
+    } 
+
+    if (!is_directory(settings.root)){
+        std::cerr << "Input error: Could not find BaseCalls directory " << settings.root << std::endl;
+        return -1;
+    }
+
+    for ( uint16_t ln : settings.lanes ) {
+        std::string ln_dir = settings.root;
+        if ( ln < 10 )
+            ln_dir += "/L00";
+        else if ( ln < 100 )
+            ln_dir += "/L0";
+        else
+            ln_dir += "/L";
+        ln_dir += std::to_string(ln);
+        if (!is_directory(ln_dir)){
+            std::cerr << "Input error: Could not find location of Lane " << ln << ": " << ln_dir << std::endl;
+            return -1;
+        }
+    }
+
+
+    // Report the basic settings
+    std::cout << "Running HiLive with       " << settings.num_threads << " thread(s)." << std::endl;
+    std::cout << "BaseCalls directory:      " << settings.root << std::endl;
+    if (settings.temp_dir != "") {
+        std::cout << "Temporary directory:      " << settings.temp_dir << std::endl;
+    }
+    //if (!settings.write_bam)
+    std::cout << "SAM output directory:     " << settings.out_dir << std::endl;
+    //else
+        //std::cout << "BAM output directory:     " << settings.out_dir << std::endl;
+    std::cout << "Lanes:                    ";
+    for ( uint16_t ln : settings.lanes )
+        std::cout << ln << " ";
+    std::cout << std::endl;
+    std::cout << "K-mer index:              " << settings.index_fname << std::endl;
+    std::cout << "Read length:              " << settings.rlen << std::endl;
+    std::cout << "Mapping error:            " << settings.min_errors << std::endl;
+    if (settings.any_best_hit_mode) 
+        std::cout << "Mapping mode:             Any-Best-Hit-Mode" << std::endl;
+    else if (settings.all_best_hit_mode) 
+        std::cout << "Mapping mode:             All-Best-Hit-Mode" << std::endl;
+    else if (settings.all_best_n_scores_mode) 
+        std::cout << "Mapping mode:             All-Best-N-Scores-Mode with N=" << settings.best_n << std::endl;
+    else
+        std::cout << "Mapping mode:             All-Hits-Mode" << std::endl;
+    std::cout << std::endl;
+
+    return 0;
+}
diff --git a/lib/argument_parser.h b/lib/argument_parser.h
new file mode 100644
index 0000000..bbeca10
--- /dev/null
+++ b/lib/argument_parser.h
@@ -0,0 +1,9 @@
+#include <iostream>
+#include <boost/program_options.hpp>
+#include "headers.h"
+#include "definitions.h"
+#include "kindex.h"
+#include "alnstream.h"
+#include "parallel.h"
+
+int parseCommandLineArguments(AlignmentSettings & settings, std::string license, int argc, char const ** argv);
diff --git a/lib/config.h.in b/lib/config.h.in
index 1694736..16fee5b 100644
--- a/lib/config.h.in
+++ b/lib/config.h.in
@@ -1,5 +1,5 @@
 // the k-mer length
-#define K @HiLive_K@
+#define K_HiLive @HiLive_K@
 
 // the HiLive Version Number
 #define HiLive_VERSION_MAJOR @HiLive_VERSION_MAJOR@
diff --git a/lib/definitions.h b/lib/definitions.h
index 7080fe8..5956458 100644
--- a/lib/definitions.h
+++ b/lib/definitions.h
@@ -8,32 +8,33 @@
 // bit representation of A/C/G/T.
 #define twobit_repr(ch) ((toupper(ch)) == 'A' ? 0LL : \
                          (toupper(ch)) == 'C' ? 1LL : \
-			 (toupper(ch)) == 'G' ? 2LL : 3LL)
+                         (toupper(ch)) == 'G' ? 2LL : 3LL)
 
 // complement bit representation of A/C/G/T.
 #define twobit_comp(ch) ((toupper(ch)) == 'A' ? 3LL : \
                          (toupper(ch)) == 'C' ? 2LL : \
-			 (toupper(ch)) == 'G' ? 1LL : 0LL)
+                         (toupper(ch)) == 'G' ? 1LL : 0LL)
 
 // bit representation to character
 #define revtwobit_repr(n) ((n) == 0 ? 'A' : \
                            (n) == 1 ? 'C' : \
-			   (n) == 2 ? 'G' : 'T')
+                           (n) == 2 ? 'G' : 'T')
+
+
 
 
 // Allowed characters in sequence
-const std::string seq_chars = "ACGT";
+const std::string seq_chars = "ACGTacgt";
 
 // largest number we're going to hash into. (8 bytes/64 bits/32 nt)
 // probably 32 bit/16 nt are enough here
 typedef uint64_t HashIntoType;
 
 // construct a mask to truncate a binary representation of a k-mer to length K
-const HashIntoType MASK = HashIntoType(pow(4,K))-1;
-
+const HashIntoType MASK = HashIntoType(pow(4,K_HiLive))-1;
 
 // identifiers for genome sequences
-typedef uint16_t GenomeIdType;
+typedef uint32_t GenomeIdType;
 const GenomeIdType TRIMMED = std::numeric_limits<GenomeIdType>::max();
 
 // list of genome identifiers
@@ -42,7 +43,6 @@ typedef std::vector<GenomeIdType> GenomeIdListType;
 // list of strings
 typedef std::vector<std::string> StringListType;
 
-
 // position in a genome
 typedef int32_t PositionType;
 
@@ -59,25 +59,19 @@ struct GenomePosType {
 // size of a pair of genome ID and position
 const uint64_t GenomePos_size = sizeof(GenomeIdType) + sizeof(PositionType);
 
-// compare function to sort GenomePosType objects by position
-bool gp_compare (GenomePosType i,GenomePosType j);
-
 // vector of ID:position pairs 
 typedef std::vector<GenomePosType> GenomePosListType;
 
 // iterator on GenomePosList
 typedef GenomePosListType::iterator GenomePosListIt;
 
-
 // the k-mer index array
-const HashIntoType n_kmer = pow(4,K);
+const HashIntoType n_kmer = pow(4,K_HiLive);
 typedef std::array<GenomePosListType,n_kmer> KmerIndexType;
 
-
 // small counters
 typedef uint16_t CountType;
 
-
 // difference between k-mer position in the read and matching position in the reference
 typedef int16_t DiffType;
 
@@ -87,68 +81,102 @@ const DiffType NO_MATCH = std::numeric_limits<DiffType>::max();
 // define a trimmed position as max(DiffType)-1
 const DiffType TRIMMED_MATCH = std::numeric_limits<DiffType>::max()-1;
 
+// one element in Cigar vector containing match/mismatch information about consecutive k-mers
+struct CigarElement {
+    CountType length;
+    DiffType offset;
+    CigarElement (CountType l, DiffType o): length(l), offset(o) {};
+    CigarElement (): length(0), offset(NO_MATCH) {};
+};
+
+// CigarVector containing CIGAR string like information about the alignments
+typedef std::list<CigarElement> CigarVector;
+
+
 
 
 // all user parameters are stored in the alignment settings
 struct AlignmentSettings {
+  // HARD CODED: kmer gap structure (this is not used anywhere)
+  //std::string kmer_structure = "11111110111110111";
+  std::string kmer_structure = "111111111111111";
+
+  // HARD CODED: kmer gap positions (one-based)
+  //std::vector<unsigned> kmer_gaps = {8, 14};
+  std::vector<unsigned> kmer_gaps;
+
+  // HARD CODED: kmer span (kmer weight is K_HiLive)
+  //unsigned kmer_span = K_HiLive+2;
+  unsigned kmer_span = K_HiLive;
+
   // PARAMETER: Base Call quality cutoff, treat BC with quality < bc_cutoff as miscall
-  uint8_t min_qual = 1;
+  CountType min_qual;
 
   // PARAMETER: max. insert/deletion size
-  DiffType window = 50;
+  DiffType window;
 
   // PARAMETER: minimum number of errors allowed in alignment
-  CountType min_errors = 6;
+  CountType min_errors;
 
   // SWITCH: discard One-hit-wonders
-  bool discard_ohw = true;
+  bool discard_ohw;
 
   // PARAMETER: first cycle to discard one-hit-wonders
-  CountType start_ohw = K+5;
+  CountType start_ohw;
+
+  // SWITCH: Best-Hit-Mode
+  bool any_best_hit_mode;
 
   // SWITCH: Best-Hit-Mode
-  bool best_hit_mode = true;
+  bool all_best_hit_mode;
 
   // SWITCH: Best-N-Mode
-  bool best_n_mode = false;
+  bool all_best_n_scores_mode;
 
   // PARAMETER: Best-N-Mode::N
-  CountType best_n = 1;
-
-  // SWITCH: sort positions found in index. Saves you a few seconds when turned off, but messes everything up when index is not sorted.
-  bool sort_positions = false;
+  CountType best_n;
 
   // PARAMETER: temporary directory for the streamed alignment
-  std::string temp_dir = "";
+  std::string temp_dir;
 
-  // PARAMETER: directory for SAM file output
-  std::string sam_dir = "";
+  // SWITCH: write sam/bam output or not
+  bool write_bam=false;
 
   // SWITCH: Keep the old alignment files of previous cycles
-  bool keep_aln_files = true;
+  bool keep_aln_files;
 
   // PARAMETER: Memory block size for the input and output buffer in the streamed alignment
-  uint64_t block_size = 64*1024*1024; /* 64 MB */
+  uint64_t block_size;
 
   // PARAMETER: Compression format for alignment files
-  uint8_t compression_format = 0;
-
-  // initialize with default values
-  AlignmentSettings() : min_qual(1), 
-                        window(5),
-                        min_errors(2),
-                        discard_ohw(true), 
-                        start_ohw(K+5), 
-                        best_hit_mode(true), 
-                        best_n_mode(false),
-                        best_n(1),
-                        sort_positions(false),
-                        temp_dir(""),
-                        sam_dir(""),
-                        keep_aln_files(true),
-                        block_size(64*1024*1024),
-                        compression_format(0) {};
+  uint8_t compression_format;
+
+  // PARAMETER: list of lanes to process
+  std::vector<uint16_t> lanes;
+  
+  // PARAMETER: list of tiles to process
+  std::vector<uint16_t> tiles;
+
+  // PARAMETER: root directory of hilive run
+  std::string root;
+
+  // PARAMETER: path to the index file
+  std::string index_fname;
+
+  // PARAMETER: read length of all reads (including barcodes)
+  CountType rlen;
+
+  // PARAMETER: length of the sequence of all reads (excluding barcodes)
+  CountType seqlen;
+
+  // PARAMETER: vector containing all barcodes of the reads which should be outputted
+  std::vector<std::string> barcodeVector;
+
+  // PARAMETER: directory in which to create the output directory structure 
+  std::string out_dir;
 
+  // PARAMETER: number of threads to use
+  CountType num_threads;
 };
 
 
diff --git a/lib/headers.h b/lib/headers.h
index 1869b16..ca54a5c 100644
--- a/lib/headers.h
+++ b/lib/headers.h
@@ -1,5 +1,6 @@
 #include "config.h"
 #include <iostream>
+#include <sstream>
 #include <fstream>
 #include <zlib.h>
 #include <lz4.h>
@@ -21,4 +22,5 @@
 #include <mutex>
 #include <chrono>
 #include <stdexcept>
+#include <utility>
 #include <boost/filesystem.hpp>
diff --git a/lib/kindex.cpp b/lib/kindex.cpp
index b03d73f..77d90b7 100644
--- a/lib/kindex.cpp
+++ b/lib/kindex.cpp
@@ -1,99 +1,62 @@
 #include "kindex.h"
 
 
-
-int KixBuild::add_fasta(const std::string &fname) {
+int KixBuild::add_fasta(const std::string &fname, AlignmentSettings & settings, bool convert_spaces, bool trim_ids) {
   GenomeIdListType added_ids;
-  return add_fasta(fname, added_ids);
+  return add_fasta(fname, added_ids, settings, convert_spaces, trim_ids);
 }
 
-int KixBuild::add_fasta(const std::string &fname, GenomeIdListType &ids) {
-  //std::cout << "Sequence " << num_seq << ".  Reading file " << fname << std::endl;
+int KixBuild::add_fasta(const std::string &fname, GenomeIdListType &ids, AlignmentSettings & settings, bool convert_spaces, bool trim_ids) {
   std::ios::sync_with_stdio(false);
   std::ifstream::sync_with_stdio(false);
 
   std::ifstream infile (fname.c_str());
   assert(infile.is_open());
 
-  std::string line, seq;
-  HashIntoType fw; // forward k-mer
-  PositionType pos = 1; // use 1-based positions (to allow for negative positions)
+  std::string line;
   GenomeIdType seq_id = 0;
   std::string  seq_name;
   GenomeIdListType added_ids;
-  unsigned long int last_invalid = K+1; // number of characters since the last invalid character (e.g. N)
+  PositionType sequencePosition;
+  bool startNewSequence = false;
+  std::string tailingKmer;
 
   while(getline(infile, line)) {
     if (line.length() == 0) {continue;};
+    
+    if (line[line.length()-1] == '\r'){
+      // check for Windows newline characters (just in case the fasta comes from Windows --> thanks Simon)
+      line.erase(line.length()-1);
+    }
 
     if (line[0] == '>') { // header line
       // initialize a new sequence
-      pos = 1;
-      fw = 0;
       num_seq += 1;
       seq_id = num_seq - 1;
-      last_invalid = K+1;
-      if (line.find(" ") != std::string::npos) {
-	seq_name = line.substr(0,line.find(" "));
-      } else {
-	seq_name = line;
-      }
+
+      if (convert_spaces)
+        std::replace( line.begin(), line.end(), ' ', '_');
+      if (trim_ids)
+        seq_name = line.substr(1,line.find(' ')-1);
+      else
+        seq_name = line.substr(1,line.length()-1);
+
       added_ids.push_back(seq_id);
       seq_names.push_back(seq_name);
-      seq_lengths.push_back(0);
+      seq_lengths.push_back(0); // gets later corrected to settings.kmer_span - 1
       assert(seq_names.size() == num_seq);
+      startNewSequence = true;
     } 
     else { // sequence line
-      const char * lstr = line.c_str(); 
-      if (pos==1) { // beginning of a sequence
-	last_invalid = hash_fw(lstr, fw); // hash the first k-mer
-	seq_lengths.back()+=1;
-
-	if (last_invalid > K) {
-	  add_kmer(fw,seq_id,pos);
-	}
-
-	for (unsigned int i = K; i < line.length(); i++) {
-	  pos = i - K + 1 + 1; // use 1-based positions (to allow for negative positions)
-	  seq_lengths.back()+=1;
-
-	  if ( seq_chars.find(lstr[i]) == std::string::npos ) {
-	    last_invalid = 1;
-	    continue;
-	  } else {
-	    last_invalid += 1;
-	  }
-
-	  update_kmer(fw, twobit_repr(lstr[i]));
-	
-	  // add k-mer to database
-	  if (last_invalid > K) {
-	    add_kmer(fw,seq_id,pos);
-	  }
-	}
-      }
-      else { // continue a sequence
-	for (unsigned int i = 0; i < line.length(); i++) {
-	  pos++;
-	  seq_lengths.back()+=1;
-	
-	  if ( seq_chars.find(lstr[i]) == std::string::npos ) {
-	    last_invalid = 1;
-	    continue;
-	  } else {
-	    last_invalid += 1;
-	  }
-
-	  update_kmer(fw, twobit_repr(lstr[i]));
-
-	  // add k-mer to database
-	  if (last_invalid > K) {
-	    add_kmer(fw,seq_id,pos);
-	  }
-	}
+      if (startNewSequence) {
+        if (line.length() < settings.kmer_span)
+            continue; // ignore sequences shorter than K
+        start_sequence(line, tailingKmer, sequencePosition, settings);
+        startNewSequence = false;
       }
+      else
+        continue_sequence(line, tailingKmer, sequencePosition, settings);
     }
-    
   }
   infile.close();
   ids = added_ids;
@@ -101,52 +64,69 @@ int KixBuild::add_fasta(const std::string &fname, GenomeIdListType &ids) {
 }
 
 
+/* Start adding all k-mers in a sequence string s to the database.
+    A new ID is created for this sequence.
+    Return: sequence ID
+*/
+GenomeIdType KixBuild::start_sequence(const std::string &s, std::string& tailingKmer, PositionType& sequencePosition, AlignmentSettings & settings) {
+  assert(seq_names.size() == num_seq);
+  assert(s.length() >= settings.kmer_span);
 
-GenomeIdType KixBuild::add_sequence(const std::string &s) {
-  /* Add all k-mers in a sequence string s to the database.
-     A new ID is created for this sequence.
-     Return: sequence ID
-   */
+  // add sequence kmers to index
+  sequencePosition = 0; // use 1-based positions (to allow for negative positions)
+  std::string::const_iterator it_s = s.begin();
+  std::string::const_iterator last_invalid;
+  HashIntoType fw; // forward k-mer
 
-  assert(seq_names.size() == num_seq);
+  seq_lengths.back()=settings.kmer_span-1;
+  for (; it_s < s.end()-settings.kmer_span+1; ++it_s) {
+    ++sequencePosition; // use 1-based positions (to allow for negative positions)
+    seq_lengths.back()+=1;
+    last_invalid = hash_fw(it_s, s.end(), fw, settings);
 
-  // increase the sequence counter
-  num_seq += 1; 
+    // add k-mer to database
+    if (last_invalid < it_s)
+      add_kmer(fw,num_seq-1,sequencePosition);
+    else {
+      unsigned jumplength = std::min(last_invalid - it_s, s.end() - settings.kmer_span - it_s);
+      sequencePosition += jumplength;
+      seq_lengths.back() += jumplength;
+      it_s = last_invalid;
+    }
+  }
+  tailingKmer = s.substr(s.length()-settings.kmer_span);
+  return num_seq;
+}
 
-  // add a default name for the sequence
-  seq_names.push_back(std::string("Sequence_")+std::to_string(num_seq));
-  seq_lengths.push_back(0);
 
-  PositionType pos = 1; // use 1-based positions (to allow for negative positions)
-  const char * sp = s.c_str(); 
-  unsigned int length = s.length();
+/* Continue adding all k-mers in a sequence string s to the database.
+    Return: sequence ID
+*/
+GenomeIdType KixBuild::continue_sequence(const std::string &s, std::string& tailingKmer, PositionType& sequencePosition, AlignmentSettings & settings) {
+  assert(seq_names.size() == num_seq);
 
-  // the current k-mers
+  std::string concatString = tailingKmer + s;
+  // add sequence kmers to index
+  std::string::const_iterator it_s = concatString.begin() + 1;
+  std::string::const_iterator last_invalid;
   HashIntoType fw; // forward k-mer
-  HashIntoType last_invalid = hash_fw(sp, fw); // hash the first k-mer
-  seq_lengths.back()+=1;
-  if (last_invalid > K) {
-    add_kmer(fw,num_seq-1,pos);
-  }
 
-  for (unsigned int i = K; i < length; i++) {
-    pos = i - K + 1 + 1; // use 1-based positions (to allow for negative positions)
+  for (; it_s < concatString.end()-settings.kmer_span+1; ++it_s) {
+    ++sequencePosition; // use 1-based positions (to allow for negative positions)
     seq_lengths.back()+=1;
-    if ( seq_chars.find(sp[i]) == std::string::npos ) {
-      last_invalid = 1;
-      continue;
-    } else {
-      last_invalid += 1;
-    }
-
-    update_kmer(fw, twobit_repr(sp[i]));
+    last_invalid = hash_fw(it_s, concatString.end(), fw, settings);
 
     // add k-mer to database
-    if (last_invalid > K) {
-      add_kmer(fw,num_seq-1,pos);
+    if (last_invalid < it_s)
+      add_kmer(fw,num_seq-1,sequencePosition);
+    else {
+      unsigned jumplength = std::min(last_invalid - it_s, concatString.end()- settings.kmer_span - it_s);
+      sequencePosition += jumplength;
+      seq_lengths.back() += jumplength;
+      it_s = last_invalid;
     }
   }
-  
+  tailingKmer = concatString.substr(concatString.length()-settings.kmer_span);
   return num_seq;
 }
 
@@ -161,43 +141,34 @@ int KixBuild::add_kmer(HashIntoType kmer, GenomeIdType id, PositionType pos) {
   return 1;
 }
 
+
 /* Trim the k-mer index: removes all k-mers with more than
    max_count occurrences in the reference genomes. Trimmed
    k-mers are marked by the GenomeIdType TRIMMED (from
    definitions.h). */
 uint64_t KixBuild::trim(uint64_t max_count) {
-
   uint64_t trimmed = 0;
   GenomePosType gp_trimmed (TRIMMED,0);
 
-  for (auto it = db.begin(); it != db.end(); ++it) {
+  for (auto it = db.begin(); it != db.end(); ++it)
     if ((*it).size() > max_count) {
       trimmed += (*it).size();
       (*it).clear();
       (*it).push_back(gp_trimmed);
     }
-  }
-
   return trimmed;
 }
 
 
-
-std::string KixBuild::get_name(GenomeIdType id) {
-  return seq_names[id];
-}
-
-
 std::vector<char> KixBuild::serialize() {
   // first of all, sort the database entries by position
-  for (auto it = db.begin(); it != db.end(); ++it) {
+  for (auto it = db.begin(); it != db.end(); ++it)
     std::sort(it->begin(), it->end(), gp_compare);
-  }
 
   // calculate total size
   unsigned long int total_size = 0;
 
-  // K itself
+  // K_HiLive itself
   total_size += 1;
 
   // total number of sequences in database
@@ -229,7 +200,7 @@ std::vector<char> KixBuild::serialize() {
   char* d = data.data();
 
   // write K
-  uint8_t kk = K;
+  uint8_t kk = K_HiLive;
   memcpy(d,&kk,1);
   d++;
 
@@ -256,14 +227,14 @@ std::vector<char> KixBuild::serialize() {
   // database entries
   for (auto it = db.begin(); it != db.end(); ++it) {
     // number of positions
-    uint32_t num_positions = (*it).size();
+    uint32_t num_positions = (*it).size(); //db is an array of GenomePosType structs
     memcpy(d,&num_positions,sizeof(uint32_t));
     d += sizeof(uint32_t);
 
     // genome ID and position
-    for( uint32_t i = 0; i < num_positions; i++) {
-      GenomeIdType gid = (*it)[i].gid;
-      PositionType pos = (*it)[i].pos;
+    for(GenomePosListIt entry=(*it).begin(); entry!=(*it).end(); ++entry) {
+      GenomeIdType gid = (*entry).gid;
+      PositionType pos = (*entry).pos;
 
       memcpy(d,&gid,sizeof(GenomeIdType));
       d += sizeof(GenomeIdType);
@@ -313,7 +284,7 @@ uint64_t KixBuild::deserialize(char* d) {
   uint8_t kk;
   memcpy(&kk,d+bytes,1);
   bytes++;
-  assert(K == kk);
+  assert(K_HiLive == kk);
 
   // read total number of sequences in database
   memcpy(&num_seq,d+bytes,sizeof(GenomeIdType));
@@ -328,10 +299,11 @@ uint64_t KixBuild::deserialize(char* d) {
     memcpy(&nm_length,d+bytes,sizeof(uint16_t));
     bytes += sizeof(uint16_t);
 
-    char tmp[nm_length+1];
+    char * tmp = new char[nm_length+1];
     memcpy(tmp,d+bytes,nm_length);
     tmp[nm_length] = 0; // make the string null-terminated
     seq_names.push_back(tmp);
+    delete tmp;
     bytes += nm_length;
   }
   for (uint32_t i = 0; i < num_seq; i++) {
@@ -410,18 +382,6 @@ uint64_t KixBuild::deserialize_file(std::string f) {
 }
 
 
-
-
-
-
-bool gp_compare (GenomePosType i,GenomePosType j) { 
-  return (i.pos < j.pos); 
-}
-
-
-
-
-
 uint64_t KixRun::deserialize(char* d) {
   // the total number of bytes read
   uint64_t bytes = 0; 
@@ -430,7 +390,7 @@ uint64_t KixRun::deserialize(char* d) {
   uint8_t kk;
   memcpy(&kk,d+bytes,1);
   bytes++;
-  assert(kk == K);
+  assert(kk == K_HiLive);
 
   // read total number of sequences in database
   memcpy(&num_seq,d+bytes,sizeof(GenomeIdType));
@@ -444,10 +404,11 @@ uint64_t KixRun::deserialize(char* d) {
     memcpy(&nm_length,d+bytes,sizeof(uint16_t));
     bytes += sizeof(uint16_t);
 
-    char tmp[nm_length+1];
+    char * tmp = new char[nm_length+1];
     memcpy(tmp,d+bytes,nm_length);
     tmp[nm_length] = 0; // make the string null-terminated
     seq_names.push_back(tmp);
+    delete tmp;
     bytes += nm_length;
   }
 
@@ -479,62 +440,29 @@ uint64_t KixRun::deserialize(char* d) {
 }
 
 
-
 uint64_t KixRun::deserialize_file(std::string f) {
   std::string fname = f;
   
   sdata = read_binary_file(f);
-  /*
-  // obtain file size
-  unsigned long int size = get_filesize(fname);
-
-  // open binary file
-  FILE* ifile;
-  ifile = fopen(fname.c_str(), "rb");
-
-  if (!ifile) {
-    std::cerr << "Error reading from file " << fname << ": Could not open file." << std::endl;
-    return 0;
-  }
-
-  // allocate memory
-  sdata.resize(size,0);
-  
-  // read all data
-  unsigned long int read = fread(sdata.data(), 1, size, ifile);
-
-  // close file
-  fclose (ifile);
-
-  if (read != size){
-    std::cerr << "Error reading from file " << fname << ": File size: " << size << " bytes. Read: " << read << " bytes." << std::endl;
-    return 0;
-  }
-  */
-  // deserialize data
   deserialize(sdata.data());
 
   return sdata.size();
 }
 
 
-
-std::string KixRun::get_name(GenomeIdType id) {
-  return seq_names[id];
-}
-
-
 /* Retrieve all occurrences (fwd & rc) of kmer in the reference from the index */
-GenomePosListType KixRun::retrieve_positions(HashIntoType kmer) {
+GenomePosListType KixRun::retrieve_positions(std::string kmerSpan, AlignmentSettings & settings) {
 
   // get the reverse complement of the kmer
-  HashIntoType kmer_rc = rc(kmer);
+  HashIntoType fwHashValue;
+  HashIntoType rcHashValue;
+  hash(kmerSpan.c_str(), fwHashValue, rcHashValue, settings);
   
   // obtain the list of positions for each k-mer
-  char* fwd_begin = db[kmer];
+  char* fwd_begin = db[fwHashValue];
   uint32_t fwd_len;
   memcpy(&fwd_len,fwd_begin,sizeof(uint32_t));
-  char* rev_begin = db[kmer_rc];
+  char* rev_begin = db[rcHashValue];
   uint32_t rev_len;
   memcpy(&rev_len,rev_begin,sizeof(uint32_t));
 
@@ -571,25 +499,3 @@ GenomePosListType KixRun::retrieve_positions(HashIntoType kmer) {
 
   return pos;
 }
-
-
-// generate a SAM compliant header string
-std::string KixRun::get_SAM_header() {
-  std::stringstream s;
-  
-  // header
-  s << "@HD\tVN:1.5\tSO:unsorted" << std::endl;
-
-  // sequence information
-  assert(seq_names.size() == seq_lengths.size());
-  for ( uint64_t i = 0; i < seq_names.size(); i++  ) {
-    s << "@SQ\tSN:" << seq_names[i] << "\tLN:" << seq_lengths[i] << std::endl;
-  }
-
-  // program information
-  s << "@PG\tHiLive" << std::endl;
-
-
-  return s.str();
-}
-
diff --git a/lib/kindex.h b/lib/kindex.h
index ab56d04..5b238fa 100644
--- a/lib/kindex.h
+++ b/lib/kindex.h
@@ -19,21 +19,16 @@ class KixBuild {
  public:
   
   // add k-mers of all sequences in FASTA file
-  int add_fasta(const std::string &fname, GenomeIdListType &ids);
-  int add_fasta(const std::string &fname);
+  int add_fasta(const std::string &fname, GenomeIdListType &ids, AlignmentSettings & settings, bool convert_spaces, bool trim_ids);
+  int add_fasta(const std::string &fname, AlignmentSettings & settings, bool convert_spaces, bool trim_ids);
   
   // add all k-mers in a string sequence to the database
-  GenomeIdType add_sequence(const std::string &s);
+  GenomeIdType start_sequence(const std::string &s, std::string& tailingKmer, PositionType& sequencePosition, AlignmentSettings & settings);
+  GenomeIdType continue_sequence(const std::string &s, std::string& tailingKmer, PositionType& sequencePosition, AlignmentSettings & settings);
 
   // trim the database: remove kmers with more than max_count occurrences
   uint64_t trim(uint64_t max_count);
 
-  // set the name of a sequence
-  int set_name(GenomeIdType id, const std::string &name);
-
-  // get the name of a sequence
-  std::string get_name(GenomeIdType id);
-
   // serialize the KixBuild
   std::vector<char> serialize();
   
@@ -52,7 +47,7 @@ class KixBuild {
   StringListType seq_names; // names of the sequences in the database
   std::vector<uint32_t> seq_lengths; // lengths of the sequences in the database
 
-};  // END class KIindex
+};  // END class KixBuild
 
 
 
@@ -67,12 +62,9 @@ class KixRun {
  public:
   // pointer to the matching positions for a k-mer
   char* kmer(HashIntoType kmer);
-
-  // get the name of a sequence
-  std::string get_name(GenomeIdType id);
   
   // retrieve all fwd and rc occurrences of kmer in the index
-  GenomePosListType retrieve_positions(HashIntoType kmer);
+  GenomePosListType retrieve_positions(std::string kmerSpan, AlignmentSettings & settings);
 
   // deserialize Kix
   uint64_t deserialize(char* d);
@@ -80,9 +72,6 @@ class KixRun {
   // load and deserialize Kix from file
   uint64_t deserialize_file(std::string f);
 
-  // generate a SAM compliant header string
-  std::string get_SAM_header();
-
   // Database content
   GenomeIdType num_seq; // total number of sequences in the database
   StringListType seq_names; // names of the sequences in the database
@@ -92,12 +81,4 @@ class KixRun {
 };  // END class KixRun
 
 
-
-
-
-
-
-
-
-
 #endif /* KINDEX_H */
diff --git a/lib/tools.cpp b/lib/tools.cpp
index 63efe7d..34a1e72 100644
--- a/lib/tools.cpp
+++ b/lib/tools.cpp
@@ -1,5 +1,10 @@
 #include "tools.h"
 
+// compares two genome positions by position (not by genome id)
+bool gp_compare (GenomePosType i,GenomePosType j) { 
+  return (i.pos < j.pos); 
+}
+
 // reads a binary file from hdd and stores its raw content in a char vector
 std::vector<char> read_binary_file(const std::string &fname) {
   
@@ -108,24 +113,6 @@ uint32_t num_reads_from_bcl(std::string bcl) {
   return num_reads;
 }
 
-
-
-// Update an existing kmer by left-shifting all nucleotides and appending new nucleotide
-void update_kmer(HashIntoType &kmer, HashIntoType nuc) {
-  
-  // left-shift the previous k-mer
-  kmer = kmer << 2;
-  
-  // 'or' in the current nucleotide
-  // only use the last 2 bits
-  kmer |= (nuc & 3);
-  
-  // mask off the 2 bits we shifted over.
-  kmer &= MASK;
-  
-}
-
-
 /* get the size (in bytes) of a file */
 std::ifstream::pos_type get_filesize(const std::string &fname)
 {
@@ -136,21 +123,23 @@ std::ifstream::pos_type get_filesize(const std::string &fname)
 
 /* calculates the first forward and reverse complement k-mer in the 
    string <kmer> and returns the canonical representation. */
-HashIntoType hash(const char * kmer, HashIntoType& _h, HashIntoType& _r)
+HashIntoType hash(const char * kmer, HashIntoType& _h, HashIntoType& _r, AlignmentSettings & settings)
 {
-  assert(strlen(kmer) >= K);
+  assert(strlen(kmer) >= settings.kmer_span);
 
   HashIntoType h = 0, r = 0;
 
   h |= twobit_repr(kmer[0]);
-  r |= twobit_comp(kmer[K-1]);
-
-  for (unsigned int i = 1, j = K - 2; i < K; i++, j--) {
-    h = h << 2;
-    r = r << 2;
-
-    h |= twobit_repr(kmer[i]);
-    r |= twobit_comp(kmer[j]);
+  r |= twobit_comp(kmer[settings.kmer_span-1]);
+
+  for (unsigned int i = 1, j = settings.kmer_span-2; i < settings.kmer_span; i++, j--) {
+    // if i not gap position
+    if (std::find(settings.kmer_gaps.begin(), settings.kmer_gaps.end(), i+1) == settings.kmer_gaps.end()) {
+      h = h << 2;
+      h |= twobit_repr(kmer[i]);
+      r = r << 2;
+      r |= twobit_comp(kmer[j]);
+    }
   }
 
   _h = h;
@@ -160,47 +149,44 @@ HashIntoType hash(const char * kmer, HashIntoType& _h, HashIntoType& _r)
 }
 
 /* calculates the first forward k-mer in the string <kmer> */
-HashIntoType hash_fw(const char * kmer, HashIntoType& _h)
+std::string::const_iterator hash_fw(std::string::const_iterator it, std::string::const_iterator end, HashIntoType& _h, AlignmentSettings & settings)
 {
-  assert(strlen(kmer) >= K);
-
+  assert(it+settings.kmer_span-1 < end);
   HashIntoType h = 0;
-  HashIntoType last_invalid = K+1;
+  std::string::const_iterator last_invalid = it-1;
 
-  h |= twobit_repr(kmer[0]);
+  h |= twobit_repr(*it);
 
-  for (unsigned int i = 1; i < K; i++) {
+  std::string::const_iterator kmerEnd = it+settings.kmer_span;
+  ++it;
+  int positionInKmer = 2;
+  for (; it != kmerEnd; ++it, ++positionInKmer) {
+    if (std::find(settings.kmer_gaps.begin(), settings.kmer_gaps.end(), positionInKmer) != settings.kmer_gaps.end())
+        continue;
     h = h << 2;
-    h |= twobit_repr(kmer[i]);
-    if ( seq_chars.find(kmer[i]) == std::string::npos ) {
-      last_invalid = K+1-i;
+    h |= twobit_repr(*it);
+    if ( seq_chars.find(*it) == std::string::npos ) {
+      last_invalid = it+settings.kmer_span-1;
     }
   }
 
   _h = h;
-
   return last_invalid;
 }
 
 
-/* returns the reverse complement of a k-mer */
-HashIntoType rc(HashIntoType fw)
+/* returns the sequence of a k-mer */
+std::string unhash(HashIntoType myHash, unsigned hashLen)
 {
+	std::string kmer = "";
 
-  HashIntoType rc = 0;
-
-  // Illumina uses
-  // A = 0b00
-  // C = 0b01
-  // G = 0b10
-  // T = 0b11
-  // so, inverting bits produces the rc: rc(A) = ~A
-
-  for (unsigned int i = 0; i < 2*K; i+=2) {
-    rc |= (~(fw >> i) & 3) << (2*K - i - 2);
-  }
-
-  return rc;
+	unsigned mask = 3;
+	for (unsigned i = 1; i<pow(2,2*hashLen); i *= 4) {
+		kmer.push_back(revtwobit_repr(myHash & mask));
+		myHash = myHash >> 2;
+	}
+	std::reverse(kmer.begin(), kmer.end());
+	return kmer;
 }
 
 
@@ -223,16 +209,22 @@ std::string alignment_name(std::string rt, uint16_t ln, uint16_t tl, uint16_t cl
 }
 
 // construct tile-wise SAM file name from: root, lane, tile
-std::string sam_tile_name(std::string rt, uint16_t ln, uint16_t tl) {
+std::string sam_tile_name(std::string rt, uint16_t ln, uint16_t tl, bool write_bam) {
   std::ostringstream path_stream;
-  path_stream << rt << "/L00" << ln << "/s_"<< ln << "_" << tl << ".sam";
+  if (write_bam)
+    path_stream << rt << "/L00" << ln << "/s_"<< ln << "_" << tl << ".bam";
+  else
+    path_stream << rt << "/L00" << ln << "/s_"<< ln << "_" << tl << ".sam";
   return path_stream.str();
 }
 
 // construct lane-wise SAM file name from: root, lane
-std::string sam_lane_name(std::string rt, uint16_t ln) {
+std::string sam_lane_name(std::string rt, uint16_t ln, bool write_bam) {
   std::ostringstream path_stream;
-  path_stream << rt << "/L00" << ln << "/s_"<< ln << ".sam";
+  if (write_bam)
+    path_stream << rt << "/L00" << ln << "/s_"<< ln << ".bam";
+  else
+    path_stream << rt << "/L00" << ln << "/s_"<< ln << ".sam";
   return path_stream.str();
 }
 
@@ -249,5 +241,3 @@ std::string position_name(std::string rt, uint16_t ln, uint16_t tl) {
   path_stream << rt << "../L00" << ln << "/s_"<< ln << "_" << tl << ".clocs";
   return path_stream.str();
 }
-
-
diff --git a/lib/tools.h b/lib/tools.h
index 667d0b6..b8060c8 100644
--- a/lib/tools.h
+++ b/lib/tools.h
@@ -5,6 +5,10 @@
 #include "definitions.h"
 #include "kindex.h"
 
+
+// compare function to sort GenomePosType objects by position
+bool gp_compare (GenomePosType i,GenomePosType j);
+
 // calculates the total size of a file in bytes
 std::ifstream::pos_type get_filesize(const std::string &fname);
 
@@ -24,23 +28,19 @@ uint32_t num_reads_from_bcl(std::string bcl);
 uint64_t write_binary_file(const std::string &fname, const std::vector<char> & data);
 
 //------  Hashing helper functions  ---------------------------------//
-HashIntoType hash(const char * kmer, HashIntoType& _h, HashIntoType& _r);
-HashIntoType hash_fw(const char * kmer, HashIntoType& _h);
-HashIntoType rc(HashIntoType fw); 
-
-// Update an existing kmer by left-shifting all nucleotides and appending new nucleotide
-void update_kmer(HashIntoType &kmer, HashIntoType nuc);
+HashIntoType hash(const char * kmer, HashIntoType& _h, HashIntoType& _r, AlignmentSettings & settings);
+std::string::const_iterator hash_fw(std::string::const_iterator it, std::string::const_iterator end, HashIntoType& _h, AlignmentSettings & settings);
+//HashIntoType rc(HashIntoType fw); 
+/* returns the sequence of a k-mer */
+std::string unhash(HashIntoType myHash, unsigned hashLen=K_HiLive);
 
 // file name construction functions
 std::string bcl_name(std::string rt, uint16_t ln, uint16_t tl, uint16_t cl);
 std::string alignment_name(std::string rt, uint16_t ln, uint16_t tl, uint16_t cl);
 std::string filter_name(std::string rt, uint16_t ln, uint16_t tl);
 std::string position_name(std::string rt, uint16_t ln, uint16_t tl);
-std::string sam_tile_name(std::string rt, uint16_t ln, uint16_t tl);
-std::string sam_lane_name(std::string rt, uint16_t ln);
-
-
-
+std::string sam_tile_name(std::string rt, uint16_t ln, uint16_t tl, bool write_bam);
+std::string sam_lane_name(std::string rt, uint16_t ln, bool write_bam);
 
 
 #endif /* TOOLS_H */
diff --git a/tools/.ann.location b/tools/.ann.location
deleted file mode 100644
index 573541a..0000000
--- a/tools/.ann.location
+++ /dev/null
@@ -1 +0,0 @@
-0
diff --git a/tools/build_index.cpp b/tools/build_index.cpp
index fc6307d..090d886 100644
--- a/tools/build_index.cpp
+++ b/tools/build_index.cpp
@@ -7,7 +7,7 @@
 namespace po = boost::program_options;
 
 std::string license =
-"Copyright (c) 2015, Martin S. Lindner, marzin at mail-lindner.de\n"
+"Copyright (c) 2015-2016, Martin S. Lindner and the HiLive contributors. See CONTRIBUTORS for more info.\n"
 "All rights reserved.\n"
 "\n"
 "Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:\n"
@@ -24,6 +24,11 @@ std::string license =
 
 
 int main(int argc, char* argv[]) {
+
+  unsigned trim;
+  bool do_not_convert_spaces;
+  bool trim_ids;
+
   // setting up the command line interface
   po::options_description general("General");
   general.add_options()
@@ -37,7 +42,9 @@ int main(int argc, char* argv[]) {
   po::options_description options("Options");
   options.add_options()
     ("outfile,o", po::value<std::string>(), "Set output file name [Default: INPUT.kix]")
-    ("trim,t", po::value<uint32_t>(), "Ignore k-mers with more than t occurrences. [Default: no limit]");
+    ("trim,t", po::value<unsigned>(&trim)->default_value(0), "Ignore k-mers with more than t occurrences. [Default: no limit]")
+    ("do-not-convert-spaces", po::bool_switch(&do_not_convert_spaces)->default_value(false), "Do not convert all spaces in reference ids to underscores [Default: converting is on]")
+    ("trim-after-space", po::bool_switch(&trim_ids)->default_value(false), "Trim all reference ids after first space [Default: false]");
 
   po::options_description cmdline_options;
   cmdline_options.add(general).add(parameters).add(options);
@@ -50,7 +57,7 @@ int main(int argc, char* argv[]) {
   help_message << "Index creation tool for HiLive - Realtime Alignment of Illumina Reads" << std::endl;
   help_message << "Copyright (c) 2015, Martin S. Lindner" << std::endl;
   help_message << "HiLive is open-source software. Check with --license for details." << std::endl << std::endl;
-  help_message << "Fixed k-mer size: " << K << std::endl << std::endl;
+  help_message << "Fixed k-mer size: " << K_HiLive << std::endl << std::endl;
   help_message << "Usage: " << std::string(argv[0]) << " [options] INPUT" << std::endl;
   help_message << "  INPUT       Reference genomes in (multi-) FASTA format" << std::endl;
 
@@ -101,16 +108,10 @@ int main(int argc, char* argv[]) {
     index_name = fasta_name + std::string(".kix");    
   }
 
-  // get k-mer trimming
-  int trim = 0;
-  if (vm.count("trim")) {
-    trim = vm["trim"].as<uint32_t>();
-  }
-
-
-  std::cout << "Creating index with K=" << K << " from file " << fasta_name << std::endl; 
+  std::cout << "Creating index with K_HiLive=" << K_HiLive << " from file " << fasta_name << std::endl; 
   KixBuild* index = new KixBuild();
-  index->add_fasta(fasta_name);
+  AlignmentSettings settings; // for hard coded gapped kmer structure
+  index->add_fasta(fasta_name, settings, !do_not_convert_spaces, trim_ids);
 
   if (trim > 0) {
     uint64_t trimmed = index->trim(trim);
diff --git a/tools/hilive.cpp b/tools/hilive.cpp
index d5ecae4..feb7349 100644
--- a/tools/hilive.cpp
+++ b/tools/hilive.cpp
@@ -1,17 +1,12 @@
-#include <iostream>
-#include <boost/program_options.hpp>
-
 #include "../lib/headers.h"
 #include "../lib/definitions.h"
 #include "../lib/kindex.h"
 #include "../lib/alnstream.h"
 #include "../lib/parallel.h"
-
-
-namespace po = boost::program_options;
+#include "../lib/argument_parser.h"
 
 std::string license =
-"Copyright (c) 2015, Martin S. Lindner, marzin at mail-lindner.de\n"
+"Copyright (c) 2015-2016, Martin S. Lindner and the HiLive contributors. See CONTRIBUTORS for more info.\n"
 "All rights reserved.\n"
 "\n"
 "Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:\n"
@@ -30,35 +25,36 @@ std::string license =
 // the worker function for the threads
 void worker (TaskQueue & tasks, TaskQueue & finished, TaskQueue & failed, AlignmentSettings* settings, KixRun* idx, bool & surrender ) {
 
-  // loop that keeps on running until the surrender flag is set
-  while ( !surrender ) {
-    // try to obtain a new task
-    Task t = tasks.pop();
-    if ( t != NO_TASK ) {
-      // Execute the task
-      bool success = true;
-      try {
-	StreamedAlignment s (t.lane, t.tile, t.root, t.rlen);
-	uint64_t num_seeds = s.extend_alignment(t.cycle,idx,settings);
-	std::cout << "Task [" << t << "]: Found " << num_seeds << " seeds." << std::endl;
-      }
-      catch (const std::exception &e) {
-	std::cerr << "Failed to finish task [" << t << "]: " << e.what() << std::endl;
-	success = false;
-      }
-      if (success) {
-	finished.push(t);
-      }
-      else {
-	failed.push(t);
-      }
-
-    }
-    else {
-      // send this thread to sleep for a second
-      std::this_thread::sleep_for (std::chrono::seconds(1));
-    }
-  }  
+    // loop that keeps on running until the surrender flag is set
+    while ( !surrender ) {
+        // try to obtain a new task
+        Task t = tasks.pop();
+        if ( t != NO_TASK ) {
+            // Execute the task
+            bool success = true;
+            try {
+                StreamedAlignment s (t.lane, t.tile, t.root, t.rlen);
+                uint64_t num_seeds;
+                num_seeds = s.extend_alignment(t.cycle,idx,settings);
+                std::cout << "Task [" << t << "]: Found " << num_seeds << " seeds." << std::endl;
+            }
+            catch (const std::exception &e) {
+                std::cerr << "Failed to finish task [" << t << "]: " << e.what() << std::endl;
+                success = false;
+            }
+            if (success) {
+                finished.push(t);
+            }
+            else {
+                failed.push(t);
+            }
+
+        }
+        else {
+            // send this thread to sleep for a second
+            std::this_thread::sleep_for (std::chrono::milliseconds(100));
+        }
+    }  
 
 }
 
@@ -66,412 +62,162 @@ void worker (TaskQueue & tasks, TaskQueue & finished, TaskQueue & failed, Alignm
 // create a special SAM worker, that writes out a SAM file for a tile
 void sam_worker (TaskQueue & tasks, AlignmentSettings* settings, KixRun* idx) {
 
-  // loop that keeps on running until the surrender flag is set
-  while ( true ) {
-    // try to obtain a new task
-    Task t = tasks.pop();
-    if ( t != NO_TASK ) {
-      // Execute the task
-      alignments_to_sam(t.lane,t.tile,t.root,t.rlen, idx,settings);
-    }
-    else {
-      return;
-    }
-  }  
+    // loop that keeps on running until the surrender flag is set
+    while ( true ) {
+        // try to obtain a new task
+        Task t = tasks.pop();
+        if ( t != NO_TASK ) {
+            // Execute the task
+            alignments_to_sam(t.lane,t.tile,t.root,t.rlen, idx,settings);
+        }
+        else {
+            return;
+        }
+    }  
 
 }
 
 
 
-int main(int argc, char* argv[]) {
-  time_t t_start = time(NULL);
-
-  // setting up the command line interface
-  po::options_description general("General");
-  general.add_options()
-    ("help,h", "Print this help message and exit")
-    ("license", "Print licensing information and exit");
-
-  po::options_description parameters("Parameters");
-  parameters.add_options()
-    ("BC_DIR", po::value<std::string>()->required(), "Illumina BaseCalls directory")
-    ("INDEX", po::value<std::string>()->required(), "Path to k-mer index")
-    ("CYCLES", po::value<CountType>()->required(), "Number of cycles");
-
-  po::options_description io_settings("IO settings");
-  io_settings.add_options()
-    ("temp", po::value<std::string>(), "Temporary directory for the alignment files [Default: use BaseCalls directory]")
-    ("sam,S", po::value<std::string>(), "Create SAM files for each tile. [Default: no SAM files]")
-    ("keep-files,k", "Keep intermediate alignment files [Default: false]")
-    ("lanes,l", po::value< std::vector<uint16_t> >(), "Select lane [Default: all lanes]")
-    ("tiles,t", po::value< std::vector<uint16_t> >(), "Select tile numbers [Default: all tiles]");
-
-  po::options_description alignment("Alignment settings");
-  alignment.add_options()
-    ("min-errors,e", po::value<CountType>()->default_value(2), "Number of errors tolerated in read alignment")
-    ("best-hit,H", "Report only the best alignmnet(s) for each read")
-    ("best-n,N", po::value<CountType>()->default_value(2), "Report the N best alignmnets for each read")
-    ("disable-ohw-filter", "Disable the One-Hit Wonder filter")
-    ("start-ohw", po::value<CountType>()->default_value(K+5), "First cycle to apply One-Hit Wonder filter")
-    ("window,w", po::value<DiffType>()->default_value(5), "Set the window size to search for alignment continuation, i.e. maximum insertion/deletion size")
-    ("min-quality", po::value<uint16_t>()->default_value(1), "Minimum allowed basecall quality");
-
-  po::options_description technical("Technical settings");
-  technical.add_options()
-    ("block-size", po::value<uint64_t>(), "Block size for the alignment input/output stream in Bytes. Use -K or -M to specify in Kilobytes or Megabytes")
-    (",K", "Interpret the block-size argument as Kilobytes instead of Bytes")
-    (",M", "Interpret the block-size argument as Megabytes instead of Bytes")
-    ("compression,c", po::value<uint16_t>()->default_value(2), "Compress alignment files. 0: no compression (default) 1: Deflate (smaller) 2: LZ4 (faster)")
-    ("num-threads,n", po::value<int>()->default_value(1), "Number of threads to spawn")
-    ;
-
-  po::options_description cmdline_options;
-  cmdline_options.add(general).add(parameters).add(io_settings).add(alignment).add(technical);
-
-  po::options_description visible_options;
-  visible_options.add(general).add(io_settings).add(alignment).add(technical);
-  
-  std::stringstream help_message;
-  help_message << "HiLive v"<< HiLive_VERSION_MAJOR << "." << HiLive_VERSION_MINOR << " - Realtime Alignment of Illumina Reads" << std::endl;
-  help_message << "Copyright (c) 2015, Martin S. Lindner" << std::endl;
-  help_message << "HiLive is open-source software. Check with --license for details." << std::endl << std::endl;
-  help_message << "Fixed k-mer size: " << K << std::endl << std::endl;
-  help_message << "Usage: " << std::string(argv[0]) << " [options] BC_DIR INDEX CYCLES" << std::endl;
-  help_message << "  BC_DIR       Illumina BaseCalls directory of the sequencing run to analyze" << std::endl;
-  help_message << "  INDEX        Path to k-mer index file (*.kix)" << std::endl;
-  help_message << "  CYCLES       Total number of cycles for read 1" << std::endl << std::endl;
-  help_message << visible_options << std::endl;
-
-
-  po::positional_options_description p;
-  p.add("BC_DIR", 1);
-  p.add("INDEX", 1);
-  p.add("CYCLES", 1);
-
-  po::variables_map vm;
-  try {
-    // parse arguments
-    po::store(po::command_line_parser(argc, argv).
-	      options(cmdline_options).positional(p).run(), vm);
-    // first check if -h or --help was called
-    if (vm.count("help")) {
-      std::cout << help_message.str();
-      return 1;
-    }
-    // first check if --license was called
-    if (vm.count("license")) {
-      std::cout << license << std::endl;
-      return 1;
-    }
-    
-    // then check arguments
-    po::notify(vm);  
-  }
-  catch ( boost::program_options::required_option& e ) {
-    std::cerr << "Missing Parameter: " << e.what() << std::endl << std::endl;
-    std::cout << help_message.str();
-    return -1;  
-  }
-  catch( boost::program_options::error& e) { 
-    std::cerr << "Error while parsing command line options: " << e.what() << std::endl << std::endl; 
-    std::cout << help_message.str();
-    return -1;  
-  } 
-
-
-  // variables to set
-  std::string root = vm["BC_DIR"].as<std::string>();
-  std::string index_fname = vm["INDEX"].as<std::string>();
-  CountType rlen = vm["CYCLES"].as<CountType>();
-
-  std::vector<uint16_t> lanes;
-  std::vector<uint16_t> tiles;
-
-  AlignmentSettings settings;
-  
-  bool write_sam;
-  std::string sam_dir;
-
-  if (vm.count("temp"))
-    settings.temp_dir = vm["temp"].as<std::string>();
-  else
-    settings.temp_dir = "";
-
-  if (vm.count("sam")) {
-    write_sam = true;
-    settings.sam_dir = vm["sam"].as<std::string>();
-    if (settings.sam_dir == "") {
-      if (settings.temp_dir == "") 
-	settings.sam_dir = root;
-      else
-	settings.sam_dir = settings.temp_dir;
-    }
-  }
-  else
-    write_sam = false;
-
-
-  settings.keep_aln_files = (vm.count("keep-files") > 0);
-
-  if (vm.count("lanes"))
-    lanes = vm["lanes"].as< std::vector<uint16_t> >();
-  else
-    lanes = all_lanes();
- 
-  if (vm.count("tiles"))
-    tiles = vm["tiles"].as< std::vector<uint16_t> >();
-  else
-    tiles = all_tiles();
-  
-  if (vm.count("min-errors")) 
-    settings.min_errors = vm["min-errors"].as<uint16_t>();
-  else
-    settings.min_errors = 2;
-
-  settings.best_hit_mode = (vm.count("best-hit") > 0);
-  if (vm.count("best-hit") == 0 && vm.count("best-n") > 0 ) {
-    settings.best_n_mode = true;
-    settings.best_n = vm["best-n"].as<CountType>();
-  }
-  
-  settings.discard_ohw = (vm.count("disable-ohw-filter") == 0);
-  
-  if (vm.count("start-ohw")) 
-    settings.start_ohw = vm["start-ohw"].as<CountType>();
-  else
-    settings.start_ohw = K+5;
-
-  if (vm.count("window")) 
-    settings.window = vm["window"].as<DiffType>();
-  else
-    settings.window = 5;
-
-  if (vm.count("min-quality")) 
-    settings.min_qual = vm["min-quality"].as<uint16_t>();
-  else
-    settings.min_qual = K+5;
-
-  if (vm.count("block-size")) {
-    if (vm.count("-M"))
-      settings.block_size = vm["block-size"].as<uint64_t>()*1024*1024;
-    else if (vm.count("-K"))
-      settings.block_size = vm["block-size"].as<uint64_t>()*1024;
-    else
-      settings.block_size = vm["block-size"].as<uint64_t>();
-  }
-  else
-    settings.block_size = 64*1024*1024;
-
-  if (vm.count("compression")) 
-    settings.compression_format = (uint8_t) vm["compression"].as<uint16_t>();
-  else
-    settings.compression_format = 2;
-
-  int num_threads;
-  if (vm.count("num-threads")) 
-    num_threads = vm["num-threads"].as<int>();
-  else
-    num_threads = 1;
-
-
-  // check paths and file names
-  if (!file_exists(index_fname)){
-    std::cerr << "Input error: Could not find k-mer index file " << index_fname << std::endl;
-    return -1;
-  }
-
-  std::size_t found = root.find("BaseCalls");
-  if (!(found != std::string::npos && found >= root.size()-10)) {
-    std::cerr << "Warning: BaseCalls directory seems to be invalid: " << root << std::endl;
-  } 
-
-  if (!is_directory(root)){
-    std::cerr << "Input error: Could not find BaseCalls directory " << root << std::endl;
-    return -1;
-  }
-
-  for ( uint16_t ln : lanes ) {
-    std::string ln_dir = root;
-    if ( ln < 10 )
-      ln_dir += "/L00";
-    else if ( ln < 100 )
-      ln_dir += "/L0";
-    else
-      ln_dir += "/L";
-    ln_dir += std::to_string(ln);
-    if (!is_directory(ln_dir)){
-      std::cerr << "Input error: Could not find location of Lane " << ln << ": " << ln_dir << std::endl;
-      return -1;
-    }
-  }
-  
-
-  // Report the basic settings
-  std::cout << "Running HiLive with " << num_threads << " thread(s)." << std::endl;
-  std::cout << "BaseCalls directory:   " << root << std::endl;
-  if (settings.temp_dir != "") {
-    std::cout << "Temporary directory:   " << settings.temp_dir << std::endl;
-  }
-  if (write_sam) {
-    std::cout << "SAM output directory:  " << settings.sam_dir << std::endl;
-  }
-  std::cout << "Lanes:                 ";
-  for ( uint16_t ln : lanes )
-    std::cout << ln << " ";
-  std::cout << std::endl;
-  std::cout << "K-mer index:           " << index_fname << std::endl;
-  std::cout << "Read length:           " << rlen << std::endl;
-  std::cout << "Mapping error:         " << settings.min_errors << std::endl;
-  if (settings.best_hit_mode) 
-    std::cout << "Mapping mode:          Best-Hit-Mode" << std::endl;
-  else if (settings.best_n_mode) 
-    std::cout << "Mapping mode:          Best-N-Mode" << std::endl;
-  else
-    std::cout << "Mapping mode:          All-Hits-Mode" << std::endl;
-  std::cout << std::endl;
-
-
-  // load the index
-  std::cout << "Loading Index" << std::endl;
-  KixRun* index = new KixRun();
-  index->deserialize_file(index_fname);
-
-
-  // Create the overall agenda
-  Agenda agenda (root, rlen, lanes, tiles);
-
-
-  // prepare the alignment
-  std::cout << "Initializing Alignment files. Waiting for the first cycle to finish." << std::endl;
-  bool first_cycle_available = false;
-  int wait = 0;
-  int max_wait = 3600;
-  while ( !first_cycle_available && wait < max_wait ) {
-    // check for new BCL files and update the agenda status
-    agenda.update_status();
-
-    // check if the first cycle is available for all tiles
-    first_cycle_available = true;
-    for ( auto ln : lanes ) {
-      for ( auto tl : tiles ) {
-	if ( agenda.get_status(Task(ln,tl,1,rlen,"")) != BCL_AVAILABLE) {
-	  first_cycle_available = false;
-	}
-      }
-    }
-    
-    // take a small break
-    std::this_thread::sleep_for (std::chrono::milliseconds(1000));
-    wait+=1;
-  }
-
-  if (wait >= max_wait) {
-    std::cerr << "Program aborted. No BCL files found." << std::endl;
-    return -1;
-  }
-  else {
-    std::cout << "First cycle complete. Starting alignment." << std::endl;
-  }
-
-  
-  // write empty alignment file for each tile
-  for (uint16_t ln : lanes) {
-    for (uint16_t tl : tiles) {
-      StreamedAlignment s (ln, tl, root, rlen);
-      s.create_directories(&settings);
-      s.init_alignment(&settings);
-    }
-  }
-
-  if (settings.temp_dir != "" && !is_directory(settings.temp_dir)){
-    std::cerr << "Error: Could not find temporary directory " << settings.temp_dir << std::endl;
-    return -1;
-  }
-
-  // Set up the queues
-  TaskQueue toDoQ;
-  TaskQueue finishedQ;
-  TaskQueue failedQ;
-
-  // Create the threads
-  std::cout << "Creating " << num_threads << " threads." << std::endl;
-  bool surrender = false;
-  std::vector<std::thread> workers;
-  for (int i = 0; i < num_threads; i++) {
-    workers.push_back(std::thread(worker, std::ref(toDoQ), std::ref(finishedQ), std::ref(failedQ), &settings, index, std::ref(surrender)));
-  }
-  
-  // Process all tasks on the agenda
-  while ( !agenda.finished() ) {
-    // check for new BCL files and update the agenda status
-    agenda.update_status();
-    
-    // fill the ToDo queue with tasks from the agenda
-    while(true) {
-      Task t = agenda.get_task();
-      if (t == NO_TASK)
-	break;
-      toDoQ.push(t);
-      agenda.set_status(t,RUNNING);
-    }
+int main(int argc, const char* argv[]) {
+    time_t t_start = time(NULL);
+
+    // parse the command line arguments, store results in settings
+    AlignmentSettings settings;
+    if (parseCommandLineArguments(settings, license, argc, argv)) // returns true if error occured 
+        return 1;
+
+    // load the index
+    std::cout << "Loading Index" << std::endl;
+    KixRun* index = new KixRun();
+    index->deserialize_file(settings.index_fname);
+
+
+    // Create the overall agenda
+    Agenda agenda (settings.root, settings.rlen, settings.lanes, settings.tiles);
+
+
+    // prepare the alignment
+    std::cout << "Initializing Alignment files. Waiting for the first cycle to finish." << std::endl;
+    bool first_cycle_available = false;
+
+    // wait for the first cycle to be written. Attention - this loop will wait infinitely long if no first cycle is found
+    while ( !first_cycle_available ) {
+        // check for new BCL files and update the agenda status
+        agenda.update_status();
+
+        // check if the first cycle is available for all tiles
+        first_cycle_available = true;
+        for ( auto ln : settings.lanes ) {
+            for ( auto tl : settings.tiles ) {
+                if ( agenda.get_status(Task(ln,tl,1,settings.rlen,"")) != BCL_AVAILABLE) {
+                    first_cycle_available = false;
+                }
+            }
+        }
 
-    // take a look in the finished queue and process finished tasks
-    while(true) {
-      Task t = finishedQ.pop();
-      if (t == NO_TASK)
-	break;
-      agenda.set_status(t,FINISHED);
+        // take a small break
+        std::this_thread::sleep_for(std::chrono::milliseconds(1000));
     }
 
-    // take a look in the failed queue and process failed tasks
-    while(true) {
-      Task t = failedQ.pop();
-      if (t == NO_TASK)
-	break;
-      if (agenda.get_status(t) == RUNNING) {
-	// give it one more chance
-	agenda.set_status(t,RETRY);
-	toDoQ.push(t);
-      }
-      else {
-	agenda.set_status(t,FAILED);
-	std::cout << "Task failed! " << t << std::endl;
-      }
+    std::cout << "First cycle complete. Starting alignment." << std::endl;
+
+    // write empty alignment file for each tile
+    for (uint16_t ln : settings.lanes) {
+        for (uint16_t tl : settings.tiles) {
+            StreamedAlignment s (ln, tl, settings.root, settings.rlen);
+            s.create_directories(&settings);
+            s.init_alignment(&settings);
+        }
+    }
+
+    if (settings.temp_dir != "" && !is_directory(settings.temp_dir)){
+        std::cerr << "Error: Could not find temporary directory " << settings.temp_dir << std::endl;
+        return -1;
+    }
+
+    // Set up the queues
+    TaskQueue toDoQ;
+    TaskQueue finishedQ;
+    TaskQueue failedQ;
+
+    // Create the threads
+    std::cout << "Creating " << settings.num_threads << " threads." << std::endl;
+    bool surrender = false;
+    std::vector<std::thread> workers;
+    for (int i = 0; i < settings.num_threads; i++) {
+        workers.push_back(std::thread(worker, std::ref(toDoQ), std::ref(finishedQ), std::ref(failedQ), &settings, index, std::ref(surrender)));
+    }
+
+    // Process all tasks on the agenda
+    while ( !agenda.finished() ) {
+        // check for new BCL files and update the agenda status
+        agenda.update_status();
+
+        // fill the ToDo queue with tasks from the agenda
+        while(true) {
+            Task t = agenda.get_task();
+            if (t == NO_TASK)
+                break;
+            toDoQ.push(t);
+            agenda.set_status(t,RUNNING);
+        }
+
+        // take a look in the finished queue and process finished tasks
+        while(true) {
+            Task t = finishedQ.pop();
+            if (t == NO_TASK)
+                break;
+            agenda.set_status(t,FINISHED);
+        }
+
+        // take a look in the failed queue and process failed tasks
+        while(true) {
+            Task t = failedQ.pop();
+            if (t == NO_TASK)
+                break;
+            if (agenda.get_status(t) == RUNNING) {
+                // give it one more chance
+                agenda.set_status(t,RETRY);
+                toDoQ.push(t);
+            }
+            else {
+                agenda.set_status(t,FAILED);
+                std::cout << "Task failed! " << t << std::endl;
+            }
+        }
+
+        // take a small break
+        std::this_thread::sleep_for (std::chrono::milliseconds(100));
+    }  
+
+    // Halt the threads
+    surrender = true;
+    for (auto& w : workers) {
+        w.join();
     }
+    std::cout << "All threads joined." << std::endl;
 
-    // take a small break
-    std::this_thread::sleep_for (std::chrono::seconds(1));
-  }  
-  
-  // Halt the threads
-  surrender = true;
-  for (auto& w : workers) {
-    w.join();
-  }
-  std::cout << "All threads joined." << std::endl;
-
-  if (write_sam) {
+    
     std::cout << "Writing SAM files." << std::endl;
     // Create individual SAM files for every tile
     TaskQueue sam_tasks; 
     std::vector<Task> tv = agenda.get_SAM_tasks();
     for ( auto t: tv ) {
-      sam_tasks.push(t);
+        sam_tasks.push(t);
     }
-    
+
     workers.clear();
-    for (int i = 0; i < num_threads; i++) {
-      workers.push_back(std::thread(sam_worker, std::ref(sam_tasks), &settings, index));
+    for (int i = 0; i < settings.num_threads; i++) {
+        workers.push_back(std::thread(sam_worker, std::ref(sam_tasks), &settings, index));
     }
-    
+
     for (auto& w : workers) {
-      w.join();
+        w.join();
     }
-  }
-
-  std::cout << "Total run time: " << time(NULL) - t_start << " s" << std::endl;
 
-  delete index;
 
-  return 1;
+    std::cout << "Total run time: " << time(NULL) - t_start << " s" << std::endl;
+    delete index;
+    return 1;
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/hilive.git



More information about the debian-med-commit mailing list