[med-svn] [spades] 01/02: New upstream version 3.11.1+dfsg

Sascha Steinbiss satta at debian.org
Fri Nov 17 16:16:52 UTC 2017


This is an automated email from the git hooks/post-receive script.

satta pushed a commit to branch master
in repository spades.

commit 7c4e71e9de8508a118e9631f6321d910d91a5ea5
Author: Sascha Steinbiss <satta at debian.org>
Date:   Fri Nov 17 13:37:47 2017 +0100

    New upstream version 3.11.1+dfsg
---
 LICENSE                                            |    7 +-
 README                                             |    9 +-
 VERSION                                            |    2 +-
 changelog.html                                     |   33 +-
 configs/corrector/corrector.info                   |    3 +-
 configs/corrector/log.properties                   |    2 +
 .../{careful_mode.info => careful_mda_mode.info}   |   14 +-
 configs/debruijn/careful_mode.info                 |   12 +-
 configs/debruijn/config.info                       |   25 +-
 configs/debruijn/log.properties                    |    2 +
 configs/debruijn/mda_mode.info                     |    2 +-
 configs/debruijn/meta_mode.info                    |   31 +-
 configs/debruijn/moleculo_mode.info                |    6 +-
 configs/debruijn/pe_params.info                    |   35 +-
 configs/debruijn/rna_fast_mode.info                |   12 +
 configs/debruijn/rna_mode.info                     |   80 +-
 configs/debruijn/simplification.info               |   36 +-
 configs/debruijn/toy.info                          |    4 +
 configs/debruijn/toy.yaml                          |    4 +
 ext/include/ConsensusCore/Feature.hpp              |  166 --
 ext/include/ConsensusCore/Features.hpp             |   68 -
 .../ConsensusCore/Matrix/SparseMatrix-inl.hpp      |    1 -
 ext/include/ConsensusCore/Poa/PoaGraph.hpp         |    4 +-
 ext/include/ConsensusCore/Version.hpp              |    1 -
 ext/include/boomphf/BooPHF.h                       | 1422 +++++++++++++
 ext/include/llvm/ADT/IntrusiveRefCntPtr.h          |    2 +-
 ext/include/llvm/Support/MathExtras.h              |   46 +
 ext/include/llvm/Support/TrailingObjects.h         |  401 ++++
 ext/src/ConsensusCore/CMakeLists.txt               |    2 -
 ext/src/ConsensusCore/Feature.cpp                  |   64 -
 ext/src/ConsensusCore/Features.cpp                 |   51 -
 ext/src/ConsensusCore/Matrix/DenseMatrix.cpp       |    6 +-
 ext/src/ConsensusCore/Matrix/SparseMatrix.cpp      |    4 +-
 ext/src/ConsensusCore/Poa/PoaConsensus.cpp         |    7 +-
 ext/src/ConsensusCore/Poa/PoaGraph.cpp             |   33 +-
 ext/src/ConsensusCore/Version.cpp                  |    2 -
 manual.html                                        |   88 +-
 metaspades.py                                      |   81 +-
 plasmidspades.py                                   |   81 +-
 rnaspades.py                                       |   81 +-
 rnaspades_manual.html                              |   48 +-
 spades.py                                          |   81 +-
 src/CMakeLists.txt                                 |    5 +
 src/cmake/options.cmake                            |    9 +
 src/cmake/pack.cmake                               |    4 +-
 src/common/CMakeLists.txt                          |    7 +-
 src/common/adt/array_vector.hpp                    |  327 +--
 src/common/adt/bag.hpp                             |   16 +-
 src/common/adt/bf.hpp                              |  285 +--
 src/common/adt/chained_iterator.hpp                |    4 +-
 src/common/adt/concurrent_dsu.hpp                  |   15 +-
 src/common/adt/filter_iterator.hpp                 |   32 +-
 src/common/adt/flat_map.hpp                        |   59 +-
 src/common/adt/flat_set.hpp                        |   40 +-
 src/common/adt/hll.hpp                             |   31 +-
 src/common/adt/iterator_range.hpp                  |   11 +-
 src/common/adt/kmer_hash_vector.hpp                |  370 ----
 src/common/adt/kmer_vector.hpp                     |   22 +-
 src/common/adt/loser_tree.hpp                      |   29 +-
 src/common/adt/parallel_seq_vector.hpp             |  110 -
 src/common/adt/parallel_unordered_map.hpp          |  137 --
 src/common/adt/pointer_iterator.hpp                |    4 +
 src/common/adt/queue_iterator.hpp                  |   22 +-
 src/common/adt/small_pod_vector.hpp                |    6 +-
 .../components/connected_component.cpp             |    4 +-
 .../components/connected_component.hpp             |    2 +-
 src/common/assembly_graph/components/splitters.hpp |   33 +-
 .../construction}/debruijn_graph_constructor.hpp   |  367 ++--
 .../construction}/early_simplification.hpp         |    8 +-
 src/common/assembly_graph/core/graph_core.hpp      |   30 +-
 src/common/assembly_graph/core/graph_iterators.hpp |   13 +-
 src/common/assembly_graph/core/order_and_law.hpp   |    2 +-
 .../assembly_graph/dijkstra/dijkstra_algorithm.hpp |    8 +-
 .../assembly_graph/graph_support/contig_output.hpp |  584 +----
 .../graph_support/coverage_filling.hpp             |    4 +-
 .../graph_support/detail_coverage.hpp              |    5 +-
 .../graph_support/genomic_quality.hpp              |   20 +-
 .../graph_support/parallel_processing.hpp          |   76 +-
 .../graph_support/scaff_supplementary.cpp          |   46 +-
 .../graph_support/scaff_supplementary.hpp          |  110 +-
 .../handlers/edge_labels_handler.hpp               |    2 +-
 .../handlers/edges_position_handler.hpp            |   30 +-
 .../assembly_graph/handlers/id_track_handler.hpp   |   31 +-
 .../index}/edge_index_builders.hpp                 |   84 +-
 .../index}/edge_info_updater.hpp                   |    4 +-
 .../index}/edge_multi_index.hpp                    |    8 +-
 .../index}/edge_position_index.hpp                 |   22 +-
 .../assembly_graph/paths/bidirectional_path.hpp    |  766 ++-----
 .../paths/bidirectional_path_container.hpp         |  210 ++
 .../bidirectional_path_output.cpp                  |   71 +-
 .../bidirectional_path_output.hpp                  |  274 ++-
 .../paths/bidirectional_path_io/io_support.cpp     |  100 +-
 .../paths/bidirectional_path_io/io_support.hpp     |   81 +-
 src/common/assembly_graph/paths/mapping_path.hpp   |  227 +-
 src/common/assembly_graph/paths/path_processor.hpp |   17 +-
 src/common/assembly_graph/stats/picture_dump.hpp   |   47 +-
 src/common/assembly_graph/stats/statistics.hpp     |    7 +-
 src/common/io/dataset_support/dataset_readers.hpp  |    2 +-
 src/common/io/dataset_support/read_converter.hpp   |    4 +-
 src/common/io/kmers/kmer_iterator.hpp              |    2 +-
 src/common/io/kmers/mmapped_reader.hpp             |    9 +-
 src/common/io/kmers/mmapped_writer.hpp             |   14 +-
 src/common/io/reads/binary_converter.hpp           |    2 +-
 src/common/io/reads/binary_streams.hpp             |    4 +-
 src/common/io/reads/fasta_fastq_gz_parser.hpp      |    5 +
 src/common/io/reads/file_reader.hpp                |    4 +-
 src/common/io/reads/ireadstream.hpp                |    4 +
 src/common/io/reads/osequencestream.hpp            |  318 +--
 src/common/io/reads/paired_read.hpp                |    5 -
 src/common/io/reads/read.hpp                       |    2 +-
 src/common/io/reads/read_processor.hpp             |    2 +-
 src/common/io/reads/sequence_reader.hpp            |    2 +-
 src/common/io/reads/single_read.hpp                |   22 +-
 src/common/io/reads/wrapper_collection.hpp         |    2 +-
 src/common/{utils => math}/log.hpp                 |    0
 src/common/modules/alignment/bwa_index.cpp         |   10 +-
 src/common/modules/alignment/edge_index.hpp        |    8 +-
 .../modules/alignment/edge_index_refiller.cpp      |    4 +-
 src/common/modules/alignment/kmer_mapper.hpp       |    4 +-
 .../modules/alignment/kmer_mapper_logger.hpp       |    2 +-
 src/common/modules/alignment/pacbio/pac_index.hpp  |  281 ++-
 .../alignment/pacbio/pacbio_read_structures.hpp    |   60 +-
 src/common/modules/alignment/rna/ss_coverage.hpp   |   61 +
 .../modules/alignment/rna/ss_coverage_filler.hpp   |   62 +
 src/common/modules/alignment/sequence_mapper.hpp   |    5 +-
 .../modules/alignment/sequence_mapper_notifier.hpp |   17 +-
 src/common/modules/alignment/short_read_mapper.hpp |    2 +-
 .../coverage_model/CMakeLists.txt                  |    0
 .../coverage_model/kmer_coverage_model.cpp         |    2 -
 .../coverage_model/kmer_coverage_model.hpp         |    2 -
 src/common/modules/genome_consistance_checker.cpp  |  610 ++++--
 src/common/modules/genome_consistance_checker.hpp  |  221 +-
 src/common/modules/graph_construction.hpp          |   36 +-
 src/common/modules/graph_read_correction.hpp       |    4 +-
 .../modules/path_extend/extension_chooser.hpp      |  269 ++-
 src/common/modules/path_extend/loop_traverser.hpp  |  127 +-
 .../modules/path_extend/overlap_analysis.hpp       |    4 +-
 src/common/modules/path_extend/path_extender.hpp   | 1457 ++++++-------
 src/common/modules/path_extend/path_filter.hpp     |  163 +-
 src/common/modules/path_extend/path_visualizer.hpp |    4 +-
 .../modules/path_extend/pe_config_struct.cpp       |   66 +-
 .../modules/path_extend/pe_config_struct.hpp       |   29 +-
 src/common/modules/path_extend/pe_resolver.hpp     |  700 ++----
 src/common/modules/path_extend/pe_utils.hpp        |  156 +-
 .../path_extend/pipeline/extenders_logic.cpp       |  137 +-
 .../path_extend/pipeline/extenders_logic.hpp       |   40 +-
 .../path_extend/pipeline/launch_support.cpp        |   11 +-
 .../path_extend/pipeline/launch_support.hpp        |   18 +-
 .../modules/path_extend/pipeline/launcher.cpp      |  288 ++-
 .../modules/path_extend/pipeline/launcher.hpp      |   40 +-
 .../scaffolder2015/connection_condition2015.cpp    |    4 +-
 .../scaffolder2015/connection_condition2015.hpp    |    2 +-
 .../scaffolder2015/extension_chooser2015.hpp       |    2 +-
 .../path_extend/scaffolder2015/path_polisher.cpp   |  371 ++--
 .../path_extend/scaffolder2015/path_polisher.hpp   |  120 +-
 .../path_extend/scaffolder2015/scaffold_graph.hpp  |    2 +-
 .../scaffolder2015/scaffold_graph_visualizer.cpp   |   12 +-
 src/common/modules/path_extend/weight_counter.hpp  |   83 +-
 .../modules/simplification/bulge_remover.hpp       |  190 +-
 src/common/modules/simplification/cleaner.hpp      |    6 +
 .../simplification/complex_bulge_remover.hpp       |   46 +-
 .../modules/simplification/complex_tip_clipper.hpp |    2 +-
 src/common/modules/simplification/compressor.hpp   |    2 +-
 .../simplification/dominated_set_finder.hpp        |   10 +-
 .../modules/simplification/ec_threshold_finder.hpp |    8 +-
 .../erroneous_connection_remover.hpp               |   14 +-
 .../parallel_simplification_algorithms.hpp         |   86 +-
 .../simplification/relative_coverage_remover.hpp   |   14 +-
 src/common/modules/simplification/tip_clipper.hpp  |   14 +-
 .../mph_index => paired_info}/CMakeLists.txt       |    8 +-
 src/common/paired_info/distance_estimation.cpp     |  176 ++
 src/common/paired_info/distance_estimation.hpp     |  227 +-
 src/common/paired_info/histogram.hpp               |    4 +-
 src/common/paired_info/paired_info.hpp             |    2 +-
 .../paired_info/smoothing_distance_estimation.cpp  |  185 ++
 .../paired_info/smoothing_distance_estimation.hpp  |  198 +-
 src/common/paired_info/split_path_constructor.hpp  |    2 +-
 .../paired_info/weighted_distance_estimation.cpp   |   63 +
 .../paired_info/weighted_distance_estimation.hpp   |   69 +-
 src/common/paired_info/weights.hpp                 |    3 +-
 src/common/pipeline/config_common.hpp              |    8 +-
 src/common/pipeline/config_struct.cpp              |   58 +-
 src/common/pipeline/config_struct.hpp              |   31 +-
 src/common/pipeline/genomic_info_filler.cpp        |   10 +-
 src/common/pipeline/graph_pack.hpp                 |   17 +-
 src/common/pipeline/graphio.hpp                    |   29 +-
 src/common/pipeline/library.cpp                    |    2 +-
 src/common/pipeline/library.hpp                    |   10 +-
 src/common/pipeline/library.inl                    |    2 +-
 src/common/pipeline/stage.cpp                      |    4 +-
 src/common/sequence/genome_storage.hpp             |   67 +-
 src/common/{utils => sequence}/levenshtein.hpp     |    8 +-
 src/common/{utils => sequence}/range.hpp           |    7 +-
 src/common/sequence/rtseq.hpp                      |   17 +-
 src/common/sequence/seq.hpp                        |   10 +-
 src/common/sequence/sequence.hpp                   |  121 +-
 src/common/sequence/sequence_tools.hpp             |    4 +-
 src/common/sequence/simple_seq.hpp                 |    2 +-
 src/common/stages/construction.cpp                 |    2 +-
 src/common/stages/simplification.cpp               |   47 +-
 .../graph_simplification.hpp                       |   49 +-
 .../simplification_pipeline/rna_simplification.hpp |    9 +-
 src/common/utils/CMakeLists.txt                    |    4 +-
 src/common/utils/autocompletion.cpp                |    2 +-
 src/common/utils/autocompletion.hpp                |    2 +-
 src/common/utils/cpp_utils.hpp                     |    5 +
 .../kmer_extension_index.hpp                       |   53 +-
 .../kmer_extension_index_builder.hpp               |    9 +-
 src/common/utils/{ => filesystem}/copy_file.cpp    |   25 +-
 src/common/utils/{ => filesystem}/copy_file.hpp    |   10 +-
 src/common/utils/{ => filesystem}/file_limit.hpp   |    4 +
 src/common/utils/{ => filesystem}/path_helper.cpp  |   11 +-
 src/common/utils/{ => filesystem}/path_helper.hpp  |   22 +-
 src/common/utils/indices/editable_index.hpp        |  270 ---
 src/common/utils/indices/kmer_splitters.hpp        |  317 ---
 .../common/utils/kmer_mph/1.cpp                    |    0
 .../utils/{mph_index => kmer_mph}/CMakeLists.txt   |    2 +-
 .../utils/{mph_index => kmer_mph}/kmer_index.hpp   |   33 +-
 .../{mph_index => kmer_mph}/kmer_index_builder.hpp |  216 +-
 .../{mph_index => kmer_mph}/kmer_index_traits.hpp  |   28 +-
 src/common/utils/kmer_mph/kmer_splitters.hpp       |  394 ++++
 src/common/utils/logger/log_writers.hpp            |   44 +-
 src/common/utils/logger/logger.hpp                 |    9 +-
 src/common/utils/logger/logger_impl.cpp            |   19 +-
 src/common/utils/md5.h                             |    4 +
 src/common/utils/mph_index/base_hash.hpp           |  293 ---
 src/common/utils/mph_index/bitpair_vector.cpp      |   77 -
 src/common/utils/mph_index/bitpair_vector.hpp      |   27 -
 src/common/utils/mph_index/common.hpp              |   66 -
 src/common/utils/mph_index/emphf_config.hpp        |    6 -
 src/common/utils/mph_index/hypergraph.hpp          |  137 --
 .../utils/mph_index/hypergraph_sorter_seq.hpp      |  130 --
 src/common/utils/mph_index/mphf.hpp                |  136 --
 .../utils/mph_index/ranked_bitpair_vector.hpp      |   91 -
 src/common/utils/{ => parallel}/openmp_wrapper.h   |    0
 .../utils/{ => parallel}/parallel_wrapper.hpp      |    0
 src/common/utils/{ => perf}/memory.hpp             |   11 +-
 src/common/utils/{ => perf}/memory_limit.hpp       |    7 +-
 src/common/utils/{ => perf}/perfcounter.hpp        |   70 +-
 .../utils/{indices => ph_map}/key_with_hash.hpp    |    2 +-
 .../utils/{indices => ph_map}/perfect_hash_map.hpp |   12 +-
 .../perfect_hash_map_builder.hpp                   |   15 +-
 .../utils/{indices => ph_map}/storing_traits.hpp   |   23 +-
 src/common/utils/{indices => ph_map}/values.hpp    |    2 +-
 src/common/utils/segfault_handler.hpp              |    5 +
 src/common/utils/simple_tools.hpp                  |  189 --
 src/common/utils/stacktrace.hpp                    |   11 +-
 src/common/utils/standard_base.hpp                 |   10 +-
 src/common/utils/stl_utils.hpp                     |  141 ++
 src/common/utils/verify.hpp                        |    7 +-
 src/common/visualization/graph_labeler.hpp         |   12 +-
 src/common/visualization/graph_print_utils.hpp     |    8 +-
 src/common/visualization/position_filler.hpp       |   50 +-
 src/common/visualization/visualization_utils.hpp   |   10 +-
 src/projects/cap/assembly_compare.hpp              |    2 +-
 src/projects/cap/assembly_problem_detection.hpp    |    6 +-
 src/projects/cap/cap_commands.hpp                  |    8 +-
 src/projects/cap/cap_environment_manager.hpp       |    2 +-
 src/projects/cap/cap_kmer_index.hpp                |    4 +-
 src/projects/cap/compare_standard.hpp              |    4 +-
 src/projects/cap/comparison_utils.hpp              |    8 +-
 src/projects/cap/deprecated/tools_deprecated.cpp   |    2 +-
 src/projects/cap/diff_masking.hpp                  |    2 +-
 src/projects/cap/gene_analysis.hpp                 |   14 +-
 src/projects/cap/genome_correction.hpp             |    8 +-
 src/projects/cap/longseq.hpp                       |    4 +-
 src/projects/cap/main.cpp                          |    4 +-
 src/projects/cap/mosaic.hpp                        |    8 +-
 src/projects/cap/repeat_masking.hpp                |    8 +-
 src/projects/cap/simple_inversion_finder.hpp       |    4 +-
 src/projects/cap/stats.hpp                         |   24 +-
 src/projects/cclean/CMakeLists.txt                 |   30 -
 src/projects/cclean/adapter_index.cpp              |   50 -
 src/projects/cclean/adapter_index.hpp              |   61 -
 src/projects/cclean/additional.cpp                 |   69 -
 src/projects/cclean/brute_force_clean.cpp          |   97 -
 src/projects/cclean/brute_force_clean.hpp          |   72 -
 src/projects/cclean/comparator.hpp                 |   18 -
 src/projects/cclean/config_struct_cclean.cpp       |   44 -
 src/projects/cclean/config_struct_cclean.hpp       |   42 -
 src/projects/cclean/job_wrappers.cpp               |   97 -
 src/projects/cclean/job_wrappers.hpp               |   73 -
 src/projects/cclean/main.cpp                       |   86 -
 src/projects/cclean/output.cpp                     |   82 -
 src/projects/cclean/output.hpp                     |   49 -
 src/projects/cclean/running_modes.cpp              |  268 ---
 src/projects/cclean/running_modes.hpp              |   93 -
 src/projects/cclean/utils.cpp                      |  136 --
 src/projects/cclean/utils.hpp                      |   58 -
 src/projects/cclean/valid_kmer_generator.hpp       |  198 --
 src/projects/corrector/config_struct.cpp           |    3 +-
 src/projects/corrector/config_struct.hpp           |    1 +
 src/projects/corrector/contig_processor.cpp        |   32 +-
 src/projects/corrector/contig_processor.hpp        |    8 +-
 src/projects/corrector/dataset_processor.cpp       |   34 +-
 src/projects/corrector/dataset_processor.hpp       |    9 +-
 .../corrector/interesting_pos_processor.hpp        |    5 +
 src/projects/corrector/main.cpp                    |   25 +-
 src/projects/corrector/positional_read.hpp         |    7 +-
 .../consensus_contigs_constructor.hpp              |   10 +-
 .../contig_correctors/close_gaps_corrector.hpp     |   16 +-
 .../equal_path_deletion_correction.hpp             |    2 +-
 .../contig_correctors/incorrect_contig_remover.hpp |    2 +-
 .../iterative_redundant_contigs_remover.hpp        |   16 +-
 .../contig_correctors/overlap_searcher.hpp         |   10 +-
 .../contig_correctors/redundant_contig_remover.hpp |    4 +-
 .../overlap_graph.hpp                              |    6 +-
 src/projects/dipspades/dipspades.hpp               |   20 +-
 src/projects/dipspades/dipspades_config.cpp        |    4 +-
 .../conservative_regions_searcher.hpp              |    4 +-
 .../haplotype_assembly/haplotype_assembler.hpp     |    2 +-
 src/projects/dipspades/main.cpp                    |   22 +-
 .../polymorphic_bulge_remover.hpp                  |    8 +-
 src/projects/dipspades/utils/bulge_utils.hpp       |    2 +-
 src/projects/hammer/config_struct_hammer.cpp       |    2 +-
 src/projects/hammer/hamcluster.cpp                 |   14 +-
 src/projects/hammer/hamcluster.hpp                 |    6 +-
 src/projects/hammer/hammer_tools.cpp               |    2 +-
 src/projects/hammer/kmer_cluster.cpp               |   20 +-
 src/projects/hammer/kmer_data.cpp                  |   28 +-
 src/projects/hammer/kmer_data.hpp                  |   13 +-
 src/projects/hammer/main.cpp                       |   10 +-
 src/projects/hammer/parallel_radix_sort.hpp        |    2 +-
 src/projects/ionhammer/CMakeLists.txt              |   22 +-
 src/projects/ionhammer/HSeq.hpp                    |  230 +-
 src/projects/ionhammer/comparator.py               |  215 ++
 src/projects/ionhammer/config_struct.cpp           |  134 +-
 src/projects/ionhammer/config_struct.hpp           |   37 +-
 src/projects/ionhammer/consensus.hpp               |   12 +-
 src/projects/ionhammer/err_helper_table.cpp        |   18 +-
 src/projects/ionhammer/err_helper_table.hpp        |   50 +-
 src/projects/ionhammer/expander.cpp                |   60 -
 src/projects/ionhammer/expander.hpp                |   31 -
 src/projects/ionhammer/flow_space_read.hpp         |   33 +-
 src/projects/ionhammer/gamma_poisson_model.cpp     |   16 +
 src/projects/ionhammer/gamma_poisson_model.hpp     |  869 ++++++++
 src/projects/ionhammer/hamcluster.cpp              |  219 --
 src/projects/ionhammer/hamcluster.hpp              |  191 --
 src/projects/ionhammer/hamcluster_1.cpp            |    5 +
 src/projects/ionhammer/hamcluster_1.h              |   81 +
 src/projects/ionhammer/hkmer.hpp                   |   34 +-
 src/projects/ionhammer/hkmer_distance.hpp          |  217 +-
 src/projects/ionhammer/io_read_corrector.hpp       |  230 ++
 src/projects/ionhammer/kmer_data.cpp               |  119 +-
 src/projects/ionhammer/kmer_data.hpp               |   77 +-
 src/projects/ionhammer/kmer_evaluator.cpp          |   53 +
 src/projects/ionhammer/kmer_helpers.cpp            |    5 +
 src/projects/ionhammer/kmer_helpers.h              |   69 +
 src/projects/ionhammer/main.cpp                    |  497 +++--
 src/projects/ionhammer/normal_quality_model.cpp    |   14 +
 src/projects/ionhammer/normal_quality_model.hpp    |  490 +++++
 src/projects/ionhammer/penalty_estimator.hpp       |  326 +++
 src/projects/ionhammer/quality_metrics.cpp         |    5 +
 src/projects/ionhammer/quality_metrics.h           |  189 ++
 .../ionhammer/quality_thresholds_estimator.cpp     |    5 +
 .../ionhammer/quality_thresholds_estimator.h       |  101 +
 src/projects/ionhammer/read_corrector.hpp          |  563 +++--
 src/projects/ionhammer/read_corrector_new.hpp      |  252 +++
 .../ionhammer/read_corrector_structs_new.h         |  740 +++++++
 src/projects/ionhammer/reference.cpp               |    1 +
 src/projects/ionhammer/reference.h                 |   59 +
 src/projects/ionhammer/subcluster.cpp              |  283 ++-
 src/projects/ionhammer/subcluster.hpp              |  100 +-
 src/projects/ionhammer/thread_utils.h              |   57 +
 src/projects/ionhammer/valid_hkmer_generator.hpp   |  130 +-
 src/projects/mph_test/main.cpp                     |   29 +-
 src/projects/mts/CMakeLists.txt                    |    3 +-
 src/projects/mts/Common.snake                      |   98 +-
 src/projects/mts/README                            |   15 +-
 src/projects/mts/Reassembly.snake                  |   74 +
 src/projects/mts/Snakefile                         |  413 ++--
 src/projects/mts/Stats.snake                       |  395 ++--
 src/projects/mts/annotation.hpp                    |   30 +-
 src/projects/mts/config.yaml                       |   34 +-
 src/projects/mts/contig_abundance.cpp              |   80 +-
 src/projects/mts/contig_abundance.hpp              |   43 +-
 src/projects/mts/contig_abundance_counter.cpp      |   61 +-
 src/projects/mts/gzstream/gzstream.C               |  165 ++
 src/projects/mts/gzstream/gzstream.h               |  121 ++
 src/projects/mts/kmer_multiplicity_counter.cpp     |   14 +-
 src/projects/mts/logger.hpp                        |    2 +-
 src/projects/mts/mts.py                            |   71 +-
 src/projects/mts/multirun.py                       |   84 +
 src/projects/mts/prop_binning.cpp                  |   84 +-
 src/projects/mts/propagate.cpp                     |  120 +-
 src/projects/mts/propagate.hpp                     |   10 +-
 src/projects/mts/read_binning.cpp                  |   74 +-
 src/projects/mts/read_binning.hpp                  |   31 +-
 src/projects/mts/scripts/Dataset_analysis.ipynb    | 2246 ++++++++++++++++++++
 src/projects/mts/scripts/bhtsne.py                 |  242 +++
 src/projects/mts/scripts/bin_profiles.py           |   12 +
 src/projects/mts/scripts/calc_kmers_mpl.py         |   38 -
 src/projects/mts/scripts/canopy_launch.sh          |   17 -
 src/projects/mts/scripts/choose_bins.py            |   20 +
 src/projects/mts/scripts/choose_samples.py         |  115 +-
 src/projects/mts/scripts/clusters2csv.py           |   22 +
 src/projects/mts/scripts/combine_contigs.py        |    8 +-
 src/projects/mts/scripts/common.py                 |   63 +-
 src/projects/mts/scripts/contig_name_filter.py     |   30 +
 src/projects/mts/scripts/convert_output.py         |   53 +
 src/projects/mts/scripts/cut_fasta.py              |   45 +
 src/projects/mts/scripts/filter_bin.py             |   17 +
 src/projects/mts/scripts/filter_nucmer.py          |   62 +-
 src/projects/mts/scripts/gather_stats.py           |   84 +-
 src/projects/mts/scripts/gen_samples.py            |   77 +-
 src/projects/mts/scripts/make_input.py             |   53 +-
 src/projects/mts/scripts/make_points_matrix.py     |   35 -
 src/projects/mts/scripts/parse_output.py           |   58 -
 src/projects/mts/scripts/pca.R                     |   47 +-
 src/projects/mts/scripts/ref_stats.sh              |   63 -
 src/projects/mts/scripts/run_tsne.py               |  239 +++
 src/projects/mts/scripts/split_bins.py             |   18 +-
 src/projects/mts/scripts/validate.pl               |  404 ++++
 src/projects/mts/stats.cpp                         |    8 +-
 src/projects/mts/test.py                           |   55 +-
 src/projects/mts/visualization.hpp                 |    4 +-
 src/projects/online_vis/debruijn_environment.hpp   |    2 +-
 .../drawing_commands/draw_missasemblies.hpp        |    6 +-
 .../draw_part_of_genome_command.hpp                |    2 +-
 .../drawing_commands/draw_polymorphic_regions.hpp  |    4 +-
 .../drawing_commands/draw_poorly_assembled.hpp     |    6 +-
 src/projects/online_vis/errors.hpp                 |    2 +-
 src/projects/online_vis/main.cpp                   |   14 +-
 src/projects/online_vis/online_visualizer.hpp      |    6 +-
 src/projects/online_vis/setting_commands.hpp       |    2 +-
 src/projects/scaffold_correction/main.cpp          |   20 +-
 .../scaffold_correction/scaffold_correction.hpp    |    8 +-
 src/projects/spades/CMakeLists.txt                 |    1 +
 src/projects/spades/chromosome_removal.cpp         |   25 +-
 src/projects/spades/chromosome_removal.hpp         |    1 -
 src/projects/spades/contig_output_stage.cpp        |   82 +-
 src/projects/spades/contig_output_stage.hpp        |   15 +-
 src/projects/spades/distance_estimation.cpp        |   22 +-
 src/projects/spades/gap_closer.cpp                 |    2 -
 src/projects/spades/gap_closing.hpp                |   42 +-
 src/projects/spades/hybrid_aligning.cpp            |    4 +-
 src/projects/spades/hybrid_gap_closer.hpp          |  174 +-
 src/projects/spades/launch.hpp                     |   11 +-
 src/projects/spades/main.cpp                       |   19 +-
 src/projects/spades/pair_info_count.cpp            |  145 +-
 src/projects/spades/repeat_resolving.cpp           |    1 +
 src/projects/spades/second_phase_setup.cpp         |    4 +-
 src/projects/spades/second_phase_setup.hpp         |    7 +-
 .../{series_analysis.hpp => series_analysis.cpp}   |  248 ++-
 src/projects/spades/series_analysis.hpp            |  306 +--
 .../truseq_analysis/AlignmentAnalyserNew.cpp       |    1 -
 .../truseq_analysis/consistent_mapping.cpp         |    3 +-
 src/projects/truseq_analysis/consistent_mapping.h  |   12 +-
 src/projects/truseq_analysis/main.cpp              |   16 +-
 src/spades_pipeline/CMakeLists.txt                 |    2 +-
 src/spades_pipeline/common/SeqIO.py                |   34 +-
 .../spades_pipeline/common/__init__.py             |    0
 src/spades_pipeline/corrector_logic.py             |    5 +-
 src/spades_pipeline/hammer_logic.py                |    9 +-
 src/spades_pipeline/options_storage.py             |   48 +-
 src/spades_pipeline/spades_logic.py                |   41 +-
 src/spades_pipeline/support.py                     |   64 +-
 test_dataset/reference_1K.fa.gz                    |  Bin 534 -> 534 bytes
 458 files changed, 20583 insertions(+), 14612 deletions(-)

diff --git a/LICENSE b/LICENSE
index 0438b8d..be69501 100644
--- a/LICENSE
+++ b/LICENSE
@@ -21,13 +21,14 @@ Current SPAdes contributors:
 
     Dmitry Antipov,
     Anton Bankevich,
+    Elena Bushmanova,
+    Vasisliy Ershov,
     Yuriy Gorshkov,
     Alexey Gurevich,
     Anton Korobeynikov,
-    Dmitriy Meleshko,
     Sergey Nurk,
     Andrey Prjibelski,
-    Yana Safonova,
+    Ivan Tolstoganov,
     Alla Lapidus and
     Pavel Pevzner
 
@@ -37,9 +38,11 @@ Also contributed:
     Mikhail Dvorkin,
     Alexander Kulikov,
     Valery Lesin,
+    Dmitriy Meleshko,
     Sergey Nikolenko,
     Son Pham,
     Alexey Pyshkin,
+    Yana Safonova,
     Vladislav Saveliev,
     Alexander Sirotkin,
     Yakov Sirotkin,
diff --git a/README b/README
index 3e67cc3..5abb78a 100755
--- a/README
+++ b/README
@@ -9,13 +9,14 @@ Current SPAdes contributors:
 
     Dmitry Antipov,
     Anton Bankevich,
+    Elena Bushmanova,
+    Vasisliy Ershov,
     Yuriy Gorshkov,
     Alexey Gurevich,
     Anton Korobeynikov,
-    Dmitriy Meleshko,
     Sergey Nurk,
     Andrey Prjibelski,
-    Yana Safonova,
+    Ivan Tolstoganov,
     Alla Lapidus and
     Pavel Pevzner
 
@@ -25,9 +26,11 @@ Also contributed:
     Mikhail Dvorkin,
     Alexander Kulikov,
     Valery Lesin,
+    Dmitriy Meleshko,
     Sergey Nikolenko,
     Son Pham,
     Alexey Pyshkin,
+    Yana Safonova,
     Vladislav Saveliev,
     Alexander Sirotkin,
     Yakov Sirotkin,
@@ -39,6 +42,8 @@ Also contributed:
 Installation instructions and manual can be found on the website:
 http://cab.spbu.ru/software/spades/
 
+Address for communication: spades.support at cab.spbu.ru
+
 References:
 
 Anton Bankevich, Sergey Nurk, Dmitry Antipov, Alexey A. Gurevich, Mikhail Dvorkin, Alexander S. Kulikov, Valery M. Lesin,
diff --git a/VERSION b/VERSION
index f870be2..371cfe3 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-3.10.1
+3.11.1
diff --git a/changelog.html b/changelog.html
index e94a96d..524ddba 100644
--- a/changelog.html
+++ b/changelog.html
@@ -3,6 +3,38 @@
 
 <h2>SPAdes Genome Assembler changelog</h2>
 
+<h3>SPAdes 3.11.1, 1 October 2017</h3>
+
+<p>FIX: Handling spaces in path during mismatch correction.</p>
+
+<p>FIX: Python3 support in rnaSPAdes.</p>
+
+<p>FIX: K value estimation for long reads.</p>
+
+<p>FIX: Processing long reads alignments.</p>
+
+<h3>SPAdes 3.11.0, 1 September 2017</h3>
+
+<p>NEW: Support for strand-specific RNA-Seq data in rnaSPAdes.</p>
+
+<p>NEW: Coverage based isoform detection in rnaSPAdes.</p>
+
+<p>NEW: Reworked IonHammer read error correction module.</p>
+
+<p>CHANGE: Improved tandem repeat resolution accuracy.</p>
+
+<p>CHANGE: Better performance of exSPAnder module.</p>
+
+<p>CHANGE: metaSPAdes pipeline improvements.</p>
+
+<p>CHANGE: Better running time and RAM consumption for the entire pipeline.</p>
+
+<p>FIX: Incomplete paths in GFA output.</p>
+
+<p>FIX: Mismatch and indel rate in careful mode for isolate datasets (esp. low covered ones).</p>
+
+<p>FIX: Occasional hanging of edge disconnection procedure in metaSPAdes.</p>
+
 <h3>SPAdes 3.10.1, 1 March 2017</h3>
 
 <p>FIX: Build for MacOS.</p>
@@ -29,7 +61,6 @@
 
 <p>CHANGE: Improved isoform detection algorithm in rnaSPAdes.</p>
 
-
 <h3>SPAdes 3.9.1, 4 December 2016</h3>
 
 <p>FIX: macOS Sierra crash.</p>
diff --git a/configs/corrector/corrector.info b/configs/corrector/corrector.info
index 22740a3..d0373f1 100644
--- a/configs/corrector/corrector.info
+++ b/configs/corrector/corrector.info
@@ -3,5 +3,6 @@ dataset: ./configs/debruijn/datasets/ECOLI_IS220_QUAKE.yaml,
 work_dir: ./test_dataset/input/corrected/tmp, 
 output_dir: ./test_dataset/input/corrected,
 max_nthreads: 16,
-strategy: mapped_squared
+strategy: mapped_squared,
+log_filename: log.properties
 }
diff --git a/configs/corrector/log.properties b/configs/corrector/log.properties
new file mode 100644
index 0000000..4700217
--- /dev/null
+++ b/configs/corrector/log.properties
@@ -0,0 +1,2 @@
+default=INFO
+ContigProcessor=INFO
diff --git a/configs/debruijn/careful_mode.info b/configs/debruijn/careful_mda_mode.info
similarity index 64%
copy from configs/debruijn/careful_mode.info
copy to configs/debruijn/careful_mda_mode.info
index 5cbb786..856d1d3 100644
--- a/configs/debruijn/careful_mode.info
+++ b/configs/debruijn/careful_mda_mode.info
@@ -1,12 +1,9 @@
-
 simp
 {
     ; bulge remover:
     br
     {
-        max_coverage            1000000.0
-        max_relative_coverage       1.5     ; bulge_cov < this * not_bulge_cov
-        parallel false
+        max_relative_coverage		1.1	; bulge_cov < this * not_bulge_cov
     }
     
     ; complex bulge remover
@@ -15,8 +12,13 @@ simp
         enabled false
     }
 
-    ; relative coverage erroneous component remover:
-    rcc
+    final_tc
+    {
+        condition               ""
+    }
+
+    ; bulge remover:
+    final_br
     {
         enabled false
     }
diff --git a/configs/debruijn/careful_mode.info b/configs/debruijn/careful_mode.info
index 5cbb786..c094f0e 100644
--- a/configs/debruijn/careful_mode.info
+++ b/configs/debruijn/careful_mode.info
@@ -1,12 +1,10 @@
-
 simp
 {
     ; bulge remover:
     br
     {
-        max_coverage            1000000.0
-        max_relative_coverage       1.5     ; bulge_cov < this * not_bulge_cov
-        parallel false
+        max_relative_coverage       0.5     ; bulge_cov < this * not_bulge_cov
+        ; parallel false
     }
     
     ; complex bulge remover
@@ -15,6 +13,12 @@ simp
         enabled false
     }
 
+    ; bulge remover:
+    final_br
+    {
+        enabled false
+    }
+
     ; relative coverage erroneous component remover:
     rcc
     {
diff --git a/configs/debruijn/config.info b/configs/debruijn/config.info
index df5179a..0688ecc 100644
--- a/configs/debruijn/config.info
+++ b/configs/debruijn/config.info
@@ -14,10 +14,10 @@ mode base
 ;FIXME remove!
 run_mode false
 project_name    TOY_DATASET
-dataset         ./configs/debruijn/datasets_archive/toy.info
+dataset         ./configs/debruijn/toy.info
 log_filename    log.properties
 
-output_base	      ./data/debruijn/
+output_base	      ./spades_output
 tmp_dir	              spades_tmp/
 
 main_iteration  true
@@ -30,11 +30,11 @@ load_from         latest/saves/ ; tmp or latest
 temp_bin_reads_dir	.bin_reads/
 max_threads		8
 max_memory      120; in Gigabytes
-buffer_size	      	512; in Megabytes
+buffer_size     512; in Megabytes
 
 entry_point construction
 ;entry_point simplification
-;entry_point pacbio_aligning
+;entry_point hybrid_aligning
 ;entry_point late_pair_info_count
 ;entry_point distance_estimation
 ;entry_point repeat_resolving
@@ -69,9 +69,6 @@ compute_paths_number false
 
 ; End of developer_mode parameters
 
-; use unipaths as additional contigs instead of just graph edges
-use_unipaths false 
-
 ;if true simple mismatches are corrected
 correct_mismatches          true
 
@@ -95,6 +92,12 @@ use_scaffolder  true
 
 avoid_rc_connections true
 
+calculate_coverage_for_each_lib false
+strand_specificity {
+    ss_enabled false
+    antisense false
+}
+
 contig_output {
     contigs_name    final_contigs
     scaffolds_name  scaffolds
@@ -133,6 +136,13 @@ kmer_coverage_model {
     coverage_threshold 10.0
 }
 
+; low covered edges remover
+lcer
+{
+    lcer_enabled                     false
+    lcer_coverage_threshold          0.0
+}
+
 pacbio_processor
 {
 ;align and traverse.
@@ -172,3 +182,4 @@ bwa_aligner
 ;flanking coverage range
 flanking_range 55
 series_analysis ""
+save_gp false
diff --git a/configs/debruijn/log.properties b/configs/debruijn/log.properties
index b19eafe..8e792f9 100644
--- a/configs/debruijn/log.properties
+++ b/configs/debruijn/log.properties
@@ -50,6 +50,8 @@ default=INFO
 #LoopDetectingPathExtender=DEBUG
 #SimpleExtender=DEBUG
 #ScaffoldingPathExtender=DEBUG
+#CoordCoverageExtensionChooser=DEBUG
+#SimpleCoverageExtensionChooser=DEBUG
 
 #BWAPairInfo=TRACE
 #LongReadMapper=TRACE
diff --git a/configs/debruijn/mda_mode.info b/configs/debruijn/mda_mode.info
index 11c9815..9af6996 100644
--- a/configs/debruijn/mda_mode.info
+++ b/configs/debruijn/mda_mode.info
@@ -39,7 +39,7 @@ simp
     rcc
     {
         enabled true
-        coverage_gap    20.
+        coverage_gap    10.
         max_length_coeff    2.0
         max_length_with_tips_coeff   3.0
         max_vertex_cnt      30
diff --git a/configs/debruijn/meta_mode.info b/configs/debruijn/meta_mode.info
index 69c7bdc..23022de 100644
--- a/configs/debruijn/meta_mode.info
+++ b/configs/debruijn/meta_mode.info
@@ -32,7 +32,7 @@ simp
     {
         ; rctc: tip_cov < rctc * not_tip_cov
         ; tc_lb: max_tip_length = max((min(k, read_length / 2) * tc_lb), read_length);
-        condition               "{ tc_lb 3.5, , cb 1000000, rctc 2.0 } { tc_lb 6., cb 2.5, rctc 1.0 }"
+        condition               "{ tc_lb 3.5, cb 1000000, rctc 2.0 } { tc_lb 6., cb 2.5, rctc 1.0 }"
     }
 
     ; relative coverage erroneous component remover:
@@ -57,16 +57,17 @@ simp
     relative_ed
     {
         enabled true
-        diff_mult  10.
+        diff_mult  5.
     }
 
 	; bulge remover:
 	br
 	{
 		max_coverage			1000000.0
-		max_relative_coverage		100000.		; bulge_cov < this * not_bulge_cov
+		max_relative_coverage		5.		; bulge_cov < this * not_bulge_cov
 		max_delta			10
 		max_relative_delta		0.1
+        dijkstra_vertex_limit   3000
         parallel true
 	}
 
@@ -90,14 +91,6 @@ simp
         max_relative_delta		0.1
     }
 
-    ; second final bulge remover:
-    ; only in meta mode, inherits settings of final_br
-    second_final_br
-    {
-        max_delta           1500
-        max_number_edges        3
-    }
-
     ; hidden ec remover
     her
     {
@@ -169,8 +162,10 @@ long_reads {
 }
 
 params {
-    remove_overlaps     true
-    cut_all_overlaps  true
+    overlap_removal {
+        enabled true
+        cut_all true
+    }
 
     scaffolding_mode old_pe_2015
 
@@ -187,6 +182,12 @@ params {
     }
     
     use_coordinated_coverage true
+
+    coordinated_coverage
+    {
+       min_path_len           10000
+    }
+
 }
 
 }
@@ -195,6 +196,10 @@ prelim_pe {
 params {
     scaffolding_mode old
 
+    overlap_removal {
+        enabled false
+    }
+
     use_coordinated_coverage false
     remove_overlaps     false
     scaffolding2015 {
diff --git a/configs/debruijn/moleculo_mode.info b/configs/debruijn/moleculo_mode.info
index a3ad118..94cb827 100644
--- a/configs/debruijn/moleculo_mode.info
+++ b/configs/debruijn/moleculo_mode.info
@@ -101,7 +101,11 @@ simp
 pe {
 params {
     normalize_weight        true
-    cut_all_overlaps  true
+
+    overlap_removal {
+        enabled true
+        cut_all true
+    }
 
     scaffolding_mode old
 
diff --git a/configs/debruijn/pe_params.info b/configs/debruijn/pe_params.info
index 0d7a172..fcf5bf3 100644
--- a/configs/debruijn/pe_params.info
+++ b/configs/debruijn/pe_params.info
@@ -18,9 +18,12 @@ params {
     multi_path_extend   false
     ; old | 2015 | combined | old_pe_2015
     scaffolding_mode old_pe_2015
-
-    remove_overlaps     true
-    cut_all_overlaps  false
+    
+    overlap_removal {
+        enabled true
+        end_start_only  false
+        cut_all false
+    }
 
     split_edge_length    99
     normalize_weight     true
@@ -61,25 +64,20 @@ params {
         ;next param should be 0.51 - 1.0 if use_old_score = true and 3.0 otherwise
         min_gap_score   0.7
 
-        max_must_overlap  -2
-        max_can_overlap   0.5
+        max_can_overlap   1.
         short_overlap     6
         artificial_gap    10
-        use_old_score   true
 
         min_overlap_length 10
-        flank_addition_coefficient -5.9
-        flank_multiplication_coefficient 0.97
+        flank_multiplication_coefficient .5
+        flank_addition_coefficient 5
 
         var_coeff 3.0
         basic_overlap_coeff 2.0
     }
 
-    path_cleaning
-    {
-        enabled false
-    }
-    
+    path_cleaning_presets ""
+
     use_coordinated_coverage false
     coordinated_coverage
     {
@@ -88,6 +86,14 @@ params {
        min_path_len           1000
     }
 
+
+    simple_coverage_resolver {
+        enabled false
+        coverage_delta 0.5
+        min_upper_coverage 5
+    }
+
+
     scaffolding2015 {
         ; (median * (1+variation) > unique > median * (1 - variation))
         relative_weight_cutoff 2.0
@@ -112,6 +118,9 @@ params {
     genome_consistency_checker {
         max_gap 1000
         relative_max_gap 0.2
+        use_main_storage true ; if set to true, next two parameters are set to min_unique_length
+        unresolvable_jump 1000 ; length of unresolvable repeats
+        unique_length 500  ; spelling genome in the alphabet of edges longer than this          
     }
 
     uniqueness_analyser {
diff --git a/configs/debruijn/rna_fast_mode.info b/configs/debruijn/rna_fast_mode.info
new file mode 100644
index 0000000..ea66c4a
--- /dev/null
+++ b/configs/debruijn/rna_fast_mode.info
@@ -0,0 +1,12 @@
+simp
+{
+    ier
+    {
+        enabled                       true
+        use_rl_for_max_length         true ; max_length will be taken max with read_length
+        use_rl_for_max_length_any_cov false ; use_rl_for_max_length_any_cov will be taken max with read_length
+        max_length                    110
+        max_coverage                  2
+        max_length_any_cov            0
+    }
+}
diff --git a/configs/debruijn/rna_mode.info b/configs/debruijn/rna_mode.info
index aae3d6f..3db104e 100644
--- a/configs/debruijn/rna_mode.info
+++ b/configs/debruijn/rna_mode.info
@@ -2,6 +2,13 @@ mode rna
 
 preserve_raw_paired_index true
 
+calculate_coverage_for_each_lib true
+strand_specificity {
+    ss_enabled false
+    antisense false
+}
+
+
 contig_output {
     scaffolds_name  transcripts
     ; none  --- do not output broken scaffolds | break_gaps --- break only by N steches | break_all --- break all with overlap < k
@@ -62,14 +69,13 @@ simp
         max_ec_length_coefficient   30
         max_coverage_coeff  5.0
     }
+
     ;all topology based erroneous connection removers are off
     ier
     {
         enabled                     false
-        max_length                  100
-        max_coverage                2
-        max_length_any_cov          0 ; will be taken max with read_length
     }
+
     ; hidden ec remover
     her
     {
@@ -100,12 +106,19 @@ de
 }
 
 pe {
+debug_output    true
+
 params {
     multi_path_extend   true
-    remove_overlaps     false
 
     scaffolding_mode old
 
+    overlap_removal {
+        enabled true
+        end_start_only  true
+        cut_all true
+    }
+
     extension_options
     {
         use_default_single_threshold true
@@ -115,20 +128,65 @@ params {
     scaffolder {
         cutoff        1
         hard_cutoff   10
-
-	cluster_info false
-
-	min_overlap_for_rna_scaffolding 10
+        cluster_info false
+        min_overlap_for_rna_scaffolding 10
     }
 
+    path_cleaning_presets "default soft hard"
+    ; All length cutoffs presented in nucleotides
+    ; So edges less than or equal to (relative cutoff * RL - K) or (absolute cutoff - K) will be deleted
     path_cleaning
     {
         enabled true
-        min_length  30
-        isolated_min_length 50
-        min_length_for_low_covered 150
+        min_length  110
+        isolated_min_length 130
+        isolated_min_cov 4
+        min_length_for_low_covered 140
+        rel_cutoff 1.3
+        rel_isolated_cutoff 1.5
+        rel_low_covered_cutoff 1.6
         min_coverage 2
     }
 
+    ; All length cutoffs presented in nucleotides
+    hard_path_cleaning
+    {
+        enabled true
+        min_length  130
+        isolated_min_length 180
+        isolated_min_cov 8
+        min_length_for_low_covered 180
+        rel_cutoff 1.5
+        rel_isolated_cutoff 2.0
+        rel_low_covered_cutoff 2.0
+        min_coverage 3
+    }
+
+    ; All length cutoffs presented in nucleotides
+    soft_path_cleaning
+    {
+        enabled true
+        min_length  85
+        isolated_min_length 100
+        isolated_min_cov 2
+        min_length_for_low_covered 130
+        rel_cutoff 1.05
+        rel_isolated_cutoff 1.2
+        rel_low_covered_cutoff 1.5
+        min_coverage 1
+    }
+
+    use_coordinated_coverage false
+    coordinated_coverage {
+       max_edge_length_repeat 1000
+       delta                  0.5
+       min_path_len           300
+    }
+
+    simple_coverage_resolver {
+        enabled true
+        coverage_delta 0.5
+        min_upper_coverage 2
+    }
 }
 }
diff --git a/configs/debruijn/simplification.info b/configs/debruijn/simplification.info
index 3ee8e02..7580b9b 100644
--- a/configs/debruijn/simplification.info
+++ b/configs/debruijn/simplification.info
@@ -20,23 +20,24 @@ simp
         condition               "{ tc_lb 3.5, cb 1000000, rctc 2.0 } { tc_lb 10., cb auto }"
     }
   
-	; bulge remover:
-	br
-	{
-		enabled				true
+    ; bulge remover:
+    br
+    {
+        enabled				true
         main_iteration_only false
-		max_bulge_length_coefficient	3.	; max_bulge_length = max_bulge_length_coefficient * k
-       	max_additive_length_coefficient 100
-		max_coverage			1000.0
-		max_relative_coverage		1.1	; bulge_cov < this * not_bulge_cov
-		max_delta			3
-		max_relative_delta		0.1
+        max_bulge_length_coefficient	3.	; max_bulge_length = max_bulge_length_coefficient * k
+        max_additive_length_coefficient 100
+        max_coverage			1000.0
+        max_relative_coverage		1.1	; bulge_cov < this * not_bulge_cov
+        max_delta			3
+        max_relative_delta		0.1
         max_number_edges        1000
+        dijkstra_vertex_limit   3000
         parallel true
         buff_size 10000
         buff_cov_diff 2.
         buff_cov_rel_diff 0.2
-	}
+    }
 	
 	; erroneous connections remover:
 	ec
@@ -94,6 +95,7 @@ simp
         max_delta			3
         max_relative_delta		0.1
         max_number_edges        1000
+        dijkstra_vertex_limit   3000
         parallel true
         buff_size 10000
         buff_cov_diff 2.
@@ -143,9 +145,11 @@ simp
     ier
     {
         enabled                     true
-        max_length                  0
+        use_rl_for_max_length         false ; max_length will be taken max with read_length
+        use_rl_for_max_length_any_cov true ; use_rl_for_max_length_any_cov will be taken max with read_length
+        max_length                  0 ; will be taken max with read_length if option above is set
         max_coverage                2
-        max_length_any_cov          150 ; will be taken max with read_length 
+        max_length_any_cov          150 ; will be taken max with read_length if option above is set
     }
     
     ; topology tip clipper:
@@ -193,9 +197,11 @@ simp
         ier
         {
             enabled                     true
-            max_length                  0
+            use_rl_for_max_length         false ; max_length will be taken max with read_length
+            use_rl_for_max_length_any_cov true ; use_rl_for_max_length_any_cov will be taken max with read_length
+            max_length                  0 ; will be taken max with read_length if option above is set
             max_coverage                0
-            max_length_any_cov          0 ; will be taken max with read_length 
+            max_length_any_cov          0 ; will be taken max with read_length if option above is set
         }
 
         tip_condition   "{ tc_lb 3.5, cb auto }"
diff --git a/configs/debruijn/toy.info b/configs/debruijn/toy.info
new file mode 100644
index 0000000..7cf3669
--- /dev/null
+++ b/configs/debruijn/toy.info
@@ -0,0 +1,4 @@
+reads	toy.yaml
+single_cell	false
+; RL	100
+
diff --git a/configs/debruijn/toy.yaml b/configs/debruijn/toy.yaml
new file mode 100644
index 0000000..19958c6
--- /dev/null
+++ b/configs/debruijn/toy.yaml
@@ -0,0 +1,4 @@
+- left reads: [../../test_dataset/ecoli_1K_1.fq.gz]
+  orientation: fr
+  right reads: [../../test_dataset/ecoli_1K_2.fq.gz]
+  type: paired-end
diff --git a/ext/include/ConsensusCore/Feature.hpp b/ext/include/ConsensusCore/Feature.hpp
deleted file mode 100644
index da3f707..0000000
--- a/ext/include/ConsensusCore/Feature.hpp
+++ /dev/null
@@ -1,166 +0,0 @@
-// Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-// Author: David Alexander
-
-#pragma once
-
-#include <algorithm>
-#include <boost/range.hpp>
-#include <boost/shared_array.hpp>
-#include <boost/utility.hpp>
-#include <cassert>
-#include <string>
-#include <vector>
-
-#include "Types.hpp"
-
-namespace ConsensusCore
-{
-    // Feature/Features object usage caveats:
-    //  - Feature and Features objects _must_ be stored by value, not reference
-    //  - The underlying array must be allocated using new[]
-    template <typename T>
-    class Feature : private boost::shared_array<T>
-    {
-    public:
-        // \brief Allocate a new feature object, copying content from ptr.
-        Feature(const T* ptr, int length)
-            : boost::shared_array<T>(new T[length]),
-              length_(length)
-        {
-            assert(length >= 0);
-            std::copy(ptr, ptr + length, get());
-        }
-
-        // \brief Allocate and zero-fill a new feature object of given length.
-        explicit Feature(int length)
-            : boost::shared_array<T>(new T[length]()),
-              length_(length)
-        {
-            assert(length >= 0);
-        }
-
-        int Length() const
-        {
-            return length_;
-        }
-
-        const T& operator[](int i) const
-        {
-            return this->boost::shared_array<T>::operator[](i);
-        }
-
-        T& operator[](int i)
-        {
-            return this->boost::shared_array<T>::operator[](i);
-        }
-
-        T ElementAt(int i) const
-        {
-            return (*this)[i];
-        }
-
-    private:
-        int length_;
-
-#ifndef SWIG
-    public:
-        T* get()
-        {
-            return this->boost::shared_array<T>::get();
-        }
-
-        const T* get() const
-        {
-            return this->boost::shared_array<T>::get();
-        }
-
-        operator std::string() const;
-#endif  // !SWIG
-    };
-
-
-#ifndef SWIG
-    //
-    // Support for boost::foreach
-    //
-    template<typename T>
-    inline const T* range_begin(const Feature<T>& f)
-    {
-        return f.get();
-    }
-
-    template<typename T>
-    inline const T* range_end(const Feature<T>& f)
-    {
-        return f.get() + f.Length();
-    }
-
-    template<typename T>
-    inline T* range_begin(Feature<T>& f) // NOLINT
-    {
-        return f.get();
-    }
-
-    template<typename T>
-    inline T* range_end(Feature<T>& f)  // NOLINT
-    {
-        return f.get() + f.Length();
-    }
-#endif  // !SWIG
-
-    typedef Feature<float> FloatFeature;
-    typedef Feature<char> CharFeature;
-    typedef Feature<int> IntFeature;
-}
-
-
-#ifndef SWIG
-namespace boost
-{
-    template<typename T>
-    struct range_const_iterator<ConsensusCore::Feature<T> >
-    {
-        typedef const T* type;
-    };
-
-    template<typename T>
-    struct range_mutable_iterator<ConsensusCore::Feature<T> >
-    {
-        typedef T* type;
-    };
-}
-#endif  // !SWIG
diff --git a/ext/include/ConsensusCore/Features.hpp b/ext/include/ConsensusCore/Features.hpp
deleted file mode 100644
index 58f7a5a..0000000
--- a/ext/include/ConsensusCore/Features.hpp
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-// Author: David Alexander
-
-#pragma once
-
-#include <boost/range.hpp>
-#include <boost/shared_array.hpp>
-#include <boost/utility.hpp>
-#include <string>
-#include <vector>
-
-#include "Feature.hpp"
-#include "Types.hpp"
-
-namespace ConsensusCore
-{
-    /// \brief An object containing observed features from a sequencing run.
-    struct SequenceFeatures
-    {
-    public:
-        explicit SequenceFeatures(const std::string& seq);
-        int Length() const             { return sequence_.Length(); }
-        Feature<char> Sequence() const { return sequence_; }
-
-        /// Access to the sequence bases
-        const char& operator[] (int i) const { return sequence_[i]; }
-        char ElementAt(int i) const          { return (*this)[i]; }
-
-
-    private:
-        Feature<char> sequence_;
-    };
-}
-
diff --git a/ext/include/ConsensusCore/Matrix/SparseMatrix-inl.hpp b/ext/include/ConsensusCore/Matrix/SparseMatrix-inl.hpp
index 6e286ac..3e7f189 100644
--- a/ext/include/ConsensusCore/Matrix/SparseMatrix-inl.hpp
+++ b/ext/include/ConsensusCore/Matrix/SparseMatrix-inl.hpp
@@ -38,7 +38,6 @@
 #pragma once
 
 #include <algorithm>
-#include <boost/tuple/tuple.hpp>
 #include <cassert>
 #include <utility>
 
diff --git a/ext/include/ConsensusCore/Poa/PoaGraph.hpp b/ext/include/ConsensusCore/Poa/PoaGraph.hpp
index a0168de..ed79034 100644
--- a/ext/include/ConsensusCore/Poa/PoaGraph.hpp
+++ b/ext/include/ConsensusCore/Poa/PoaGraph.hpp
@@ -37,10 +37,10 @@
 
 #pragma once
 
-#include <boost/tuple/tuple.hpp>
 #include <vector>
 #include <string>
 #include <utility>
+#include <tuple>
 
 #include "Types.hpp"
 #include "Mutation.hpp"
@@ -65,7 +65,7 @@ namespace ConsensusCore
         // TODO(dalexander): move this method to PoaConsensus so we don't have to use a tuple
         // interface here (which was done to avoid a circular dep on PoaConsensus).
 #ifndef SWIG
-        boost::tuple<std::string, float, std::vector< std::pair<Mutation*, float> >* >
+        std::tuple<std::string, float, std::vector< std::pair<Mutation*, float> >* >
         FindConsensus(const PoaConfig& config) const;
 #endif  // !SWIG
 
diff --git a/ext/include/ConsensusCore/Version.hpp b/ext/include/ConsensusCore/Version.hpp
index c550d30..06e3fae 100644
--- a/ext/include/ConsensusCore/Version.hpp
+++ b/ext/include/ConsensusCore/Version.hpp
@@ -37,7 +37,6 @@
 
 #include <string>
 #include <vector>
-#include <boost/tuple/tuple.hpp>
 
 #pragma once
 
diff --git a/ext/include/boomphf/BooPHF.h b/ext/include/boomphf/BooPHF.h
new file mode 100644
index 0000000..de84687
--- /dev/null
+++ b/ext/include/boomphf/BooPHF.h
@@ -0,0 +1,1422 @@
+// BooPHF library
+// intended to be a minimal perfect hash function with fast and low memory
+// construction, at the cost of (slightly) higher bits/elem than other state of
+// the art libraries once built.  should work with arbitray large number of
+// elements, based on a cascade of "collision-free" bit arrays
+
+#pragma once
+#include <stdio.h>
+#include <climits>
+#include <stdlib.h>
+#include <iostream>
+#include <math.h>
+
+#include <array>
+#include <unordered_map>
+#include <vector>
+#include <assert.h>
+#include <sys/time.h>
+#include <string.h>
+#include <memory> // for make_shared
+#include <unistd.h>
+
+//#define CCDEBUG
+//#define PDEBUG
+
+namespace boomphf {
+
+inline uint64_t printPt( pthread_t pt) {
+    unsigned char *ptc = (unsigned char*)(void*)(&pt);
+    uint64_t res =0;
+    for (size_t i=0; i<sizeof(pt); i++) {
+        res+= (unsigned)(ptc[i]);
+    }
+    return res;
+}
+
+
+////////////////////////////////////////////////////////////////
+#pragma mark -
+#pragma mark utils
+////////////////////////////////////////////////////////////////
+
+
+// iterator from disk file of uint64_t with buffered read,   todo template
+template <typename basetype>
+class bfile_iterator : public std::iterator<std::forward_iterator_tag, basetype>{
+  public:
+
+    bfile_iterator()
+            : _is(nullptr)
+            , _pos(0) ,_inbuff (0), _cptread(0)
+    {
+        _buffsize = 10000;
+        _buffer = (basetype *) malloc(_buffsize*sizeof(basetype));
+    }
+
+    bfile_iterator(const bfile_iterator& cr)
+    {
+        _buffsize = cr._buffsize;
+        _pos = cr._pos;
+        _is = cr._is;
+        _buffer = (basetype *) malloc(_buffsize*sizeof(basetype));
+        memcpy(_buffer,cr._buffer,_buffsize*sizeof(basetype) );
+        _inbuff = cr._inbuff;
+        _cptread = cr._cptread;
+        _elem = cr._elem;
+    }
+
+    bfile_iterator(FILE* is): _is(is) , _pos(0) ,_inbuff (0), _cptread(0)
+    {
+        //printf("bf it %p\n",_is);
+        _buffsize = 10000;
+        _buffer = (basetype *) malloc(_buffsize*sizeof(basetype));
+        int reso = fseek(_is,0,SEEK_SET);
+        advance();
+    }
+
+    ~bfile_iterator()
+    {
+        if(_buffer!=NULL)
+            free(_buffer);
+    }
+
+
+    basetype const& operator*()  {  return _elem;  }
+
+    bfile_iterator& operator++()
+    {
+        advance();
+        return *this;
+    }
+
+    friend bool operator==(bfile_iterator const& lhs, bfile_iterator const& rhs)
+    {
+        if (!lhs._is || !rhs._is)  {  if (!lhs._is && !rhs._is) {  return true; } else {  return false;  } }
+        assert(lhs._is == rhs._is);
+        return rhs._pos == lhs._pos;
+    }
+
+    friend bool operator!=(bfile_iterator const& lhs, bfile_iterator const& rhs)  {  return !(lhs == rhs);  }
+
+  private:
+    void advance()
+    {
+
+        //printf("_cptread %i _inbuff %i \n",_cptread,_inbuff);
+
+        _pos++;
+
+        if(_cptread >= _inbuff)
+        {
+
+            int res = fread(_buffer,sizeof(basetype),_buffsize,_is);
+
+            //printf("read %i new elem last %llu  %p\n",res,_buffer[res-1],_is);
+            _inbuff = res; _cptread = 0;
+
+            if(res == 0)
+            {
+                _is = nullptr;
+                _pos = 0;
+                return;
+            }
+        }
+
+        _elem = _buffer[_cptread];
+        _cptread ++;
+    }
+    basetype _elem;
+    FILE * _is;
+    unsigned long _pos;
+
+    basetype * _buffer; // for buffered read
+    int _inbuff, _cptread;
+    int _buffsize;
+};
+
+
+template <typename type_elem>
+class file_binary{
+  public:
+
+    file_binary(const char* filename)
+    {
+        _is = fopen(filename, "rb");
+
+        if (!_is) {
+            throw std::invalid_argument("Error opening " + std::string(filename));
+        }
+    }
+
+    ~file_binary()
+    {
+        fclose(_is);
+    }
+
+    bfile_iterator<type_elem> begin() const
+    {
+        return bfile_iterator<type_elem>(_is);
+    }
+
+    bfile_iterator<type_elem> end() const {return bfile_iterator<type_elem>(); }
+
+    size_t        size () const  {  return 0;  }//todo ?
+
+  private:
+    FILE * _is;
+};
+
+
+#define L8 0x0101010101010101ULL // Every lowest 8th bit set: 00000001...
+#define G2 0xAAAAAAAAAAAAAAAAULL // Every highest 2nd bit: 101010...
+#define G4 0x3333333333333333ULL // 00110011 ... used to group the sum of 4 bits.
+#define G8 0x0F0F0F0F0F0F0F0FULL
+
+static inline unsigned popcount_64(uint64_t x) {
+    // Step 1:  00 - 00 = 0;  01 - 00 = 01; 10 - 01 = 01; 11 - 01 = 10;
+    x = x - ((x & G2) >> 1);
+    // step 2:  add 2 groups of 2.
+    x = (x & G4) + ((x >> 2) & G4);
+    // 2 groups of 4.
+    x = (x + (x >> 4)) & G8;
+    // Using a multiply to collect the 8 groups of 8 together.
+    x = x * L8 >> 56;
+    return x;
+}
+
+///// progress bar
+class Progress
+{
+  public:
+    int timer_mode;
+    struct timeval timestamp;
+    double heure_debut, heure_actuelle ;
+    std::string   message;
+
+    uint64_t done;
+    uint64_t todo;
+    int subdiv ; // progress printed every 1/subdiv of total to do
+    double partial;
+    int _nthreads;
+    std::vector<double > partial_threaded;
+    std::vector<uint64_t > done_threaded;
+
+    double steps ; //steps = todo/subidv
+
+    void init(uint64_t ntasks, const char * msg,int nthreads =1)
+    {
+        _nthreads = nthreads;
+        message = std::string(msg);
+        gettimeofday(&timestamp, NULL);
+        heure_debut = timestamp.tv_sec +(timestamp.tv_usec/1000000.0);
+
+        //fprintf(stderr,"| %-*s |\n",98,msg);
+
+        todo= ntasks;
+        done = 0;
+        partial =0;
+
+        partial_threaded.resize(_nthreads);
+        done_threaded.resize(_nthreads);
+
+        for (int ii=0; ii<_nthreads;ii++) partial_threaded[ii]=0;
+        for (int ii=0; ii<_nthreads;ii++) done_threaded[ii]=0;
+        subdiv= 1000;
+        steps = (double)todo / (double)subdiv;
+
+        if(!timer_mode)
+        {
+            fprintf(stderr,"[");fflush(stderr);
+        }
+    }
+
+    void finish()
+    {
+        set(todo);
+        if(timer_mode)
+            fprintf(stderr,"\n");
+        else
+            fprintf(stderr,"]\n");
+
+        fflush(stderr);
+        todo= 0;
+        done = 0;
+        partial =0;
+
+    }
+    void finish_threaded()// called by only one of the threads
+    {
+        done = 0;
+        double rem = 0;
+        for (int ii=0; ii<_nthreads;ii++) done += (done_threaded[ii] );
+        for (int ii=0; ii<_nthreads;ii++) partial += (partial_threaded[ii] );
+
+        finish();
+
+    }
+    void inc(uint64_t ntasks_done)
+    {
+        done += ntasks_done;
+        partial += ntasks_done;
+
+
+        while(partial >= steps)
+        {
+            if(timer_mode)
+            {
+                gettimeofday(&timestamp, NULL);
+                heure_actuelle = timestamp.tv_sec +(timestamp.tv_usec/1000000.0);
+                double elapsed = heure_actuelle - heure_debut;
+                double speed = done / elapsed;
+                double rem = (todo-done) / speed;
+                if(done>todo) rem=0;
+                int min_e  = (int)(elapsed / 60) ;
+                elapsed -= min_e*60;
+                int min_r  = (int)(rem / 60) ;
+                rem -= min_r*60;
+
+                fprintf(stderr,"%c[%s]  %-5.3g%%   elapsed: %3i min %-2.0f sec   remaining: %3i min %-2.0f sec",13,
+                        message.c_str(),
+                        100*(double)done/todo,
+                        min_e,elapsed,min_r,rem);
+
+            }
+            else
+            {
+                fprintf(stderr,"-");fflush(stderr);
+            }
+            partial -= steps;
+        }
+
+
+    }
+
+    void inc(uint64_t ntasks_done, int tid) //threads collaborate to this same progress bar
+    {
+        partial_threaded[tid] += ntasks_done;
+        done_threaded[tid] += ntasks_done;
+        while(partial_threaded[tid] >= steps)
+        {
+            if(timer_mode)
+            {
+                struct timeval timet;
+                double now;
+                gettimeofday(&timet, NULL);
+                now = timet.tv_sec +(timet.tv_usec/1000000.0);
+                uint64_t total_done  = 0;
+                for (int ii=0; ii<_nthreads;ii++) total_done += (done_threaded[ii] );
+                double elapsed = now - heure_debut;
+                double speed = total_done / elapsed;
+                double rem = (todo-total_done) / speed;
+                if(total_done > todo) rem =0;
+                int min_e  =  (int)(elapsed / 60) ;
+                elapsed -= min_e*60;
+                int min_r  =  (int)(rem / 60) ;
+                rem -= min_r*60;
+
+                fprintf(stderr,"%c[%s]  %-5.3g%%   elapsed: %3i min %-2.0f sec   remaining: %3i min %-2.0f sec",13,
+                        message.c_str(),
+                        100*(double)total_done/todo,
+                        min_e,elapsed,min_r,rem);
+            }
+            else
+            {
+                fprintf(stderr,"-");fflush(stderr);
+            }
+            partial_threaded[tid] -= steps;
+
+        }
+
+    }
+
+    void set(uint64_t ntasks_done) {
+        if (ntasks_done > done)
+            inc(ntasks_done-done);
+    }
+    Progress () :     timer_mode(0) {}
+    //include timer, to print ETA ?
+};
+
+
+
+////////////////////////////////////////////////////////////////
+#pragma mark -
+#pragma mark hasher
+////////////////////////////////////////////////////////////////
+
+// 128-bit hash value stored as two independent 64-bit halves.
+typedef std::array<uint64_t,2> hash_pair_t;
+
+typedef hash_pair_t internal_hash_t; // or hash_pair_t directly?  __uint128_t
+
+// Iterator types over stored 128-bit hashes: from a binary file on disk, or
+// from an in-RAM vector (both used interchangeably by the builder).
+typedef decltype(std::declval<file_binary<internal_hash_t> >().begin()) diskit_hash128_t;
+typedef decltype(std::declval< std::vector< internal_hash_t> >().begin()) vectorit_hash128_t;
+
+// Folds a 128-bit internal hash down to 64 bits with a xorshift-style mix of
+// the two halves; usable as the hasher of an unordered_map keyed on internal_hash_t.
+struct internalHasher {
+    uint64_t operator()(const internal_hash_t& key) const {
+        uint64_t s0 = key[0];
+        uint64_t s1 = key[1];
+        s1 ^= s1 << 23;
+        return  (s1 ^ s0 ^ (s1 >> 17) ^ (s0 >> 26)) + s0;
+    }
+};
+
+// Produces the unbounded per-level hash sequence needed by the cascade:
+// hashpair128() gives the 128-bit base hash of a key, and next() draws each
+// further hash from a xorshift128+ stream whose state is that pair.
+template<class SingleHasher_t> class XorshiftHashFunctors {
+    /*  Xorshift128*
+        Written in 2014 by Sebastiano Vigna (vigna at acm.org)
+
+        To the extent possible under law, the author has dedicated all copyright
+        and related and neighboring rights to this software to the public domain
+        worldwide. This software is distributed without any warranty.
+
+        See <http://creativecommons.org/publicdomain/zero/1.0/>.
+
+        This is the fastest generator passing BigCrush without
+        systematic failures, but due to the relatively short period it is
+        acceptable only for applications with a mild amount of parallelism;
+        otherwise, use a xorshift1024* generator.
+
+        The state must be seeded so that it is not everywhere zero. If you have
+        a nonzero 64-bit seed, we suggest to pass it twice through
+        MurmurHash3's avalanching function. */
+  public:
+    // Computes the 128-bit base hash of a key via the underlying single hasher.
+    template<class Item>
+    hash_pair_t hashpair128(const Item& key) const {
+        auto h = singleHasher(key);
+        return { h.first, h.second };
+    }
+
+    //return next hash and update state s
+    uint64_t next(hash_pair_t &s) const {
+        uint64_t s1 = s[0];
+        const uint64_t s0 = s[1];
+        s[0] = s0;
+        s1 ^= s1 << 23; // a
+        return (s[1] = (s1 ^ s0 ^ (s1 >> 17) ^ (s0 >> 26))) + s0; // b, c
+    }
+
+  private:
+    SingleHasher_t singleHasher;
+};
+
+
+////////////////////////////////////////////////////////////////
+#pragma mark -
+#pragma mark iterators
+////////////////////////////////////////////////////////////////
+
+// Minimal [begin, end) iterator pair, so an arbitrary iterator range can be
+// traversed with a range-based for loop.
+template <typename Iterator>
+struct iter_range {
+    iter_range(Iterator b, Iterator e)
+            : m_begin(b), m_end(e) {}
+
+    Iterator begin() const { return m_begin; }
+    Iterator end() const { return m_end; }
+
+    Iterator m_begin, m_end;
+};
+
+// Factory deducing the iterator type, in the spirit of std::make_pair.
+template <typename Iterator>
+iter_range<Iterator> range(Iterator begin, Iterator end) {
+    return iter_range<Iterator>(begin, end);
+}
+
+////////////////////////////////////////////////////////////////
+#pragma mark -
+#pragma mark BitVector
+////////////////////////////////////////////////////////////////
+
+// Bit array with sampled-rank support (popcount of set bits before a
+// position), the core storage of each cascade level.  Backed by a C-allocated
+// array of 64-bit words; set/reset/atomic_test_and_set use GCC __sync
+// builtins so concurrent writers are safe.
+class bitVector {
+
+  public:
+
+    bitVector() : _size(0)
+    {
+        _bitArray = nullptr;
+    }
+
+    // Allocates zero-initialized storage for n bits.
+    bitVector(uint64_t n) : _size(n)
+    {
+        _nchar  = (1ULL+n/64ULL);
+        _bitArray =  (uint64_t *) calloc (_nchar,sizeof(uint64_t));
+    }
+
+    ~bitVector()
+    {
+        if(_bitArray != nullptr)
+            free(_bitArray);
+    }
+
+    //copy constructor (deep copy of the word array and rank samples)
+    bitVector(bitVector const &r)
+    {
+        _size =  r._size;
+        _nchar = r._nchar;
+        _ranks = r._ranks;
+        _bitArray = (uint64_t *) calloc (_nchar,sizeof(uint64_t));
+        memcpy(_bitArray, r._bitArray, _nchar*sizeof(uint64_t) );
+    }
+
+    // Copy assignment operator (deep copy, frees the previous buffer)
+    bitVector &operator=(bitVector const &r)
+    {
+        if (&r != this)
+        {
+            _size =  r._size;
+            _nchar = r._nchar;
+            _ranks = r._ranks;
+            if(_bitArray != nullptr)
+                free(_bitArray);
+            _bitArray = (uint64_t *) calloc (_nchar,sizeof(uint64_t));
+            memcpy(_bitArray, r._bitArray, _nchar*sizeof(uint64_t) );
+        }
+        return *this;
+    }
+
+    // Move assignment operator (steals the buffer, leaves r empty)
+    bitVector &operator=(bitVector &&r)
+    {
+        //printf("bitVector move assignment \n");
+        if (&r != this)
+        {
+            if(_bitArray != nullptr)
+                free(_bitArray);
+
+            _size =  std::move (r._size);
+            _nchar = std::move (r._nchar);
+            _ranks = std::move (r._ranks);
+            _bitArray = r._bitArray;
+            r._bitArray = nullptr;
+        }
+        return *this;
+    }
+    // Move constructor (delegates to the move assignment above)
+    bitVector(bitVector &&r) : _bitArray ( nullptr),_size(0)
+    {
+        *this = std::move(r);
+    }
+
+
+    // NOTE(review): realloc keeps existing bits but leaves any newly grown
+    // words uninitialized; callers appear to clear/overwrite after resizing.
+    void resize(uint64_t newsize)
+    {
+        //printf("bitvector resize from  %llu bits to %llu \n",_size,newsize);
+        _nchar  = (1ULL+newsize/64ULL);
+        _bitArray = (uint64_t *) realloc(_bitArray,_nchar*sizeof(uint64_t));
+        _size = newsize;
+    }
+
+    size_t size() const
+    {
+        return _size;
+    }
+
+    // Total footprint in bits (word array plus rank samples).
+    uint64_t bitSize() const {return (_nchar*64ULL + _ranks.capacity()*64ULL );}
+
+    //clear whole array
+    void clear()
+    {
+        memset(_bitArray,0,_nchar*sizeof(uint64_t));
+    }
+
+    //clear collisions in interval, only works with start and size multiple of 64
+    // (bits flagged in `cc` are zeroed here, then `cc` itself is cleared)
+    void clearCollisions(uint64_t start, size_t size, bitVector * cc)
+    {
+        assert( (start & 63) ==0);
+        assert( (size & 63) ==0);
+        uint64_t ids = (start/64ULL);
+        for(uint64_t ii =0;  ii< (size/64ULL); ii++ )
+        {
+            _bitArray[ids+ii] =  _bitArray[ids+ii] & (~ (cc->get64(ii)) );
+        }
+
+        cc->clear();
+    }
+
+
+    //clear interval, only works with start and size multiple of 64
+    void clear(uint64_t start, size_t size)
+    {
+        assert( (start & 63) ==0);
+        assert( (size & 63) ==0);
+        memset(_bitArray + (start/64ULL),0,(size/64ULL)*sizeof(uint64_t));
+    }
+
+    //for debug purposes
+    // NOTE(review): %lli/%llu expect (unsigned) long long while uint64_t may
+    // be unsigned long on LP64 platforms — possible format/argument mismatch.
+    void print() const
+    {
+        printf("bit array of size %lli: \n",_size);
+        for(uint64_t ii = 0; ii< _size; ii++)
+        {
+            if(ii%10==0)
+                printf(" (%llu) ",ii);
+            int val = (_bitArray[ii >> 6] >> (ii & 63 ) ) & 1;
+            printf("%i",val);
+        }
+        printf("\n");
+
+        printf("rank array : size %lu \n",_ranks.size());
+        for (uint64_t ii = 0; ii< _ranks.size(); ii++)
+        {
+            printf("%llu :  %lli,  ",ii,_ranks[ii]);
+        }
+        printf("\n");
+    }
+
+    // return value at pos
+    uint64_t operator[](uint64_t pos) const {
+        //unsigned char * _bitArray8 = (unsigned char *) _bitArray;
+        //return (_bitArray8[pos >> 3ULL] >> (pos & 7 ) ) & 1;
+        return (_bitArray[pos >> 6ULL] >> (pos & 63)) & 1;
+    }
+
+    //atomically   return old val and set to 1
+    uint64_t atomic_test_and_set(uint64_t pos) {
+        uint64_t oldval =   __sync_fetch_and_or(_bitArray + (pos >> 6), (uint64_t) (1ULL << (pos & 63)) );
+        return (oldval >> (pos & 63)) & 1;
+    }
+
+
+    uint64_t get(uint64_t pos) const {
+        return (*this)[pos];
+    }
+
+    // Raw access to the cell64-th 64-bit word.
+    uint64_t get64(uint64_t cell64) const {
+        return _bitArray[cell64];
+    }
+
+    //set bit pos to 1 (atomic)
+    void set(uint64_t pos) {
+        assert(pos<_size);
+        //_bitArray [pos >> 6] |=   (1ULL << (pos & 63) ) ;
+        __sync_fetch_and_or (_bitArray + (pos >> 6ULL), (1ULL << (pos & 63)) );
+    }
+
+    //set bit pos to 0 (atomic)
+    void reset(uint64_t pos) {
+        //_bitArray [pos >> 6] &=   ~(1ULL << (pos & 63) ) ;
+        __sync_fetch_and_and (_bitArray + (pos >> 6ULL), ~(1ULL << (pos & 63) ));
+    }
+
+    // return value of last rank
+    // add offset to all ranks computed
+    // Stores one cumulative popcount sample every _nb_bits_per_rank_sample bits.
+    uint64_t build_ranks(uint64_t offset = 0) {
+        _ranks.reserve(2 + _size/_nb_bits_per_rank_sample);
+
+        uint64_t curent_rank = offset;
+        for (size_t ii = 0; ii < _nchar; ii++) {
+            if (((ii*64)  % _nb_bits_per_rank_sample) == 0) {
+                _ranks.push_back(curent_rank);
+            }
+            curent_rank +=  popcount_64(_bitArray[ii]);
+        }
+
+        return curent_rank;
+    }
+
+    // Number of set bits strictly before `pos` (plus the offset folded into the
+    // samples by build_ranks): nearest sample + popcount of the words in between.
+    uint64_t rank(uint64_t pos) const {
+        uint64_t word_idx = pos / 64ULL;
+        uint64_t word_offset = pos % 64;
+        uint64_t block = pos / _nb_bits_per_rank_sample;
+        uint64_t r = _ranks[block];
+        for (uint64_t w = block * _nb_bits_per_rank_sample / 64; w < word_idx; ++w)
+            r += popcount_64(_bitArray[w]);
+        uint64_t mask = (uint64_t(1) << word_offset ) - 1;
+        r += popcount_64( _bitArray[word_idx] & mask);
+
+        return r;
+    }
+
+
+
+    // Raw binary serialization: size, word count, words, then rank samples.
+    void save(std::ostream& os) const {
+        os.write(reinterpret_cast<char const*>(&_size), sizeof(_size));
+        os.write(reinterpret_cast<char const*>(&_nchar), sizeof(_nchar));
+        os.write(reinterpret_cast<char const*>(_bitArray), (std::streamsize)(sizeof(uint64_t) * _nchar));
+        size_t sizer = _ranks.size();
+        os.write(reinterpret_cast<char const*>(&sizer),  sizeof(size_t));
+        os.write(reinterpret_cast<char const*>(_ranks.data()), (std::streamsize)(sizeof(_ranks[0]) * _ranks.size()));
+    }
+
+    // Inverse of save(); resizes the buffer before reading the words back.
+    void load(std::istream& is) {
+        is.read(reinterpret_cast<char*>(&_size), sizeof(_size));
+        is.read(reinterpret_cast<char*>(&_nchar), sizeof(_nchar));
+        this->resize(_size);
+        is.read(reinterpret_cast<char *>(_bitArray), (std::streamsize)(sizeof(uint64_t) * _nchar));
+
+        size_t sizer;
+        is.read(reinterpret_cast<char *>(&sizer),  sizeof(size_t));
+        _ranks.resize(sizer);
+        is.read(reinterpret_cast<char*>(_ranks.data()), (std::streamsize)(sizeof(_ranks[0]) * _ranks.size()));
+    }
+
+
+  protected:
+    uint64_t*  _bitArray;
+    //uint64_t* _bitArray;
+    uint64_t _size;
+    uint64_t _nchar;
+
+    // epsilon =  64 / _nb_bits_per_rank_sample   bits
+    // additional size for rank is epsilon * _size
+    static const uint64_t _nb_bits_per_rank_sample = 512; //512 seems ok
+    std::vector<uint64_t> _ranks;
+};
+
+////////////////////////////////////////////////////////////////
+#pragma mark -
+#pragma mark level
+////////////////////////////////////////////////////////////////
+
+
+// Maps a 64-bit hash uniformly onto [0, p) without a modulo, via the high
+// 64 bits of the 128-bit product word*p (Lemire's "fastrange" technique).
+static inline uint64_t fastrange64(uint64_t word, uint64_t p) {
+    //return word %  p;
+    return (uint64_t)(((__uint128_t)word * (__uint128_t)p) >> 64);
+}
+
+// One level of the cascade: a bit array of hash_domain bits whose set bits
+// mark the keys placed (collision-free) at this level.
+class level{
+  public:
+    level() {}
+
+    ~level() {}
+
+    // Tests the bit for this hash value, mapped into [0, hash_domain).
+    uint64_t get(uint64_t hash_raw) const {
+        //	uint64_t hashi =    hash_raw %  hash_domain; //
+        //uint64_t hashi = (uint64_t)(  ((__uint128_t) hash_raw * (__uint128_t) hash_domain) >> 64ULL);
+        uint64_t hashi = fastrange64(hash_raw,hash_domain);
+        return bitset.get(hashi);
+    }
+
+    uint64_t idx_begin;    // cumulative start offset of this level in the whole structure
+    uint64_t hash_domain;  // number of bits in this level (multiple of 64)
+    bitVector bitset;
+};
+
+
+////////////////////////////////////////////////////////////////
+#pragma mark -
+#pragma mark mphf
+////////////////////////////////////////////////////////////////
+
+// Number of elements each thread buffers locally before hashing/flushing.
+#define NBBUFF 10000
+//#define NBBUFF 2
+
+// Per-thread argument bundle handed to thread_processLevel.
+template<typename Range,typename Iterator>
+struct thread_args {
+    void * boophf;
+    Range const * range;
+    std::shared_ptr<void> it_p; /* used to be "Iterator it" but because of fastmode, iterator is polymorphic; TODO: think about whether it should be a unique_ptr actually */
+    std::shared_ptr<void> until_p; /* to cache the "until" variable */
+    int level;
+};
+
+//forward declaration
+
+template <typename Hasher_t, typename Range, typename it_type>
+void * thread_processLevel(void * args);
+
+/* Hasher_t returns a single hash when operator()(elem_t key) is called.
+   if used with XorshiftHashFunctors, it must have the following operator: operator()(elem_t key, uint64_t seed) */
+template<typename Hasher_t>
+class mphf {
+    /* this mechanisms gets P hashes out of Hasher_t */
+    typedef XorshiftHashFunctors<Hasher_t> MultiHasher_t ;
+
+  public:
+    // A default-constructed mphf is empty; lookup() returns ULLONG_MAX until built.
+    mphf()
+            : _built(false) {}
+
+    ~mphf() {}
+
+    // allow perc_elem_loaded  elements to be loaded in ram for faster construction (default 3%), set to 0 to desactivate
+    // Builds the minimal perfect hash over the n keys of input_range.
+    //  - gamma: bits-per-key space/speed tradeoff of the first level;
+    //  - writeEach: spill each level's surviving keys to a temp file (lower RAM);
+    //    it disables fastmode when both are requested;
+    //  - progress: show a Progress bar on stderr during construction.
+    template <typename Range>
+    mphf(size_t n, const Range &input_range,
+         int num_thread = 1,
+         double gamma = 2.0,
+         bool writeEach = true, bool progress =true, float perc_elem_loaded = 0.03)
+            :
+            _nb_levels(0), _gamma(gamma), _hash_domain(size_t(ceil(double(n) * gamma))), _nelem(n), _num_thread(num_thread), _percent_elem_loaded_for_fastMode (perc_elem_loaded), _withprogress(progress) {
+        if (n ==0)
+            return;
+
+        _fastmode = false;
+        if (_percent_elem_loaded_for_fastMode > 0.0)
+            _fastmode =true;
+
+        if (writeEach) {
+            _writeEachLevel =true;
+            _fastmode = false;
+        } else {
+            _writeEachLevel = false;
+        }
+
+        setup();
+
+        if (_withprogress) {
+            _progressBar.timer_mode=1;
+
+            // Estimated total work (in key-passes) for each construction strategy,
+            // used only to scale the progress bar.
+            double total_raw = _nb_levels;
+
+            double sum_geom_read =  ( 1.0 / (1.0 - _proba_collision));
+            double total_writeEach = sum_geom_read + 1.0;
+
+            double total_fastmode_ram =  (_fastModeLevel+1) +  ( pow(_proba_collision,_fastModeLevel)) * (_nb_levels-(_fastModeLevel+1))   ;
+
+            printf("for info, total work write each  : %.3f    total work inram from level %i : %.3f  total work raw : %.3f \n",total_writeEach,_fastModeLevel,total_fastmode_ram,total_raw);
+
+            if(writeEach)
+            {
+                _progressBar.init(_nelem * total_writeEach, "Building BooPHF",num_thread);
+            }
+            else if(_fastmode)
+                _progressBar.init( _nelem * total_fastmode_ram, "Building BooPHF",num_thread);
+            else
+                _progressBar.init( _nelem * _nb_levels ,"Building BooPHF",num_thread);
+        }
+
+        // Per level: mark key bits, zero the collided ones, then extend the
+        // cumulative rank so hash values stay globally minimal.
+        uint64_t offset = 0;
+        for(int ii = 0; ii< _nb_levels; ii++)
+        {
+            _tempBitset =  new bitVector(_levels[ii].hash_domain); // temp collision bitarray for this level
+
+            processLevel(input_range,ii);
+
+            _levels[ii].bitset.clearCollisions(0 , _levels[ii].hash_domain , _tempBitset);
+
+            offset = _levels[ii].bitset.build_ranks(offset);
+
+            delete _tempBitset;
+        }
+
+        if(_withprogress)
+            _progressBar.finish_threaded();
+
+
+        _lastbitsetrank = offset ;
+
+        //printf("used temp ram for construction : %lli MB \n",setLevelFastmode.capacity()* sizeof(elem_t) /1024ULL/1024ULL);
+
+        std::vector<internal_hash_t>().swap(setLevelFastmode);   // clear setLevelFastmode reallocating
+
+
+        pthread_mutex_destroy(&_mutex);
+
+        _built = true;
+    }
+
+
+    // Returns the minimal perfect hash value of `elem` in [0, _nelem), or
+    // ULLONG_MAX when the structure is not built or the key fell through every
+    // level without appearing in the final exact hash.  (A key that was not in
+    // the build set may still map to an arbitrary valid index if it hits a set
+    // bit at an earlier level — only the last level performs an exact check.)
+    template<class elem_t>
+    uint64_t lookup(elem_t elem) {
+        if (!_built) return ULLONG_MAX;
+
+        //auto hashes = _hasher(elem);
+        uint64_t non_minimal_hp,minimal_hp;
+        int level;
+
+        hash_pair_t bbhash = _hasher.hashpair128(elem);
+        uint64_t level_hash = getLevel(bbhash, elem, &level);
+
+        if (level == (_nb_levels-1)) {
+            //auto in_final_map  = _final_hash.find (elem);
+            auto in_final_map  = _final_hash.find(bbhash);
+            if (in_final_map == _final_hash.end()) {
+                //elem was not in orignal set of keys
+                return ULLONG_MAX; //  means elem not in set
+            } else {
+                minimal_hp =  in_final_map->second + _lastbitsetrank;
+                //printf("lookup %llu  level %i   --> %llu \n",elem,level,minimal_hp);
+                return minimal_hp;
+            }
+            //				minimal_hp = _final_hash[elem] + _lastbitsetrank;
+            //				return minimal_hp;
+        } else {
+            //non_minimal_hp =  level_hash %  _levels[level].hash_domain; // in fact non minimal hp would be  + _levels[level]->idx_begin
+            non_minimal_hp = fastrange64(level_hash,_levels[level].hash_domain);
+        }
+        // rank over the level's bitset turns the per-level position into a
+        // globally minimal index (samples already carry the cumulative offset)
+        minimal_hp = _levels[level].bitset.rank(non_minimal_hp);
+        //	printf("lookup %llu  level %i   --> %llu \n",elem,level,minimal_hp);
+
+        return minimal_hp;
+    }
+
+    // Number of keys the function was built over.
+    uint64_t size() const {
+        return _nelem;
+    }
+
+    // Approximate memory footprint in bytes (bitsets + estimated map overhead).
+    uint64_t mem_size() const {
+        uint64_t totalsizeBitset = 0;
+        for (int ii = 0; ii < _nb_levels; ii++)
+            totalsizeBitset += _levels[ii].bitset.bitSize();
+
+        uint64_t totalsize = totalsizeBitset +  _final_hash.size()*42*8 ;  // unordered map takes approx 42B per elem [personal test] (42B with uint64_t key, would be larger for other type of elem)
+
+        /*
+        printf("Bitarray    %12llu  bits (%.2f %%)   (array + ranks )\n",
+               totalsizeBitset, 100*(float)totalsizeBitset/totalsize);
+        printf("final hash  %12lu  bits (%.2f %%) (nb in final hash %lu)\n",
+               _final_hash.size()*42*8, 100*(float)(_final_hash.size()*42*8)/totalsize,
+               _final_hash.size() );
+        */
+
+        return totalsize / 8;
+    }
+
+    // Pulls up to NBBUFF keys from the shared iterator (under _mutex), hashing
+    // each one to its 128-bit signature into `buffer`; clears isRunning when
+    // the iterator reaches `until`.
+    template <typename Iterator> void fillBuffer(std::vector<internal_hash_t> &buffer, std::shared_ptr<Iterator> shared_it, std::shared_ptr<Iterator> until_p,
+                                                 uint64_t &inbuff, bool & isRunning) {
+        auto until = *until_p;
+        pthread_mutex_lock(&_mutex);
+        for (; inbuff<NBBUFF && (*shared_it)!=until; ++(*shared_it)) {
+            buffer[inbuff]= _hasher.hashpair128(*(*shared_it));
+            inbuff++;
+        }
+
+        if ((*shared_it)==until)
+            isRunning =false;
+        pthread_mutex_unlock(&_mutex);
+
+    }
+
+
+    //specialization for internal_hash_t iterator : may be from file or from vector in ram (diskit_hash128_t or vectorit_hash128_t), but same implem for both, below
+    void fillBuffer (std::vector<internal_hash_t> &buffer,
+                     std::shared_ptr<diskit_hash128_t> shared_it, std::shared_ptr<diskit_hash128_t> until_p, uint64_t & inbuff, bool & isRunning) {
+        fillBufferCommon128(buffer,shared_it,until_p,inbuff,isRunning);
+    }
+    void fillBuffer (std::vector<internal_hash_t>  & buffer,std::shared_ptr<vectorit_hash128_t> shared_it, std::shared_ptr<vectorit_hash128_t> until_p,uint64_t & inbuff, bool & isRunning) {
+        fillBufferCommon128(buffer,shared_it,until_p,inbuff,isRunning);
+    }
+
+    // Same batching as fillBuffer above, but the source already yields 128-bit
+    // hashes, so elements are copied without re-hashing.
+    template <typename Iterator> void fillBufferCommon128(std::vector<internal_hash_t>  & buffer,std::shared_ptr<Iterator> shared_it, std::shared_ptr<Iterator> until_p,
+                                                          uint64_t &inbuff, bool &isRunning) {
+        auto until = *until_p;
+        pthread_mutex_lock(&_mutex);
+        for (; inbuff<NBBUFF && (*shared_it)!=until; ++(*shared_it)) {
+            buffer[inbuff]= (*(*shared_it)); inbuff++;
+        }
+
+        if((*shared_it)==until) isRunning =false;
+        pthread_mutex_unlock(&_mutex);
+    }
+
+    // Worker routine run by each thread while building level i: pulls batches
+    // of keys/hashes from the shared iterator, finds the level each element
+    // belongs to, and inserts those reaching level i into its bitset (or into
+    // the exact final hash at the last level).
+    template <typename Iterator>  //typename Range,
+    void pthread_processLevel(std::vector<internal_hash_t> &buffer, std::shared_ptr<Iterator> shared_it, std::shared_ptr<Iterator> until_p, int i) {
+        uint64_t nb_done =0;
+        int tid =  __sync_fetch_and_add (&_nb_living, 1);
+        uint64_t inbuff =0;
+
+        uint64_t writebuff =0;
+        std::vector< internal_hash_t > & myWriteBuff = bufferperThread[tid];
+        for (bool isRunning=true;  isRunning ; ) {
+            //safely copy n items into buffer
+            //call to specialized function according to iterator type (may be iterator over keys (first 2 levels), or iterator over 128 bit hashes)
+            fillBuffer(buffer,shared_it,until_p,inbuff,isRunning);
+
+            //do work on the n elems of the buffer
+            for (uint64_t ii=0; ii<inbuff ; ii++) {
+                //internal_hash_t val = buffer[ii];
+                internal_hash_t val  = buffer[ii];
+                internal_hash_t bbhash = buffer[ii];
+
+                //auto hashes = _hasher(val);
+                //hash_pair_t bbhash;
+                int level;
+                uint64_t level_hash;
+                // with per-level spill files the input only contains keys that
+                // already survived up to level i-1, so probing starts there
+                if(_writeEachLevel)
+                    getLevel(bbhash,&level, i,i-1);
+                else
+                    getLevel(bbhash,&level, i);
+
+                //					#ifdef PDEBUG
+                //					printf("processing %llu %llu  level %i\n",val[0],val[1], level);
+                //#endif
+
+
+                //__sync_fetch_and_add(& _cptTotalProcessed,1);
+
+                if(level == i) //insert into lvl i
+                {
+
+#ifdef CCDEBUG
+                    __sync_fetch_and_add(& _cptLevel,1);
+#endif
+
+
+
+                    if(_fastmode && i == _fastModeLevel)
+                    {
+
+                        uint64_t idxl2 = __sync_fetch_and_add(& _idxLevelsetLevelFastmode,1);
+                        // if this exceeds the expected setLevelFastmode capacity, fall back to slow mode;
+                        // should not happen if the hash is good and probability is on our side
+                        if(idxl2>= setLevelFastmode.size())
+                            _fastmode = false;
+                        else
+                            setLevelFastmode[idxl2] = val; // create set for fast mode
+                    }
+
+                    //insert to level i+1 : either next level of the cascade or final hash if last level reached
+                    if(i == _nb_levels-1) //stop cascade here, insert into exact hash
+                    {
+
+                        uint64_t hashidx =  __sync_fetch_and_add(&_hashidx, 1);
+
+                        pthread_mutex_lock(&_mutex); //see later if possible to avoid this, but few items reach this point
+                        // could compute the end rank of the previous level somewhere and init hashidx with it:
+                        // directly minimal, no need to insert into the bitset and rank
+
+                        if (_final_hash.count(val)) // key already in final hash
+                        {
+                            fprintf(stderr,"The impossible happened : collision on 128 bit hashes... please switch to safe branch, and play the lottery.");
+                            fprintf(stderr,"Another more likely explanation might be that you have duplicate keys in your input.\
+                                        If so, you can ignore this message, but be aware that too many duplicate keys will increase ram usage\n");
+                        }
+                        _final_hash[val] = hashidx;
+
+
+                        pthread_mutex_unlock(&_mutex);
+                    }
+                    else
+                    {
+
+                        // these keys reached this level:
+                        // write elem to the current level's file on disk --> it will be read at level+1 (but still needs filtering)
+
+                        if(_writeEachLevel && i > 0 && i < _nb_levels -1)
+                        {
+                            if(writebuff>=NBBUFF)
+                            {
+                                //flush buffer
+                                flockfile(_currlevelFile);
+                                fwrite(myWriteBuff.data(),sizeof(internal_hash_t),writebuff,_currlevelFile);
+                                funlockfile(_currlevelFile);
+                                writebuff = 0;
+
+                            }
+
+                            myWriteBuff[writebuff++] = val;
+
+                        }
+
+
+
+                        //#ifdef PDEBUG
+                        //							printf("inserting into level %i ",i);
+                        //#endif
+
+
+                        // computes next hash
+                        if (level == 0)
+                            level_hash = bbhash[0];
+                        else if (level == 1)
+                            level_hash = bbhash[1];
+                        else
+                            level_hash = _hasher.next(bbhash);
+                        insertIntoLevel(level_hash,i); //should be safe
+                    }
+                }
+
+                nb_done++;
+                // batch progress updates to one call per 1024 processed items
+                if ((nb_done&1023) ==0  && _withprogress) {_progressBar.inc(nb_done,tid);nb_done=0; }
+
+            }
+
+            inbuff = 0;
+        }
+
+        if(_writeEachLevel && writebuff>0) {
+            //flush buffer
+            flockfile(_currlevelFile);
+            fwrite(myWriteBuff.data(),sizeof(internal_hash_t),writebuff,_currlevelFile);
+            funlockfile(_currlevelFile);
+            writebuff = 0;
+        }
+
+    }
+
+
+    // Serializes parameters, per-level bitsets and the final exact hash to a
+    // raw binary stream.
+    void save(std::ostream& os) const {
+        os.write(reinterpret_cast<char const*>(&_gamma), sizeof(_gamma));
+        os.write(reinterpret_cast<char const*>(&_nb_levels), sizeof(_nb_levels));
+        os.write(reinterpret_cast<char const*>(&_lastbitsetrank), sizeof(_lastbitsetrank));
+        os.write(reinterpret_cast<char const*>(&_nelem), sizeof(_nelem));
+        for(int ii=0; ii<_nb_levels; ii++)
+        {
+            _levels[ii].bitset.save(os);
+        }
+
+        //save final hash
+        size_t final_hash_size = _final_hash.size();
+
+        os.write(reinterpret_cast<char const*>(&final_hash_size), sizeof(size_t));
+
+        for (auto it = _final_hash.begin(); it != _final_hash.end(); ++it )
+        {
+            os.write(reinterpret_cast<char const*>(&(it->first)), sizeof(internal_hash_t));
+            os.write(reinterpret_cast<char const*>(&(it->second)), sizeof(uint64_t));
+        }
+
+    }
+
+    // Deserializes a structure written by save(); the per-level domain sizes
+    // are recomputed from _gamma/_nelem rather than stored in the stream.
+    void load(std::istream& is) {
+        is.read(reinterpret_cast<char*>(&_gamma), sizeof(_gamma));
+        is.read(reinterpret_cast<char*>(&_nb_levels), sizeof(_nb_levels));
+        is.read(reinterpret_cast<char*>(&_lastbitsetrank), sizeof(_lastbitsetrank));
+        is.read(reinterpret_cast<char*>(&_nelem), sizeof(_nelem));
+
+        _levels.resize(_nb_levels);
+
+
+        for(int ii=0; ii<_nb_levels; ii++)
+        {
+            //_levels[ii].bitset = new bitVector();
+            _levels[ii].bitset.load(is);
+        }
+
+
+
+        //mini setup, recompute size of each level
+        _proba_collision = 1.0 -  pow(((_gamma*(double)_nelem -1 ) / (_gamma*(double)_nelem)),_nelem-1);
+        uint64_t previous_idx =0;
+        _hash_domain = (size_t)  (ceil(double(_nelem) * _gamma)) ;
+        for(int ii=0; ii<_nb_levels; ii++)
+        {
+            //_levels[ii] = new level();
+            _levels[ii].idx_begin = previous_idx;
+            _levels[ii].hash_domain =  (( (uint64_t) (_hash_domain * pow(_proba_collision,ii)) + 63) / 64 ) * 64;
+            if(_levels[ii].hash_domain == 0 )
+                _levels[ii].hash_domain  = 64 ;
+            previous_idx += _levels[ii].hash_domain;
+        }
+
+        //restore final hash
+
+        _final_hash.clear();
+        size_t final_hash_size ;
+
+        is.read(reinterpret_cast<char *>(&final_hash_size), sizeof(size_t));
+
+        for(unsigned int ii=0; ii<final_hash_size; ii++)
+        {
+            internal_hash_t key;
+            uint64_t value;
+
+            is.read(reinterpret_cast<char *>(&key), sizeof(internal_hash_t));
+            is.read(reinterpret_cast<char *>(&value), sizeof(uint64_t));
+
+            _final_hash[key] = value;
+        }
+        _built = true;
+    }
+
+
+  private :
+
+    // One-time construction setup: initializes the mutex and per-thread write
+    // buffers, computes the per-level collision probability, sizes the (fixed
+    // 25) cascade levels, and picks the level from which fastmode can cache
+    // the surviving keys in RAM.
+    void setup()
+    {
+        pthread_mutex_init(&_mutex, NULL);
+
+        _pid = getpid() + printPt(pthread_self()) ;// + pthread_self();
+        //printf("pt self %llu  pid %i \n",printPt(pthread_self()),_pid);
+
+        _cptTotalProcessed=0;
+
+
+        if(_fastmode)
+        {
+            setLevelFastmode.resize(_percent_elem_loaded_for_fastMode * (double)_nelem );
+        }
+
+
+        bufferperThread.resize(_num_thread);
+        if(_writeEachLevel)
+        {
+            for(int ii=0; ii<_num_thread; ii++)
+            {
+                bufferperThread[ii].resize(NBBUFF);
+            }
+        }
+
+        // probability that a key collides with at least one other key in a
+        // table of gamma*n slots; drives the geometric shrinking of the levels
+        _proba_collision = 1.0 -  pow(((_gamma*(double)_nelem -1 ) / (_gamma*(double)_nelem)),_nelem-1);
+
+        double sum_geom =_gamma * ( 1.0 +  _proba_collision / (1.0 - _proba_collision));
+        //printf("proba collision %f  sum_geom  %f   \n",_proba_collision,sum_geom);
+
+        _nb_levels = 25; // 25
+        _levels.resize(_nb_levels);
+
+        //build levels
+        uint64_t previous_idx =0;
+        for(int ii=0; ii<_nb_levels; ii++)
+        {
+
+            _levels[ii].idx_begin = previous_idx;
+
+            // round size to nearest superior multiple of 64, makes it easier to clear a level
+            _levels[ii].hash_domain =  (( (uint64_t) (_hash_domain * pow(_proba_collision,ii)) + 63) / 64 ) * 64;
+            if(_levels[ii].hash_domain == 0 ) _levels[ii].hash_domain  = 64 ;
+            previous_idx += _levels[ii].hash_domain;
+
+            //printf("build level %i bit array : start %12llu, size %12llu  ",ii,_levels[ii]->idx_begin,_levels[ii]->hash_domain );
+            //printf(" expected elems : %.2f %% total \n",100.0*pow(_proba_collision,ii));
+
+        }
+
+        // fastmode kicks in at the first level expected to hold fewer than
+        // perc_elem_loaded of the keys, so the cache fits the reserved vector
+        for(int ii=0; ii<_nb_levels; ii++)
+        {
+            if(pow(_proba_collision,ii) < _percent_elem_loaded_for_fastMode)
+            {
+                _fastModeLevel = ii;
+                //printf("fast mode level :  %i \n",ii);
+                break;
+            }
+        }
+    }
+
+    //overload getLevel with either elem_t or internal_hash_t
+    template<class elem_t>
+    uint64_t getLevel(hash_pair_t bbhash, elem_t val, int *res_level, int maxlevel = 100, int minlevel = 0) const {
+        int level = 0;
+        uint64_t hash_raw=0;
+
+        for (int ii = 0; ii < (_nb_levels-1) && ii < maxlevel ; ii++) {
+            //calc le hash suivant
+            if (ii == 0)
+                hash_raw = bbhash[0];
+            else if (ii == 1)
+                hash_raw = bbhash[1];
+            else
+                hash_raw = _hasher.next(bbhash);
+
+            if (ii >= minlevel && _levels[ii].get(hash_raw))
+                break;
+
+            level++;
+        }
+
+        *res_level = level;
+        return hash_raw;
+    }
+
+
+    // compute level and returns hash of last level reached
+    // FIXME: The usage of getLevel here is *super* confusing, really.
+    uint64_t getLevel(internal_hash_t &bbhash,int * res_level, int maxlevel = 100, int minlevel =0) const {
+        int level = 0;
+        uint64_t hash_raw=0;
+
+        for (int ii = 0; ii<(_nb_levels-1) &&  ii < maxlevel ; ii++) {
+            //calc le hash suivant
+            if (ii == 0)
+                hash_raw = bbhash[0];
+            else if (ii == 1)
+                hash_raw = bbhash[1];
+            else
+                hash_raw = _hasher.next(bbhash);
+
+            if (ii >= minlevel && _levels[ii].get(hash_raw))
+                break;
+
+            level++;
+        }
+
+        *res_level = level;
+        return hash_raw;
+    }
+
+
+    //insert into bitarray
+    void insertIntoLevel(uint64_t level_hash, int i) {
+        //	uint64_t hashl =  level_hash % _levels[i].hash_domain;
+        uint64_t hashl = fastrange64( level_hash,_levels[i].hash_domain);
+
+        //#ifdef PDEBUG
+        //			printf(" :  %llu / %llu \n ",hashl,_levels[i].hash_domain);
+        //#endif
+
+        if (_levels[i].bitset.atomic_test_and_set(hashl))
+            _tempBitset->atomic_test_and_set(hashl);
+    }
+
+    //loop to insert into level i
+    template <typename Range>
+    void processLevel(Range const& input_range,int i) {
+        ////alloc the bitset for this level
+        _levels[i].bitset = bitVector(_levels[i].hash_domain);
+
+        //printf("---process level %i   wr %i fast %i ---\n",i,_writeEachLevel,_fastmode);
+
+        char fname_old[1000];
+        sprintf(fname_old,"temp_p%i_level_%i",_pid,i-2);
+
+        char fname_curr[1000];
+        sprintf(fname_curr,"temp_p%i_level_%i",_pid,i);
+
+        char fname_prev[1000];
+        sprintf(fname_prev,"temp_p%i_level_%i",_pid,i-1);
+
+        if (_writeEachLevel) {
+            //file management :
+            if(i>2) //delete previous file
+            {
+                unlink(fname_old);
+            }
+
+            if(i< _nb_levels-1 && i > 0 ) //create curr file
+            {
+                _currlevelFile = fopen(fname_curr,"w");
+            }
+        }
+
+
+        _cptLevel = 0;
+        _hashidx = 0;
+        _idxLevelsetLevelFastmode =0;
+        _nb_living =0;
+        //create  threads
+        pthread_t *tab_threads= new pthread_t [_num_thread];
+        typedef decltype(input_range.begin()) it_type;
+        thread_args<Range, it_type> t_arg; // meme arg pour tous
+        t_arg.boophf = this;
+        t_arg.range = &input_range;
+        t_arg.it_p = std::static_pointer_cast<void>(std::make_shared<it_type>(input_range.begin()));
+        t_arg.until_p = std::static_pointer_cast<void>(std::make_shared<it_type>(input_range.end()));
+
+        t_arg.level = i;
+
+        if (_writeEachLevel && (i > 1)) {
+
+            auto data_iterator_level = file_binary<internal_hash_t>(fname_prev);
+
+            //typedef decltype(data_iterator_level.begin()) disklevel_it_type;
+            //diskit_hash128_t
+
+            //data_iterator_level.begin();
+
+            t_arg.it_p = std::static_pointer_cast<void>(std::make_shared<diskit_hash128_t>(data_iterator_level.begin()));
+            t_arg.until_p = std::static_pointer_cast<void>(std::make_shared<diskit_hash128_t>(data_iterator_level.end()));
+
+            for (int ii=0;ii<_num_thread;ii++)
+                pthread_create(&tab_threads[ii], NULL, thread_processLevel<Hasher_t, Range, diskit_hash128_t>, &t_arg); //&t_arg[ii]
+
+
+            //must join here before the block is closed and file_binary is destroyed (and closes the file)
+            for(int ii=0;ii<_num_thread;ii++)
+            {
+                pthread_join(tab_threads[ii], NULL);
+            }
+
+        } else {
+            if (_fastmode && i >= (_fastModeLevel+1)) {
+                //   we'd like to do t_arg.it = data_iterator.begin() but types are different;
+                //   so, casting to (void*) because of that; and we remember the type in the template
+                //	typedef decltype(setLevelFastmode.begin()) fastmode_it_type; // vectorit_hash128_t
+                t_arg.it_p =  std::static_pointer_cast<void>(std::make_shared<vectorit_hash128_t>(setLevelFastmode.begin()));
+                t_arg.until_p =  std::static_pointer_cast<void>(std::make_shared<vectorit_hash128_t>(setLevelFastmode.end()));
+
+                //       we'd like to do t_arg.it = data_iterator.begin() but types are different;
+                //       so, casting to (void*) because of that; and we remember the type in the template
+
+                for (int ii=0;ii<_num_thread;ii++)
+                    pthread_create (&tab_threads[ii], NULL,  thread_processLevel<Hasher_t, Range, vectorit_hash128_t>, &t_arg); //&t_arg[ii]
+
+            } else {
+                //printf(" _ _ basic mode \n");
+                for(int ii=0;ii<_num_thread;ii++)
+                    pthread_create (&tab_threads[ii], NULL,  thread_processLevel<Hasher_t, Range, decltype(input_range.begin())>, &t_arg); //&t_arg[ii]
+            }
+            //joining
+            for(int ii=0;ii<_num_thread;ii++)
+            {
+                pthread_join(tab_threads[ii], NULL);
+            }
+        }
+
+#ifdef CCDEBUG
+        printf("\ngoing to level %i  : %llu elems  %.2f %%  expected : %.2f %% \n",i,_cptLevel,100.0* _cptLevel/(float)_nelem,100.0* pow(_proba_collision,i) );
+#endif
+        //printf("\ncpt total processed %llu \n",_cptTotalProcessed);
+        if(_fastmode && i == _fastModeLevel) //shrink to actual number of elements in set
+        {
+            //printf("\nresize setLevelFastmode to %lli \n",_idxLevelsetLevelFastmode);
+            setLevelFastmode.resize(_idxLevelsetLevelFastmode);
+        }
+        delete [] tab_threads;
+
+        if(_writeEachLevel)
+        {
+            if(i< _nb_levels-1 && i>0)
+            {
+                fflush(_currlevelFile);
+                fclose(_currlevelFile);
+            }
+
+            if(i== _nb_levels- 1) //delete last file
+            {
+                unlink(fname_prev);
+            }
+        }
+
+    }
+
+  private:
+    std::vector<level> _levels;
+    int _nb_levels;
+    MultiHasher_t _hasher;
+    bitVector * _tempBitset;
+
+    double _gamma;
+    uint64_t _hash_domain;
+    uint64_t _nelem;
+    std::unordered_map<internal_hash_t,uint64_t, internalHasher> _final_hash; // internalHasher   Hasher_t
+    Progress _progressBar;
+    int _nb_living;
+    int _num_thread;
+    uint64_t _hashidx;
+    double _proba_collision;
+    uint64_t _lastbitsetrank;
+    uint64_t _idxLevelsetLevelFastmode;
+    uint64_t _cptLevel;
+    uint64_t _cptTotalProcessed;
+
+    // fast build mode , requires  that _percent_elem_loaded_for_fastMode %   elems are loaded in ram
+    float _percent_elem_loaded_for_fastMode ;
+    bool _fastmode;
+    std::vector< internal_hash_t > setLevelFastmode;
+    //	std::vector< internal_hash_t > setLevelFastmode_next; // todo shrinker le set e nram a chaque niveau  ?
+
+    std::vector< std::vector< internal_hash_t > > bufferperThread;
+
+    int _fastModeLevel;
+    bool _withprogress;
+    bool _built;
+    bool _writeEachLevel;
+    FILE * _currlevelFile;
+    int _pid;
+  public:
+    pthread_mutex_t _mutex;
+};
+
+////////////////////////////////////////////////////////////////
+#pragma mark -
+#pragma mark threading
+////////////////////////////////////////////////////////////////
+
+template<typename Hasher_t, typename Range, typename it_type>
+void *thread_processLevel(void * args) {
+    if (args ==NULL) return NULL;
+
+    thread_args<Range,it_type> *targ = (thread_args<Range,it_type>*) args;
+    mphf<Hasher_t> * obw = (mphf<Hasher_t > *) targ->boophf;
+    int level = targ->level;
+    std::vector<internal_hash_t> buffer;
+    buffer.resize(NBBUFF);
+
+    pthread_mutex_t * mutex =  & obw->_mutex;
+
+    pthread_mutex_lock(mutex); // from comment above: "//get starting iterator for this thread, must be protected (must not be currently used by other thread to copy elems in buff)"
+    std::shared_ptr<it_type> startit = std::static_pointer_cast<it_type>(targ->it_p);
+    std::shared_ptr<it_type> until_p = std::static_pointer_cast<it_type>(targ->until_p);
+    pthread_mutex_unlock(mutex);
+
+    obw->pthread_processLevel(buffer, startit, until_p, level);
+
+    return NULL;
+}
+}
diff --git a/ext/include/llvm/ADT/IntrusiveRefCntPtr.h b/ext/include/llvm/ADT/IntrusiveRefCntPtr.h
index 8057ec1..784856d 100644
--- a/ext/include/llvm/ADT/IntrusiveRefCntPtr.h
+++ b/ext/include/llvm/ADT/IntrusiveRefCntPtr.h
@@ -108,7 +108,7 @@ public:
 
   void Release() const {
     int NewRefCount = --RefCount;
-    assert(NewRefCount >= 0 && "Reference count was already zero.");
+    // assert(NewRefCount >= 0 && "Reference count was already zero.");
     if (NewRefCount == 0)
       delete static_cast<const Derived*>(this);
   }
diff --git a/ext/include/llvm/Support/MathExtras.h b/ext/include/llvm/Support/MathExtras.h
index 8c0b110..bc843e7 100644
--- a/ext/include/llvm/Support/MathExtras.h
+++ b/ext/include/llvm/Support/MathExtras.h
@@ -579,6 +579,52 @@ inline uint64_t PowerOf2Floor(uint64_t A) {
 ///
 /// Examples:
 /// \code
+///   alignTo(5, 8) = 8
+///   alignTo(17, 8) = 24
+///   alignTo(~0LL, 8) = 0
+///   alignTo(321, 255) = 510
+///
+///   alignTo(5, 8, 7) = 7
+///   alignTo(17, 8, 1) = 17
+///   alignTo(~0LL, 8, 3) = 3
+///   alignTo(321, 255, 42) = 552
+/// \endcode
+inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
+  assert(Align != 0u && "Align can't be 0.");
+  Skew %= Align;
+  return (Value + Align - 1 - Skew) / Align * Align + Skew;
+}
+
+/// Returns the next integer (mod 2**64) that is greater than or equal to
+/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
+template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
+  static_assert(Align != 0u, "Align must be non-zero");
+  return (Value + Align - 1) / Align * Align;
+}
+
+/// \c alignTo for contexts where a constant expression is required.
+/// \sa alignTo
+///
+/// \todo FIXME: remove when \c constexpr becomes really \c constexpr
+template <uint64_t Align>
+struct AlignTo {
+  static_assert(Align != 0u, "Align must be non-zero");
+  template <uint64_t Value>
+  struct from_value {
+    static const uint64_t value = (Value + Align - 1) / Align * Align;
+  };
+};
+
+/// Returns the next integer (mod 2**64) that is greater than or equal to
+/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
+///
+/// If non-zero \p Skew is specified, the return value will be a minimal
+/// integer that is greater than or equal to \p Value and equal to
+/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
+/// \p Align, its value is adjusted to '\p Skew mod \p Align'.
+///
+/// Examples:
+/// \code
 ///   RoundUpToAlignment(5, 8) = 8
 ///   RoundUpToAlignment(17, 8) = 24
 ///   RoundUpToAlignment(~0LL, 8) = 0
diff --git a/ext/include/llvm/Support/TrailingObjects.h b/ext/include/llvm/Support/TrailingObjects.h
new file mode 100644
index 0000000..cb5a52b
--- /dev/null
+++ b/ext/include/llvm/Support/TrailingObjects.h
@@ -0,0 +1,401 @@
+//===--- TrailingObjects.h - Variable-length classes ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This header defines support for implementing classes that have
+/// some trailing object (or arrays of objects) appended to them. The
+/// main purpose is to make it obvious where this idiom is being used,
+/// and to make the usage more idiomatic and more difficult to get
+/// wrong.
+///
+/// The TrailingObject template abstracts away the reinterpret_cast,
+/// pointer arithmetic, and size calculations used for the allocation
+/// and access of appended arrays of objects, and takes care that they
+/// are all allocated at their required alignment. Additionally, it
+/// ensures that the base type is final -- deriving from a class that
+/// expects data appended immediately after it is typically not safe.
+///
+/// Users are expected to derive from this template, and provide
+/// numTrailingObjects implementations for each trailing type except
+/// the last, e.g. like this sample:
+///
+/// \code
+/// class VarLengthObj : private TrailingObjects<VarLengthObj, int, double> {
+///   friend TrailingObjects;
+///
+///   unsigned NumInts, NumDoubles;
+///   size_t numTrailingObjects(OverloadToken<int>) const { return NumInts; }
+///  };
+/// \endcode
+///
+/// You can access the appended arrays via 'getTrailingObjects', and
+/// determine the size needed for allocation via
+/// 'additionalSizeToAlloc' and 'totalSizeToAlloc'.
+///
+/// All the methods implemented by this class are are intended for use
+/// by the implementation of the class, not as part of its interface
+/// (thus, private inheritance is suggested).
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_TRAILINGOBJECTS_H
+#define LLVM_SUPPORT_TRAILINGOBJECTS_H
+
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/type_traits.h"
+#include <new>
+#include <type_traits>
+
+namespace llvm {
+
+namespace trailing_objects_internal {
+/// Helper template to calculate the max alignment requirement for a set of
+/// objects.
+template <typename First, typename... Rest> class AlignmentCalcHelper {
+private:
+  enum {
+    FirstAlignment = alignof(First),
+    RestAlignment = AlignmentCalcHelper<Rest...>::Alignment,
+  };
+
+public:
+  enum {
+    Alignment = FirstAlignment > RestAlignment ? FirstAlignment : RestAlignment
+  };
+};
+
+template <typename First> class AlignmentCalcHelper<First> {
+public:
+  enum { Alignment = alignof(First) };
+};
+
+/// The base class for TrailingObjects* classes.
+class TrailingObjectsBase {
+protected:
+  /// OverloadToken's purpose is to allow specifying function overloads
+  /// for different types, without actually taking the types as
+  /// parameters. (Necessary because member function templates cannot
+  /// be specialized, so overloads must be used instead of
+  /// specialization.)
+  template <typename T> struct OverloadToken {};
+};
+
+/// This helper template works-around MSVC 2013's lack of useful
+/// alignas() support. The argument to LLVM_ALIGNAS(), in MSVC, is
+/// required to be a literal integer. But, you *can* use template
+/// specialization to select between a bunch of different LLVM_ALIGNAS
+/// expressions...
+template <int Align>
+class TrailingObjectsAligner : public TrailingObjectsBase {};
+template <>
+class LLVM_ALIGNAS(1) TrailingObjectsAligner<1> : public TrailingObjectsBase {};
+template <>
+class LLVM_ALIGNAS(2) TrailingObjectsAligner<2> : public TrailingObjectsBase {};
+template <>
+class LLVM_ALIGNAS(4) TrailingObjectsAligner<4> : public TrailingObjectsBase {};
+template <>
+class LLVM_ALIGNAS(8) TrailingObjectsAligner<8> : public TrailingObjectsBase {};
+template <>
+class LLVM_ALIGNAS(16) TrailingObjectsAligner<16> : public TrailingObjectsBase {
+};
+template <>
+class LLVM_ALIGNAS(32) TrailingObjectsAligner<32> : public TrailingObjectsBase {
+};
+
+// Just a little helper for transforming a type pack into the same
+// number of a different type. e.g.:
+//   ExtractSecondType<Foo..., int>::type
+template <typename Ty1, typename Ty2> struct ExtractSecondType {
+  typedef Ty2 type;
+};
+
+// TrailingObjectsImpl is somewhat complicated, because it is a
+// recursively inheriting template, in order to handle the template
+// varargs. Each level of inheritance picks off a single trailing type
+// then recurses on the rest. The "Align", "BaseTy", and
+// "TopTrailingObj" arguments are passed through unchanged through the
+// recursion. "PrevTy" is, at each level, the type handled by the
+// level right above it.
+
+template <int Align, typename BaseTy, typename TopTrailingObj, typename PrevTy,
+          typename... MoreTys>
+class TrailingObjectsImpl {
+  // The main template definition is never used -- the two
+  // specializations cover all possibilities.
+};
+
+template <int Align, typename BaseTy, typename TopTrailingObj, typename PrevTy,
+          typename NextTy, typename... MoreTys>
+class TrailingObjectsImpl<Align, BaseTy, TopTrailingObj, PrevTy, NextTy,
+                          MoreTys...>
+    : public TrailingObjectsImpl<Align, BaseTy, TopTrailingObj, NextTy,
+                                 MoreTys...> {
+
+  typedef TrailingObjectsImpl<Align, BaseTy, TopTrailingObj, NextTy, MoreTys...>
+      ParentType;
+
+  struct RequiresRealignment {
+    static const bool value = alignof(PrevTy) < alignof(NextTy);
+  };
+
+  static constexpr bool requiresRealignment() {
+    return RequiresRealignment::value;
+  }
+
+protected:
+  // Ensure the inherited getTrailingObjectsImpl is not hidden.
+  using ParentType::getTrailingObjectsImpl;
+
+  // These two functions are helper functions for
+  // TrailingObjects::getTrailingObjects. They recurse to the left --
+  // the result for each type in the list of trailing types depends on
+  // the result of calling the function on the type to the
+  // left. However, the function for the type to the left is
+  // implemented by a *subclass* of this class, so we invoke it via
+  // the TopTrailingObj, which is, via the
+  // curiously-recurring-template-pattern, the most-derived type in
+  // this recursion, and thus, contains all the overloads.
+  static const NextTy *
+  getTrailingObjectsImpl(const BaseTy *Obj,
+                         TrailingObjectsBase::OverloadToken<NextTy>) {
+    auto *Ptr = TopTrailingObj::getTrailingObjectsImpl(
+                    Obj, TrailingObjectsBase::OverloadToken<PrevTy>()) +
+                TopTrailingObj::callNumTrailingObjects(
+                    Obj, TrailingObjectsBase::OverloadToken<PrevTy>());
+
+    if (requiresRealignment())
+      return reinterpret_cast<const NextTy *>(
+          llvm::alignAddr(Ptr, alignof(NextTy)));
+    else
+      return reinterpret_cast<const NextTy *>(Ptr);
+  }
+
+  static NextTy *
+  getTrailingObjectsImpl(BaseTy *Obj,
+                         TrailingObjectsBase::OverloadToken<NextTy>) {
+    auto *Ptr = TopTrailingObj::getTrailingObjectsImpl(
+                    Obj, TrailingObjectsBase::OverloadToken<PrevTy>()) +
+                TopTrailingObj::callNumTrailingObjects(
+                    Obj, TrailingObjectsBase::OverloadToken<PrevTy>());
+
+    if (requiresRealignment())
+      return reinterpret_cast<NextTy *>(llvm::alignAddr(Ptr, alignof(NextTy)));
+    else
+      return reinterpret_cast<NextTy *>(Ptr);
+  }
+
+  // Helper function for TrailingObjects::additionalSizeToAlloc: this
+  // function recurses to superclasses, each of which requires one
+  // fewer size_t argument, and adds its own size.
+  static constexpr size_t additionalSizeToAllocImpl(
+      size_t SizeSoFar, size_t Count1,
+      typename ExtractSecondType<MoreTys, size_t>::type... MoreCounts) {
+    return ParentType::additionalSizeToAllocImpl(
+        (requiresRealignment() ? llvm::alignTo<alignof(NextTy)>(SizeSoFar)
+                               : SizeSoFar) +
+            sizeof(NextTy) * Count1,
+        MoreCounts...);
+  }
+};
+
+// The base case of the TrailingObjectsImpl inheritance recursion,
+// when there's no more trailing types.
+template <int Align, typename BaseTy, typename TopTrailingObj, typename PrevTy>
+class TrailingObjectsImpl<Align, BaseTy, TopTrailingObj, PrevTy>
+    : public TrailingObjectsAligner<Align> {
+protected:
+  // This is a dummy method, only here so the "using" doesn't fail --
+  // it will never be called, because this function recurses backwards
+  // up the inheritance chain to subclasses.
+  static void getTrailingObjectsImpl();
+
+  static constexpr size_t additionalSizeToAllocImpl(size_t SizeSoFar) {
+    return SizeSoFar;
+  }
+
+  template <bool CheckAlignment> static void verifyTrailingObjectsAlignment() {}
+};
+
+} // end namespace trailing_objects_internal
+
+// Finally, the main type defined in this file, the one intended for users...
+
+/// See the file comment for details on the usage of the
+/// TrailingObjects type.
+template <typename BaseTy, typename... TrailingTys>
+class TrailingObjects : private trailing_objects_internal::TrailingObjectsImpl<
+                            trailing_objects_internal::AlignmentCalcHelper<
+                                TrailingTys...>::Alignment,
+                            BaseTy, TrailingObjects<BaseTy, TrailingTys...>,
+                            BaseTy, TrailingTys...> {
+
+  template <int A, typename B, typename T, typename P, typename... M>
+  friend class trailing_objects_internal::TrailingObjectsImpl;
+
+  template <typename... Tys> class Foo {};
+
+  typedef trailing_objects_internal::TrailingObjectsImpl<
+      trailing_objects_internal::AlignmentCalcHelper<TrailingTys...>::Alignment,
+      BaseTy, TrailingObjects<BaseTy, TrailingTys...>, BaseTy, TrailingTys...>
+      ParentType;
+  using TrailingObjectsBase = trailing_objects_internal::TrailingObjectsBase;
+
+  using ParentType::getTrailingObjectsImpl;
+
+  // This function contains only a static_assert BaseTy is final. The
+  // static_assert must be in a function, and not at class-level
+  // because BaseTy isn't complete at class instantiation time, but
+  // will be by the time this function is instantiated.
+  static void verifyTrailingObjectsAssertions() {
+#ifdef LLVM_IS_FINAL
+    static_assert(LLVM_IS_FINAL(BaseTy), "BaseTy must be final.");
+#endif
+  }
+
+  // These two methods are the base of the recursion for this method.
+  static const BaseTy *
+  getTrailingObjectsImpl(const BaseTy *Obj,
+                         TrailingObjectsBase::OverloadToken<BaseTy>) {
+    return Obj;
+  }
+
+  static BaseTy *
+  getTrailingObjectsImpl(BaseTy *Obj,
+                         TrailingObjectsBase::OverloadToken<BaseTy>) {
+    return Obj;
+  }
+
+  // callNumTrailingObjects simply calls numTrailingObjects on the
+  // provided Obj -- except when the type being queried is BaseTy
+  // itself. There is always only one of the base object, so that case
+  // is handled here. (An additional benefit of indirecting through
+  // this function is that consumers only say "friend
+  // TrailingObjects", and thus, only this class itself can call the
+  // numTrailingObjects function.)
+  static size_t
+  callNumTrailingObjects(const BaseTy *Obj,
+                         TrailingObjectsBase::OverloadToken<BaseTy>) {
+    return 1;
+  }
+
+  template <typename T>
+  static size_t callNumTrailingObjects(const BaseTy *Obj,
+                                       TrailingObjectsBase::OverloadToken<T>) {
+    return Obj->numTrailingObjects(TrailingObjectsBase::OverloadToken<T>());
+  }
+
+public:
+  // Make this (privately inherited) member public.
+#ifndef _MSC_VER
+  using ParentType::OverloadToken;
+#else
+  // MSVC bug prevents the above from working, at least up through CL
+  // 19.10.24629.
+  template <typename T>
+  using OverloadToken = typename ParentType::template OverloadToken<T>;
+#endif
+
+  /// Returns a pointer to the trailing object array of the given type
+  /// (which must be one of those specified in the class template). The
+  /// array may have zero or more elements in it.
+  template <typename T> const T *getTrailingObjects() const {
+    verifyTrailingObjectsAssertions();
+    // Forwards to an impl function with overloads, since member
+    // function templates can't be specialized.
+    return this->getTrailingObjectsImpl(
+        static_cast<const BaseTy *>(this),
+        TrailingObjectsBase::OverloadToken<T>());
+  }
+
+  /// Returns a pointer to the trailing object array of the given type
+  /// (which must be one of those specified in the class template). The
+  /// array may have zero or more elements in it.
+  template <typename T> T *getTrailingObjects() {
+    verifyTrailingObjectsAssertions();
+    // Forwards to an impl function with overloads, since member
+    // function templates can't be specialized.
+    return this->getTrailingObjectsImpl(
+        static_cast<BaseTy *>(this), TrailingObjectsBase::OverloadToken<T>());
+  }
+
+  /// Returns the size of the trailing data, if an object were
+  /// allocated with the given counts (The counts are in the same order
+  /// as the template arguments). This does not include the size of the
+  /// base object.  The template arguments must be the same as those
+  /// used in the class; they are supplied here redundantly only so
+  /// that it's clear what the counts are counting in callers.
+  template <typename... Tys>
+  static constexpr typename std::enable_if<
+      std::is_same<Foo<TrailingTys...>, Foo<Tys...>>::value, size_t>::type
+  additionalSizeToAlloc(typename trailing_objects_internal::ExtractSecondType<
+                        TrailingTys, size_t>::type... Counts) {
+    return ParentType::additionalSizeToAllocImpl(0, Counts...);
+  }
+
+  /// Returns the total size of an object if it were allocated with the
+  /// given trailing object counts. This is the same as
+  /// additionalSizeToAlloc, except it *does* include the size of the base
+  /// object.
+  template <typename... Tys>
+  static constexpr typename std::enable_if<
+      std::is_same<Foo<TrailingTys...>, Foo<Tys...>>::value, size_t>::type
+  totalSizeToAlloc(typename trailing_objects_internal::ExtractSecondType<
+                   TrailingTys, size_t>::type... Counts) {
+    return sizeof(BaseTy) + ParentType::additionalSizeToAllocImpl(0, Counts...);
+  }
+
+  /// A type where its ::with_counts template member has a ::type member
+  /// suitable for use as uninitialized storage for an object with the given
+  /// trailing object counts. The template arguments are similar to those
+  /// of additionalSizeToAlloc.
+  ///
+  /// Use with FixedSizeStorageOwner, e.g.:
+  ///
+  /// \code{.cpp}
+  ///
+  /// MyObj::FixedSizeStorage<void *>::with_counts<1u>::type myStackObjStorage;
+  /// MyObj::FixedSizeStorageOwner
+  ///     myStackObjOwner(new ((void *)&myStackObjStorage) MyObj);
+  /// MyObj *const myStackObjPtr = myStackObjOwner.get();
+  ///
+  /// \endcode
+  template <typename... Tys> struct FixedSizeStorage {
+    template <size_t... Counts> struct with_counts {
+      enum { Size = totalSizeToAlloc<Tys...>(Counts...) };
+      typedef llvm::AlignedCharArray<alignof(BaseTy), Size> type;
+    };
+  };
+
+  /// A type that acts as the owner for an object placed into fixed storage.
+  class FixedSizeStorageOwner {
+  public:
+    FixedSizeStorageOwner(BaseTy *p) : p(p) {}
+    ~FixedSizeStorageOwner() {
+      assert(p && "FixedSizeStorageOwner owns null?");
+      p->~BaseTy();
+    }
+
+    BaseTy *get() { return p; }
+    const BaseTy *get() const { return p; }
+
+  private:
+    FixedSizeStorageOwner(const FixedSizeStorageOwner &) = delete;
+    FixedSizeStorageOwner(FixedSizeStorageOwner &&) = delete;
+    FixedSizeStorageOwner &operator=(const FixedSizeStorageOwner &) = delete;
+    FixedSizeStorageOwner &operator=(FixedSizeStorageOwner &&) = delete;
+
+    BaseTy *const p;
+  };
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/ext/src/ConsensusCore/CMakeLists.txt b/ext/src/ConsensusCore/CMakeLists.txt
index e737da1..5e482fb 100644
--- a/ext/src/ConsensusCore/CMakeLists.txt
+++ b/ext/src/ConsensusCore/CMakeLists.txt
@@ -6,8 +6,6 @@ include_directories(${EXT_DIR}/include/ConsensusCore)
 add_library(ConsensusCore STATIC 
             AffineAlignment.cpp
             Coverage.cpp
-            Feature.cpp
-            Features.cpp
             Mutation.cpp
             PairwiseAlignment.cpp
             Sequence.cpp
diff --git a/ext/src/ConsensusCore/Feature.cpp b/ext/src/ConsensusCore/Feature.cpp
deleted file mode 100644
index 78d9191..0000000
--- a/ext/src/ConsensusCore/Feature.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-// Author: David Alexander
-
-#include "Feature.hpp"
-
-#include <string>
-
-namespace ConsensusCore {
-
-#ifndef SWIG
-    template<>
-    Feature<float>::operator std::string() const
-    {
-        return "<Float feature>";
-    }
-
-    template<>
-    Feature<char>::operator std::string() const
-    {
-        return std::string(get(), Length());
-    }
-#endif  // !SWIG
-
-
-    template class ConsensusCore::Feature<char>;
-    template class ConsensusCore::Feature<float>;
-    template class ConsensusCore::Feature<int>;
-}
-
-
diff --git a/ext/src/ConsensusCore/Features.cpp b/ext/src/ConsensusCore/Features.cpp
deleted file mode 100644
index f65a12a..0000000
--- a/ext/src/ConsensusCore/Features.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-// Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted (subject to the limitations in the
-// disclaimer below) provided that the following conditions are met:
-//
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//
-//  * Redistributions in binary form must reproduce the above
-//    copyright notice, this list of conditions and the following
-//    disclaimer in the documentation and/or other materials provided
-//    with the distribution.
-//
-//  * Neither the name of Pacific Biosciences nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-// SUCH DAMAGE.
-
-// Author: David Alexander
-
-#include "Features.hpp"
-
-#include <algorithm>
-#include <string>
-
-#include "Feature.hpp"
-#include "Types.hpp"
-
-
-ConsensusCore::SequenceFeatures::SequenceFeatures(const std::string& seq)
-    : sequence_(seq.c_str(), seq.length())
-{}
-
-
diff --git a/ext/src/ConsensusCore/Matrix/DenseMatrix.cpp b/ext/src/ConsensusCore/Matrix/DenseMatrix.cpp
index 3d7d92d..0e3841e 100644
--- a/ext/src/ConsensusCore/Matrix/DenseMatrix.cpp
+++ b/ext/src/ConsensusCore/Matrix/DenseMatrix.cpp
@@ -39,7 +39,7 @@
 #include "Matrix/DenseMatrix.hpp"
 
 #include <algorithm>
-#include <boost/tuple/tuple.hpp>
+#include <tuple>
 #include <cassert>
 
 #include "LFloat.hpp"
@@ -73,7 +73,7 @@ namespace ConsensusCore {
         for (int col = 0; col < Columns(); ++col)
         {
             int start, end;
-            boost::tie(start, end) = UsedRowRange(col);
+            std::tie(start, end) = UsedRowRange(col);
             filledEntries += (end - start);
         }
         return filledEntries;
@@ -101,7 +101,7 @@ namespace ConsensusCore {
     {
         // make sure no used entries are outside of the bands
         int start, end;
-        boost::tie(start, end) = UsedRowRange(column);
+        std::tie(start, end) = UsedRowRange(column);
         assert(0 <= start && start <= end && end <= Rows());
         for (int i = 0; i < Rows(); i++)
         {
diff --git a/ext/src/ConsensusCore/Matrix/SparseMatrix.cpp b/ext/src/ConsensusCore/Matrix/SparseMatrix.cpp
index c397c57..d4432bd 100644
--- a/ext/src/ConsensusCore/Matrix/SparseMatrix.cpp
+++ b/ext/src/ConsensusCore/Matrix/SparseMatrix.cpp
@@ -38,7 +38,7 @@
 #include "Matrix/SparseMatrix.hpp"
 
 #include <algorithm>
-#include <boost/tuple/tuple.hpp>
+#include <tuple>
 
 namespace ConsensusCore {
     // Performance insensitive routines are not inlined
@@ -85,7 +85,7 @@ namespace ConsensusCore {
         for (int col = 0; col < Columns(); ++col)
         {
             int start, end;
-            boost::tie(start, end) = UsedRowRange(col);
+            std::tie(start, end) = UsedRowRange(col);
             filledEntries += (end - start);
         }
         return filledEntries;
diff --git a/ext/src/ConsensusCore/Poa/PoaConsensus.cpp b/ext/src/ConsensusCore/Poa/PoaConsensus.cpp
index bc0e615..04a849f 100644
--- a/ext/src/ConsensusCore/Poa/PoaConsensus.cpp
+++ b/ext/src/ConsensusCore/Poa/PoaConsensus.cpp
@@ -37,9 +37,6 @@
 
 #include "Poa/PoaConsensus.hpp"
 
-#include <boost/algorithm/string.hpp>
-#include <boost/foreach.hpp>
-#include <boost/tuple/tuple.hpp>
 #include <string>
 #include <utility>
 #include <vector>
@@ -47,8 +44,6 @@
 #include "Poa/PoaConfig.hpp"
 #include "Utils.hpp"
 
-using boost::tie;
-
 namespace ConsensusCore
 {
     PoaConsensus::PoaConsensus(const PoaConfig& config)
@@ -80,7 +75,7 @@ namespace ConsensusCore
             }
             pc->poaGraph_->AddSequence(read, config);
         }
-        boost::tie(pc->consensusSequence_, pc->score_, pc->variants_) =
+        std::tie(pc->consensusSequence_, pc->score_, pc->variants_) =
             pc->poaGraph_->FindConsensus(config);
         return pc;
     }
diff --git a/ext/src/ConsensusCore/Poa/PoaGraph.cpp b/ext/src/ConsensusCore/Poa/PoaGraph.cpp
index b31f334..080cd4b 100644
--- a/ext/src/ConsensusCore/Poa/PoaGraph.cpp
+++ b/ext/src/ConsensusCore/Poa/PoaGraph.cpp
@@ -45,7 +45,6 @@
 #include <boost/graph/adjacency_list.hpp>
 #include <boost/graph/graphviz.hpp>
 #include <boost/graph/topological_sort.hpp>
-#include <boost/tuple/tuple.hpp>
 #include <boost/utility.hpp>
 #include <cassert>
 #include <cfloat>
@@ -55,6 +54,8 @@
 #include <string>
 #include <utility>
 #include <vector>
+#include <unordered_set>
+#include <unordered_map>
 
 #include "Poa/PoaConfig.hpp"
 #include "Types.hpp"
@@ -201,7 +202,7 @@ namespace ConsensusCore
         {}
     };
 
-    typedef unordered_map<Vertex, const AlignmentColumn*> AlignmentColumnMap;
+    typedef std::unordered_map<Vertex, const AlignmentColumn*> AlignmentColumnMap;
 
     //
     // Graph::Impl methods
@@ -238,7 +239,7 @@ namespace ConsensusCore
         void AddSequence(const std::string& sequence, const PoaConfig& config);
 
         // TODO(dalexander): make this const
-        tuple<string, float, vector< pair<Mutation*, float> >*>
+        std::tuple<std::string, float, std::vector< std::pair<Mutation*, float> >*>
         FindConsensus(const PoaConfig& config);
 
         int NumSequences() const;
@@ -497,7 +498,7 @@ namespace ConsensusCore
         VertexInfoMap vertexInfoMap = get(vertex_info, g);
         std::list<Vertex> sortedVertices(num_vertices(g));
         topological_sort(g, sortedVertices.rbegin());
-        unordered_map<Vertex, Vertex> bestPrevVertex;
+        std::unordered_map<Vertex, Vertex> bestPrevVertex;
 
         // ignore ^ and $
         // TODO(dalexander): find a cleaner way to do this
@@ -696,11 +697,11 @@ namespace ConsensusCore
     }
 
 
-    static boost::unordered_set<Vertex>
+    static std::unordered_set<Vertex>
     childVertices(Vertex v,
                   BoostGraph& g)
     {
-        boost::unordered_set<Vertex> result;
+        std::unordered_set<Vertex> result;
         foreach (Edge e, out_edges(v, g))
         {
             result.insert(target(e, g));
@@ -708,11 +709,11 @@ namespace ConsensusCore
         return result;
     }
 
-    static boost::unordered_set<Vertex>
+    static std::unordered_set<Vertex>
     parentVertices(Vertex v,
                    BoostGraph& g)
     {
-        boost::unordered_set<Vertex> result;
+        std::unordered_set<Vertex> result;
         foreach (Edge e, in_edges(v, g))
         {
             result.insert(source(e, g));
@@ -721,7 +722,7 @@ namespace ConsensusCore
     }
 
 
-    tuple<string, float, vector< pair<Mutation*, float> >* >
+    std::tuple<string, float, vector< pair<Mutation*, float> >* >
     PoaGraph::Impl::FindConsensus(const PoaConfig& config)
     {
         std::stringstream ss;
@@ -736,14 +737,14 @@ namespace ConsensusCore
         // if requested, identify likely sequence variants
 
         // will be deallocated by PoaConsensus destructor.
-        vector< pair<Mutation*, float> >* variants = new vector< pair<Mutation*, float> >();
+        vector< std::pair<Mutation*, float> >* variants = new vector< pair<Mutation*, float> >();
 
         if (true)  // TODO(dalexander): Add a flag to PoaConfig
         {
             for (int i = 2; i < (int)bestPath.size() - 2; i++) // NOLINT
             {
                 Vertex v = bestPath[i];
-                boost::unordered_set<Vertex> children = childVertices(v, g_);
+                std::unordered_set<Vertex> children = childVertices(v, g_);
 
                 // Look for a direct edge from the current node to the node
                 // two spaces down---suggesting a deletion with respect to
@@ -758,7 +759,7 @@ namespace ConsensusCore
                 // This indicates we should try inserting the base at i + 1.
 
                 // Parents of (i + 1)
-                boost::unordered_set<Vertex> lookBack = parentVertices(bestPath[i + 1], g_);
+                std::unordered_set<Vertex> lookBack = parentVertices(bestPath[i + 1], g_);
 
                 // (We could do this in STL using std::set sorted on score, which would then
                 // provide an intersection mechanism (in <algorithm>) but that actually ends
@@ -768,7 +769,7 @@ namespace ConsensusCore
 
                 foreach (Vertex v, children)
                 {
-                    boost::unordered_set<Vertex>::iterator found = lookBack.find(v);
+                    std::unordered_set<Vertex>::iterator found = lookBack.find(v);
                     if (found != lookBack.end())
                     {
                         float score = vertexInfoMap_[*found]->Score;
@@ -800,7 +801,7 @@ namespace ConsensusCore
                 {
                     if (v == bestPath[i + 1]) continue;
 
-                    boost::unordered_set<Vertex>::iterator found = lookBack.find(v);
+                    std::unordered_set<Vertex>::iterator found = lookBack.find(v);
                     if (found != lookBack.end())
                     {
                         float score = vertexInfoMap_[*found]->Score;
@@ -824,7 +825,7 @@ namespace ConsensusCore
             }
         }
 
-        return boost::make_tuple(ss.str(), 0.0f, variants);  // TODO(dalexander): where do we get scores?
+        return std::tuple<string, float, vector< pair<Mutation*, float> >* >{ ss.str(), 0.0f, variants };  // TODO(dalexander): where do we get scores?
     }
 
     inline int
@@ -865,7 +866,7 @@ namespace ConsensusCore
         return impl->NumSequences();
     }
 
-    tuple<string, float, std::vector< std::pair<Mutation*, float> >* >
+    std::tuple<string, float, std::vector< std::pair<Mutation*, float> >* >
     PoaGraph::FindConsensus(const PoaConfig& config) const
     {
         return impl->FindConsensus(config);
diff --git a/ext/src/ConsensusCore/Version.cpp b/ext/src/ConsensusCore/Version.cpp
index 898fe96..eacf29a 100644
--- a/ext/src/ConsensusCore/Version.cpp
+++ b/ext/src/ConsensusCore/Version.cpp
@@ -38,8 +38,6 @@
 #include <string>
 #include <vector>
 #include <boost/format.hpp>
-#include <boost/tuple/tuple.hpp>
-#include <boost/tuple/tuple_comparison.hpp>
 
 #include "ConsensusCore/Version.hpp"
 
diff --git a/manual.html b/manual.html
index e94fbe6..44b5c2b 100644
--- a/manual.html
+++ b/manual.html
@@ -1,6 +1,6 @@
 <html>
 <head>
-    <title>SPAdes 3.10.1 Manual</title>
+    <title>SPAdes 3.11.1 Manual</title>
     <style type="text/css">
         .code {
             background-color: lightgray;
@@ -8,7 +8,7 @@
     </style>
 </head>
 <body>
-<h1>SPAdes 3.10.1 Manual</h1>
+<h1>SPAdes 3.11.1 Manual</h1>
 
 1. <a href="#sec1">About SPAdes</a><br>
     1.1. <a href="#sec1.1">Supported data types</a><br>
@@ -35,16 +35,16 @@
 <h2>1. About SPAdes</h2>
 <p>
     SPAdes – St. Petersburg genome assembler – is an assembly toolkit containing various assembly pipelines. This manual will help you to install and run SPAdes. 
-SPAdes version 3.10.1 was released under GPLv2 on March 1, 2017 and can be downloaded from  <a href="http://cab.spbu.ru/software/spades/" target="_blank">http://cab.spbu.ru/software/spades/</a>.
+SPAdes version 3.11.1 was released under GPLv2 on October 1, 2017 and can be downloaded from  <a href="http://cab.spbu.ru/software/spades/" target="_blank">http://cab.spbu.ru/software/spades/</a>.
 
 <a name="sec1.1"></a>
 <h3>1.1 Supported data types</h3>
 <p>
     The current version of SPAdes works with Illumina or IonTorrent reads and is capable of providing hybrid assemblies using PacBio, Oxford Nanopore and Sanger reads. You can also provide additional contigs that will be used as long reads.
 <p>
-    Version 3.10.1 of SPAdes supports paired-end reads, mate-pairs and unpaired reads. SPAdes can take as input several paired-end and mate-pair libraries simultaneously. Note, that SPAdes was initially designed for small genomes. It was tested on bacterial (both single-cell MDA and standard isolates), fungal and other small genomes. SPAdes is not intended for larger genomes (e.g. mammalian size genomes). For such purposes you can use it at your own risk.
+    Version 3.11.1 of SPAdes supports paired-end reads, mate-pairs and unpaired reads. SPAdes can take as input several paired-end and mate-pair libraries simultaneously. Note, that SPAdes was initially designed for small genomes. It was tested on bacterial (both single-cell MDA and standard isolates), fungal and other small genomes. SPAdes is not intended for larger genomes (e.g. mammalian size genomes). For such purposes you can use it at your own risk.
 <p>
-    SPAdes 3.10.1 includes the following additional pipelines:
+    SPAdes 3.11.1 includes the following additional pipelines:
     <ul>
         <li>dipSPAdes – a module for assembling highly polymorphic diploid genomes (see <a href="dipspades_manual.html" target="_blank">dipSPAdes manual</a>).</li>
         <li>metaSPAdes – a pipeline for metagenomic data sets (see <a href="#meta">metaSPAdes options</a>). </li>
@@ -98,42 +98,42 @@ SPAdes comes in several separate modules:
 
         <tr>
             <td> BayesHammer </td>
-            <td align="center"> 29m </td>
+            <td align="center"> 26m </td>
             <td align="center"> 7.1 </td>
             <td align="center"> 11 </td>
-            <td align="center"> 34m </td>
-            <td align="center"> 7.6 </td>
-            <td align="center"> 8.8 </td>
+            <td align="center"> 31m </td>
+            <td align="center"> 6.9 </td>
+            <td align="center"> 11.3 </td>
         </tr>
 
         <tr>
             <td> SPAdes </td>
-            <td align="center"> 11m </td>
-            <td align="center"> 8.4 </td>
-            <td align="center"> 1.6 </td>
-            <td align="center"> 17m </td>
-            <td align="center"> 8 </td>
-            <td align="center"> 3.0 </td>
+            <td align="center"> 8m </td>
+            <td align="center"> 8.1 </td>
+            <td align="center"> 1.5 </td>
+            <td align="center"> 12m </td>
+            <td align="center"> 7.9 </td>
+            <td align="center"> 2.6 </td>
         </tr>
 
         <tr>
             <td> MismatchCorrector </td>
-            <td align="center"> 13m </td>
+            <td align="center"> 20m </td>
             <td align="center"> 1.8 </td>
-            <td align="center"> 27.1 </td>
-            <td align="center"> 16m </td>
+            <td align="center"> 27.7 </td>
+            <td align="center"> 25m </td>
             <td align="center"> 1.8 </td>
-            <td align="center"> 25.5 </td>
+            <td align="center"> 28.3 </td>
         </tr>
 
         <tr>
             <td> Whole pipeline </td>
-            <td align="center"> 53m </td>
-            <td align="center"> 8.4 </td>
-            <td align="center"> 29.6 </td>
-            <td align="center"> 1h 7m </td>
-            <td align="center"> 8 </td>
-            <td align="center"> 28.3 </td>
+            <td align="center"> 54m </td>
+            <td align="center"> 8.1 </td>
+            <td align="center"> 30.2 </td>
+            <td align="center"> 1h 8m </td>
+            <td align="center"> 7.9 </td>
+            <td align="center"> 31.1 </td>
         </tr>
     </table>
 
@@ -143,7 +143,7 @@ SPAdes comes in several separate modules:
         <li> Running SPAdes without preliminary read error correction (e.g. without BayesHammer or IonHammer) will likely require more time and memory. </li>
         <li> Each module removes its temporary files as soon as it finishes. </li>
         <li> SPAdes uses 512 Mb per thread for buffers, which results in higher memory consumption. If you set memory limit manually, SPAdes will use smaller buffers and thus less RAM. </li>
-        <li> Performance statistics is given for SPAdes version 3.10.1. </li>
+        <li> Performance statistics is given for SPAdes version 3.11.1. </li>
     </ul>
 
 
@@ -151,19 +151,19 @@ SPAdes comes in several separate modules:
 <h2>2. Installation</h2>
 <p>
 
-    SPAdes requires a 64-bit Linux system or Mac OS and Python (supported versions are 2.4, 2.5, 2.6, 2.7, 3.2, 3.3, 3.4 and 3.5) to be pre-installed on it. To obtain SPAdes you can either download binaries or download source code and compile it yourself.
+    SPAdes requires a 64-bit Linux system or Mac OS and Python (supported versions are Python2: 2.4–2.7, and Python3: 3.2 and higher) to be pre-installed on it. To obtain SPAdes you can either download binaries or download source code and compile it yourself.
 
 <a name="sec2.1"></a>
 <h3>2.1 Downloading SPAdes Linux binaries</h3>
 
 <p>
-    To download <a href="http://cab.spbu.ru/files/release3.10.1/SPAdes-3.10.1-Linux.tar.gz">SPAdes Linux binaries</a> and extract them, go to the directory in which you wish SPAdes to be installed and run:
+    To download <a href="http://cab.spbu.ru/files/release3.11.1/SPAdes-3.11.1-Linux.tar.gz">SPAdes Linux binaries</a> and extract them, go to the directory in which you wish SPAdes to be installed and run:
 
 <pre  class="code">
 <code>
-    wget http://cab.spbu.ru/files/release3.10.1/SPAdes-3.10.1-Linux.tar.gz
-    tar -xzf SPAdes-3.10.1-Linux.tar.gz
-    cd SPAdes-3.10.1-Linux/bin/
+    wget http://cab.spbu.ru/files/release3.11.1/SPAdes-3.11.1-Linux.tar.gz
+    tar -xzf SPAdes-3.11.1-Linux.tar.gz
+    cd SPAdes-3.11.1-Linux/bin/
 </code>
 </pre>
 
@@ -192,13 +192,13 @@ SPAdes comes in several separate modules:
 <h3>2.2 Downloading SPAdes binaries for Mac</h3>
 
 <p>
-    To obtain <a href="http://cab.spbu.ru/files/release3.10.1/SPAdes-3.10.1-Darwin.tar.gz">SPAdes binaries for Mac</a>, go to the directory in which you wish SPAdes to be installed and run:
+    To obtain <a href="http://cab.spbu.ru/files/release3.11.1/SPAdes-3.11.1-Darwin.tar.gz">SPAdes binaries for Mac</a>, go to the directory in which you wish SPAdes to be installed and run:
 
 <pre  class="code">
 <code>
-    curl http://cab.spbu.ru/files/release3.10.1/SPAdes-3.10.1-Darwin.tar.gz -o SPAdes-3.10.1-Darwin.tar.gz
-    tar -zxf SPAdes-3.10.1-Darwin.tar.gz
-    cd SPAdes-3.10.1-Darwin/bin/
+    curl http://cab.spbu.ru/files/release3.11.1/SPAdes-3.11.1-Darwin.tar.gz -o SPAdes-3.11.1-Darwin.tar.gz
+    tar -zxf SPAdes-3.11.1-Darwin.tar.gz
+    cd SPAdes-3.11.1-Darwin/bin/
 </code>
 </pre>
 
@@ -236,13 +236,13 @@ SPAdes comes in several separate modules:
     </ul>
 
 <p>
-    If you meet these requirements, you can download the <a href="http://cab.spbu.ru/files/release3.10.1/SPAdes-3.10.1.tar.gz">SPAdes source code</a>: 
+    If you meet these requirements, you can download the <a href="http://cab.spbu.ru/files/release3.11.1/SPAdes-3.11.1.tar.gz">SPAdes source code</a>: 
 
 <pre class="code">
 <code>
-    wget http://cab.spbu.ru/files/release3.10.1/SPAdes-3.10.1.tar.gz
-    tar -xzf SPAdes-3.10.1.tar.gz
-    cd SPAdes-3.10.1
+    wget http://cab.spbu.ru/files/release3.11.1/SPAdes-3.11.1.tar.gz
+    tar -xzf SPAdes-3.11.1.tar.gz
+    cd SPAdes-3.11.1
 </code>
 </pre>
 
@@ -354,7 +354,7 @@ Thank you for using SPAdes!
     SPAdes takes as input paired-end reads, mate-pairs and single (unpaired) reads in FASTA and FASTQ. For IonTorrent data SPAdes also supports unpaired reads in unmapped BAM format (like the one produced by Torrent Server). However, in order to run read error correction, reads should be in FASTQ or BAM format. Sanger, Oxford Nanopore and PacBio CLR reads can be provided in both formats since SPAdes does not run error correction for these types of data.
 
 <p>
-    To run SPAdes 3.10.1 you need at least one library of the following types:
+    To run SPAdes 3.11.1 you need at least one library of the following types:
     <ul>
         <li>Illumina paired-end/high-quality mate-pairs/unpaired reads</li>
         <li>IonTorrent paired-end/high-quality mate-pairs/unpaired reads</li>
@@ -382,6 +382,7 @@ SPAdes supports mate-pair only assembly. However, we recommend to use only high-
 
 <p> Notes:
     <ul>
+        <li>It is strongly suggested to provide multiple paired-end and mate-pair libraries according to their insert size (from smallest to longest).</li>
         <li>It is not recommended to run SPAdes on PacBio reads with low coverage (less than 5).</li>
         <li>We suggest not to run SPAdes on PacBio reads for large genomes.</li>
         <li>SPAdes accepts gzip-compressed files.</li>
@@ -1209,13 +1210,10 @@ we append suffix <code>_component_X</code>, where <code>X</code> is the id of th
 <a name="sec5">
 <h2>5. Feedback and bug reports</h2>
 <p>
-    Your comments, bug reports, and suggestions are very welcomed. They will help us to further improve SPAdes.
-
-<p>
-    If you have any troubles running SPAdes, please send us <code>params.txt</code> and <code>spades.log</code> from the directory <code><output_dir></code>.
+    Your comments, bug reports, and suggestions are very welcomed. They will help us to further improve SPAdes. If you have any troubles running SPAdes, please send us <code>params.txt</code> and <code>spades.log</code> from the directory <code><output_dir></code>. 
 
 <p>
-    Address for communications: <a href="mailto:spades.support at cab.spbu.ru" target="_blank">spades.support at cab.spbu.ru</a>.
+    You can leave your comments and bug reports at <a href="https://github.com/ablab/spades/issues" target="_blank">our GitHub repository tracker</a> or send them via e-mail: <a href="mailto:spades.support at cab.spbu.ru" target="_blank">spades.support at cab.spbu.ru</a>.
 
 <br/><br/><br/><br/><br/>
 
diff --git a/metaspades.py b/metaspades.py
index ff31c92..8c96a80 100755
--- a/metaspades.py
+++ b/metaspades.py
@@ -158,7 +158,7 @@ def print_used_values(cfg, log):
 
 
 def fill_cfg(options_to_parse, log, secondary_filling=False):
-    skip_output_dir=secondary_filling
+    skip_output_dir = secondary_filling
     skip_stop_after = secondary_filling
     load_processed_dataset=secondary_filling
 
@@ -211,6 +211,7 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             options_storage.configs_dir = support.check_dir_existence(arg)
         elif opt == "--reference":
             options_storage.reference = support.check_file_existence(arg, 'reference', log)
+            options_storage.developer_mode = True
         elif opt == "--series-analysis":
             options_storage.series_analysis = support.check_file_existence(arg, 'series-analysis', log)
         elif opt == "--dataset":
@@ -239,8 +240,19 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             options_storage.large_genome = True
         elif opt == "--plasmid":
             options_storage.plasmid = True
+
         elif opt == "--rna":
             options_storage.rna = True
+        elif opt.startswith("--ss-"):  # strand specificity, RNA-Seq only
+            if opt == "--ss-rf":
+                options_storage.strand_specific = True
+            elif opt == "--ss-fr":
+                options_storage.strand_specific = False
+        elif opt == "--fast":  # fast run, RNA-Seq only
+            options_storage.fast = True
+        elif opt == "--fast:false":
+            options_storage.fast = False
+
         elif opt == "--iontorrent":
             options_storage.iontorrent = True
         elif opt == "--disable-gzip-output":
@@ -295,6 +307,8 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             else:
                 support.error('wrong PHRED quality offset value: ' + arg +
                               ' (should be either 33, 64, or \'auto\')', log)
+        elif opt == "--save-gp":
+            options_storage.save_gp = True
         elif opt == "--cov-cutoff":
             if arg == 'auto' or arg == 'off':
                 options_storage.cov_cutoff = arg
@@ -303,6 +317,12 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             else:
                 support.error('wrong value for --cov-cutoff option: ' + arg +
                               ' (should be a positive float number, or \'auto\', or \'off\')', log)
+        elif opt == "--hidden-cov-cutoff":
+            if support.is_float(arg) and float(arg) > 0.0:
+                options_storage.lcer_cutoff = float(arg)
+            else:
+                support.error('wrong value for --hidden-cov-cutoff option: ' + arg +
+                              ' (should be a positive float number)', log)
         elif opt == '-i' or opt == "--iterations":
             options_storage.iterations = int(arg)
 
@@ -356,7 +376,7 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             support.error("the output_dir should exist for --continue and for --restart-from!", log)
         os.makedirs(options_storage.output_dir)
     if options_storage.restart_from:
-        if options_storage.continue_mode: # saving parameters specified with --restart-from
+        if options_storage.continue_mode:  # saving parameters specified with --restart-from
             if not support.dataset_is_empty(dataset_data):
                 support.error("you cannot specify reads with --restart-from option!", log)
             options_storage.save_restart_options(log)
@@ -370,6 +390,10 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             support.error("you cannot specify --careful in RNA-Seq mode!", log)
         if options_storage.k_mers and options_storage.k_mers != 'auto' and len(options_storage.k_mers) > 1:
             support.error("you cannot specify multiple k-mer sizes in RNA-Seq mode!", log)
+    if [options_storage.meta, options_storage.large_genome, options_storage.truseq_mode,
+       options_storage.rna, options_storage.plasmid, options_storage.single_cell].count(True) > 1:
+        support.error("you cannot simultaneously use more than one mode out of "
+                      "Metagenomic, Large genome, Illumina TruSeq, RNA-Seq, Plasmid, and Single-cell!", log)
     if options_storage.continue_mode:
         return None, None
 
@@ -403,11 +427,12 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
         if len(dataset_data) != len(support.get_lib_ids_by_type(dataset_data, spades_logic.READS_TYPES_USED_IN_RNA_SEQ)):
             support.error('you cannot specify any data types except ' +
                           ', '.join(spades_logic.READS_TYPES_USED_IN_RNA_SEQ) + ' in RNA-Seq mode!')
-        if len(support.get_lib_ids_by_type(dataset_data, 'paired-end')) > 1:
-            support.error('you cannot specify more than one paired-end library in RNA-Seq mode!')
+        #if len(support.get_lib_ids_by_type(dataset_data, 'paired-end')) > 1:
+        #    support.error('you cannot specify more than one paired-end library in RNA-Seq mode!')
 
     if existing_dataset_data is None:
-        pyyaml.dump(dataset_data, open(options_storage.dataset_yaml_filename, 'w'))
+        pyyaml.dump(dataset_data, open(options_storage.dataset_yaml_filename, 'w'),
+                    default_flow_style=False, default_style='"', width=float("inf"))
 
     options_storage.set_default_values()
     ### FILLING cfg
@@ -454,12 +479,27 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
         if options_storage.k_mers:
             cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers
         elif options_storage.rna:
-            cfg["assembly"].__dict__["iterative_K"] = options_storage.K_MERS_RNA
+            k_value = options_storage.K_MERS_RNA[0]
+            if not options_storage.iontorrent:
+                k_value = int(support.get_reads_length(dataset_data, log) / 2) - 1
+                if k_value % 2 == 0:
+                    k_value -= 1
+                if k_value < options_storage.MIN_K:
+                    log.info("\n" + 'Default k value (' + str(k_value) + ') is too small, all k values should be between %d and %d. Setting k=%d.\n'
+                             % (options_storage.MIN_K, options_storage.MAX_K, options_storage.MIN_K))
+                    k_value = options_storage.MIN_K
+                if k_value > options_storage.MAX_K:
+                    log.info("\n" + 'Default k value (' + str(k_value) + ') is too large, all k values should be between %d and %d. Setting k=%d.\n'
+                             % (options_storage.MIN_K, options_storage.MAX_K, options_storage.MAX_K))
+                    k_value = options_storage.MAX_K
+            cfg["assembly"].__dict__["iterative_K"] = k_value
         else:
             cfg["assembly"].__dict__["iterative_K"] = options_storage.K_MERS_SHORT
         cfg["assembly"].__dict__["disable_rr"] = options_storage.disable_rr
         cfg["assembly"].__dict__["diploid_mode"] = options_storage.diploid_mode
         cfg["assembly"].__dict__["cov_cutoff"] = options_storage.cov_cutoff
+        cfg["assembly"].__dict__["lcer_cutoff"] = options_storage.lcer_cutoff
+        cfg["assembly"].__dict__["save_gp"] = options_storage.save_gp
         if options_storage.spades_heap_check:
             cfg["assembly"].__dict__["heap_check"] = options_storage.spades_heap_check
         if options_storage.read_buffer_size:
@@ -658,7 +698,8 @@ def main(args):
         if support.dataset_has_additional_contigs(dataset_data):
             dataset_data = support.process_Ns_in_additional_contigs(dataset_data, dir_for_split_reads, log)
         options_storage.dataset_yaml_filename = os.path.join(options_storage.output_dir, "input_dataset.yaml")
-        pyyaml.dump(dataset_data, open(options_storage.dataset_yaml_filename, 'w'))
+        pyyaml.dump(dataset_data, open(options_storage.dataset_yaml_filename, 'w'),
+                    default_flow_style=False, default_style='"', width=float("inf"))
         cfg["dataset"].yaml_filename = options_storage.dataset_yaml_filename
 
     try:
@@ -894,29 +935,37 @@ def main(args):
             if "assembly" in cfg and os.path.isfile(result_contigs_filename):
                 message = " * Assembled contigs are in " + support.process_spaces(result_contigs_filename)
                 log.info(message)
-            if options_storage.rna:
-                if "assembly" in cfg and os.path.isfile(result_transcripts_filename):
+            if options_storage.rna and "assembly" in cfg:
+                if os.path.isfile(result_transcripts_filename):
                     message = " * Assembled transcripts are in " + support.process_spaces(result_transcripts_filename)
                     log.info(message)
-                if "assembly" in cfg and os.path.isfile(result_transcripts_paths_filename):
+                if os.path.isfile(result_transcripts_paths_filename):
                     message = " * Paths in the assembly graph corresponding to the transcripts are in " + \
                               support.process_spaces(result_transcripts_paths_filename)
                     log.info(message)
-            else:
-                if "assembly" in cfg and os.path.isfile(result_scaffolds_filename):
+                for filtering_type in options_storage.filtering_types:
+                    result_filtered_transcripts_filename = os.path.join(cfg["common"].output_dir,
+                                                                        filtering_type + "_filtered_" +
+                                                                        options_storage.transcripts_name)
+                    if os.path.isfile(result_filtered_transcripts_filename):
+                        message = " * " + filtering_type.capitalize() + " filtered transcripts are in " + \
+                                  support.process_spaces(result_filtered_transcripts_filename)
+                        log.info(message)
+            elif "assembly" in cfg:
+                if os.path.isfile(result_scaffolds_filename):
                     message = " * Assembled scaffolds are in " + support.process_spaces(result_scaffolds_filename)
                     log.info(message)
-                if "assembly" in cfg and os.path.isfile(result_assembly_graph_filename):
+                if os.path.isfile(result_assembly_graph_filename):
                     message = " * Assembly graph is in " + support.process_spaces(result_assembly_graph_filename)
                     log.info(message)
-                if "assembly" in cfg and os.path.isfile(result_assembly_graph_filename_gfa):
+                if os.path.isfile(result_assembly_graph_filename_gfa):
                     message = " * Assembly graph in GFA format is in " + support.process_spaces(result_assembly_graph_filename_gfa)
                     log.info(message)
-                if "assembly" in cfg and os.path.isfile(result_contigs_paths_filename):
+                if os.path.isfile(result_contigs_paths_filename):
                     message = " * Paths in the assembly graph corresponding to the contigs are in " + \
                               support.process_spaces(result_contigs_paths_filename)
                     log.info(message)
-                if "assembly" in cfg and os.path.isfile(result_scaffolds_paths_filename):
+                if os.path.isfile(result_scaffolds_paths_filename):
                     message = " * Paths in the assembly graph corresponding to the scaffolds are in " + \
                               support.process_spaces(result_scaffolds_paths_filename)
                     log.info(message)
diff --git a/plasmidspades.py b/plasmidspades.py
index ff31c92..8c96a80 100755
--- a/plasmidspades.py
+++ b/plasmidspades.py
@@ -158,7 +158,7 @@ def print_used_values(cfg, log):
 
 
 def fill_cfg(options_to_parse, log, secondary_filling=False):
-    skip_output_dir=secondary_filling
+    skip_output_dir = secondary_filling
     skip_stop_after = secondary_filling
     load_processed_dataset=secondary_filling
 
@@ -211,6 +211,7 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             options_storage.configs_dir = support.check_dir_existence(arg)
         elif opt == "--reference":
             options_storage.reference = support.check_file_existence(arg, 'reference', log)
+            options_storage.developer_mode = True
         elif opt == "--series-analysis":
             options_storage.series_analysis = support.check_file_existence(arg, 'series-analysis', log)
         elif opt == "--dataset":
@@ -239,8 +240,19 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             options_storage.large_genome = True
         elif opt == "--plasmid":
             options_storage.plasmid = True
+
         elif opt == "--rna":
             options_storage.rna = True
+        elif opt.startswith("--ss-"):  # strand specificity, RNA-Seq only
+            if opt == "--ss-rf":
+                options_storage.strand_specific = True
+            elif opt == "--ss-fr":
+                options_storage.strand_specific = False
+        elif opt == "--fast":  # fast run, RNA-Seq only
+            options_storage.fast = True
+        elif opt == "--fast:false":
+            options_storage.fast = False
+
         elif opt == "--iontorrent":
             options_storage.iontorrent = True
         elif opt == "--disable-gzip-output":
@@ -295,6 +307,8 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             else:
                 support.error('wrong PHRED quality offset value: ' + arg +
                               ' (should be either 33, 64, or \'auto\')', log)
+        elif opt == "--save-gp":
+            options_storage.save_gp = True
         elif opt == "--cov-cutoff":
             if arg == 'auto' or arg == 'off':
                 options_storage.cov_cutoff = arg
@@ -303,6 +317,12 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             else:
                 support.error('wrong value for --cov-cutoff option: ' + arg +
                               ' (should be a positive float number, or \'auto\', or \'off\')', log)
+        elif opt == "--hidden-cov-cutoff":
+            if support.is_float(arg) and float(arg) > 0.0:
+                options_storage.lcer_cutoff = float(arg)
+            else:
+                support.error('wrong value for --hidden-cov-cutoff option: ' + arg +
+                              ' (should be a positive float number)', log)
         elif opt == '-i' or opt == "--iterations":
             options_storage.iterations = int(arg)
 
@@ -356,7 +376,7 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             support.error("the output_dir should exist for --continue and for --restart-from!", log)
         os.makedirs(options_storage.output_dir)
     if options_storage.restart_from:
-        if options_storage.continue_mode: # saving parameters specified with --restart-from
+        if options_storage.continue_mode:  # saving parameters specified with --restart-from
             if not support.dataset_is_empty(dataset_data):
                 support.error("you cannot specify reads with --restart-from option!", log)
             options_storage.save_restart_options(log)
@@ -370,6 +390,10 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             support.error("you cannot specify --careful in RNA-Seq mode!", log)
         if options_storage.k_mers and options_storage.k_mers != 'auto' and len(options_storage.k_mers) > 1:
             support.error("you cannot specify multiple k-mer sizes in RNA-Seq mode!", log)
+    if [options_storage.meta, options_storage.large_genome, options_storage.truseq_mode,
+       options_storage.rna, options_storage.plasmid, options_storage.single_cell].count(True) > 1:
+        support.error("you cannot simultaneously use more than one mode out of "
+                      "Metagenomic, Large genome, Illumina TruSeq, RNA-Seq, Plasmid, and Single-cell!", log)
     if options_storage.continue_mode:
         return None, None
 
@@ -403,11 +427,12 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
         if len(dataset_data) != len(support.get_lib_ids_by_type(dataset_data, spades_logic.READS_TYPES_USED_IN_RNA_SEQ)):
             support.error('you cannot specify any data types except ' +
                           ', '.join(spades_logic.READS_TYPES_USED_IN_RNA_SEQ) + ' in RNA-Seq mode!')
-        if len(support.get_lib_ids_by_type(dataset_data, 'paired-end')) > 1:
-            support.error('you cannot specify more than one paired-end library in RNA-Seq mode!')
+        #if len(support.get_lib_ids_by_type(dataset_data, 'paired-end')) > 1:
+        #    support.error('you cannot specify more than one paired-end library in RNA-Seq mode!')
 
     if existing_dataset_data is None:
-        pyyaml.dump(dataset_data, open(options_storage.dataset_yaml_filename, 'w'))
+        pyyaml.dump(dataset_data, open(options_storage.dataset_yaml_filename, 'w'),
+                    default_flow_style=False, default_style='"', width=float("inf"))
 
     options_storage.set_default_values()
     ### FILLING cfg
@@ -454,12 +479,27 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
         if options_storage.k_mers:
             cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers
         elif options_storage.rna:
-            cfg["assembly"].__dict__["iterative_K"] = options_storage.K_MERS_RNA
+            k_value = options_storage.K_MERS_RNA[0]
+            if not options_storage.iontorrent:
+                k_value = int(support.get_reads_length(dataset_data, log) / 2) - 1
+                if k_value % 2 == 0:
+                    k_value -= 1
+                if k_value < options_storage.MIN_K:
+                    log.info("\n" + 'Default k value (' + str(k_value) + ') is too small, all k values should be between %d and %d. Setting k=%d.\n'
+                             % (options_storage.MIN_K, options_storage.MAX_K, options_storage.MIN_K))
+                    k_value = options_storage.MIN_K
+                if k_value > options_storage.MAX_K:
+                    log.info("\n" + 'Default k value (' + str(k_value) + ') is too large, all k values should be between %d and %d. Setting k=%d.\n'
+                             % (options_storage.MIN_K, options_storage.MAX_K, options_storage.MAX_K))
+                    k_value = options_storage.MAX_K
+            cfg["assembly"].__dict__["iterative_K"] = k_value
         else:
             cfg["assembly"].__dict__["iterative_K"] = options_storage.K_MERS_SHORT
         cfg["assembly"].__dict__["disable_rr"] = options_storage.disable_rr
         cfg["assembly"].__dict__["diploid_mode"] = options_storage.diploid_mode
         cfg["assembly"].__dict__["cov_cutoff"] = options_storage.cov_cutoff
+        cfg["assembly"].__dict__["lcer_cutoff"] = options_storage.lcer_cutoff
+        cfg["assembly"].__dict__["save_gp"] = options_storage.save_gp
         if options_storage.spades_heap_check:
             cfg["assembly"].__dict__["heap_check"] = options_storage.spades_heap_check
         if options_storage.read_buffer_size:
@@ -658,7 +698,8 @@ def main(args):
         if support.dataset_has_additional_contigs(dataset_data):
             dataset_data = support.process_Ns_in_additional_contigs(dataset_data, dir_for_split_reads, log)
         options_storage.dataset_yaml_filename = os.path.join(options_storage.output_dir, "input_dataset.yaml")
-        pyyaml.dump(dataset_data, open(options_storage.dataset_yaml_filename, 'w'))
+        pyyaml.dump(dataset_data, open(options_storage.dataset_yaml_filename, 'w'),
+                    default_flow_style=False, default_style='"', width=float("inf"))
         cfg["dataset"].yaml_filename = options_storage.dataset_yaml_filename
 
     try:
@@ -894,29 +935,37 @@ def main(args):
             if "assembly" in cfg and os.path.isfile(result_contigs_filename):
                 message = " * Assembled contigs are in " + support.process_spaces(result_contigs_filename)
                 log.info(message)
-            if options_storage.rna:
-                if "assembly" in cfg and os.path.isfile(result_transcripts_filename):
+            if options_storage.rna and "assembly" in cfg:
+                if os.path.isfile(result_transcripts_filename):
                     message = " * Assembled transcripts are in " + support.process_spaces(result_transcripts_filename)
                     log.info(message)
-                if "assembly" in cfg and os.path.isfile(result_transcripts_paths_filename):
+                if os.path.isfile(result_transcripts_paths_filename):
                     message = " * Paths in the assembly graph corresponding to the transcripts are in " + \
                               support.process_spaces(result_transcripts_paths_filename)
                     log.info(message)
-            else:
-                if "assembly" in cfg and os.path.isfile(result_scaffolds_filename):
+                for filtering_type in options_storage.filtering_types:
+                    result_filtered_transcripts_filename = os.path.join(cfg["common"].output_dir,
+                                                                        filtering_type + "_filtered_" +
+                                                                        options_storage.transcripts_name)
+                    if os.path.isfile(result_filtered_transcripts_filename):
+                        message = " * " + filtering_type.capitalize() + " filtered transcripts are in " + \
+                                  support.process_spaces(result_filtered_transcripts_filename)
+                        log.info(message)
+            elif "assembly" in cfg:
+                if os.path.isfile(result_scaffolds_filename):
                     message = " * Assembled scaffolds are in " + support.process_spaces(result_scaffolds_filename)
                     log.info(message)
-                if "assembly" in cfg and os.path.isfile(result_assembly_graph_filename):
+                if os.path.isfile(result_assembly_graph_filename):
                     message = " * Assembly graph is in " + support.process_spaces(result_assembly_graph_filename)
                     log.info(message)
-                if "assembly" in cfg and os.path.isfile(result_assembly_graph_filename_gfa):
+                if os.path.isfile(result_assembly_graph_filename_gfa):
                     message = " * Assembly graph in GFA format is in " + support.process_spaces(result_assembly_graph_filename_gfa)
                     log.info(message)
-                if "assembly" in cfg and os.path.isfile(result_contigs_paths_filename):
+                if os.path.isfile(result_contigs_paths_filename):
                     message = " * Paths in the assembly graph corresponding to the contigs are in " + \
                               support.process_spaces(result_contigs_paths_filename)
                     log.info(message)
-                if "assembly" in cfg and os.path.isfile(result_scaffolds_paths_filename):
+                if os.path.isfile(result_scaffolds_paths_filename):
                     message = " * Paths in the assembly graph corresponding to the scaffolds are in " + \
                               support.process_spaces(result_scaffolds_paths_filename)
                     log.info(message)
diff --git a/rnaspades.py b/rnaspades.py
index ff31c92..8c96a80 100755
--- a/rnaspades.py
+++ b/rnaspades.py
@@ -158,7 +158,7 @@ def print_used_values(cfg, log):
 
 
 def fill_cfg(options_to_parse, log, secondary_filling=False):
-    skip_output_dir=secondary_filling
+    skip_output_dir = secondary_filling
     skip_stop_after = secondary_filling
     load_processed_dataset=secondary_filling
 
@@ -211,6 +211,7 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             options_storage.configs_dir = support.check_dir_existence(arg)
         elif opt == "--reference":
             options_storage.reference = support.check_file_existence(arg, 'reference', log)
+            options_storage.developer_mode = True
         elif opt == "--series-analysis":
             options_storage.series_analysis = support.check_file_existence(arg, 'series-analysis', log)
         elif opt == "--dataset":
@@ -239,8 +240,19 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             options_storage.large_genome = True
         elif opt == "--plasmid":
             options_storage.plasmid = True
+
         elif opt == "--rna":
             options_storage.rna = True
+        elif opt.startswith("--ss-"):  # strand specificity, RNA-Seq only
+            if opt == "--ss-rf":
+                options_storage.strand_specific = True
+            elif opt == "--ss-fr":
+                options_storage.strand_specific = False
+        elif opt == "--fast":  # fast run, RNA-Seq only
+            options_storage.fast = True
+        elif opt == "--fast:false":
+            options_storage.fast = False
+
         elif opt == "--iontorrent":
             options_storage.iontorrent = True
         elif opt == "--disable-gzip-output":
@@ -295,6 +307,8 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             else:
                 support.error('wrong PHRED quality offset value: ' + arg +
                               ' (should be either 33, 64, or \'auto\')', log)
+        elif opt == "--save-gp":
+            options_storage.save_gp = True
         elif opt == "--cov-cutoff":
             if arg == 'auto' or arg == 'off':
                 options_storage.cov_cutoff = arg
@@ -303,6 +317,12 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             else:
                 support.error('wrong value for --cov-cutoff option: ' + arg +
                               ' (should be a positive float number, or \'auto\', or \'off\')', log)
+        elif opt == "--hidden-cov-cutoff":
+            if support.is_float(arg) and float(arg) > 0.0:
+                options_storage.lcer_cutoff = float(arg)
+            else:
+                support.error('wrong value for --hidden-cov-cutoff option: ' + arg +
+                              ' (should be a positive float number)', log)
         elif opt == '-i' or opt == "--iterations":
             options_storage.iterations = int(arg)
 
@@ -356,7 +376,7 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             support.error("the output_dir should exist for --continue and for --restart-from!", log)
         os.makedirs(options_storage.output_dir)
     if options_storage.restart_from:
-        if options_storage.continue_mode: # saving parameters specified with --restart-from
+        if options_storage.continue_mode:  # saving parameters specified with --restart-from
             if not support.dataset_is_empty(dataset_data):
                 support.error("you cannot specify reads with --restart-from option!", log)
             options_storage.save_restart_options(log)
@@ -370,6 +390,10 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             support.error("you cannot specify --careful in RNA-Seq mode!", log)
         if options_storage.k_mers and options_storage.k_mers != 'auto' and len(options_storage.k_mers) > 1:
             support.error("you cannot specify multiple k-mer sizes in RNA-Seq mode!", log)
+    if [options_storage.meta, options_storage.large_genome, options_storage.truseq_mode,
+       options_storage.rna, options_storage.plasmid, options_storage.single_cell].count(True) > 1:
+        support.error("you cannot simultaneously use more than one mode out of "
+                      "Metagenomic, Large genome, Illumina TruSeq, RNA-Seq, Plasmid, and Single-cell!", log)
     if options_storage.continue_mode:
         return None, None
 
@@ -403,11 +427,12 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
         if len(dataset_data) != len(support.get_lib_ids_by_type(dataset_data, spades_logic.READS_TYPES_USED_IN_RNA_SEQ)):
             support.error('you cannot specify any data types except ' +
                           ', '.join(spades_logic.READS_TYPES_USED_IN_RNA_SEQ) + ' in RNA-Seq mode!')
-        if len(support.get_lib_ids_by_type(dataset_data, 'paired-end')) > 1:
-            support.error('you cannot specify more than one paired-end library in RNA-Seq mode!')
+        #if len(support.get_lib_ids_by_type(dataset_data, 'paired-end')) > 1:
+        #    support.error('you cannot specify more than one paired-end library in RNA-Seq mode!')
 
     if existing_dataset_data is None:
-        pyyaml.dump(dataset_data, open(options_storage.dataset_yaml_filename, 'w'))
+        pyyaml.dump(dataset_data, open(options_storage.dataset_yaml_filename, 'w'),
+                    default_flow_style=False, default_style='"', width=float("inf"))
 
     options_storage.set_default_values()
     ### FILLING cfg
@@ -454,12 +479,27 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
         if options_storage.k_mers:
             cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers
         elif options_storage.rna:
-            cfg["assembly"].__dict__["iterative_K"] = options_storage.K_MERS_RNA
+            k_value = options_storage.K_MERS_RNA[0]
+            if not options_storage.iontorrent:
+                k_value = int(support.get_reads_length(dataset_data, log) / 2) - 1
+                if k_value % 2 == 0:
+                    k_value -= 1
+                if k_value < options_storage.MIN_K:
+                    log.info("\n" + 'Default k value (' + str(k_value) + ') is too small, all k values should be between %d and %d. Setting k=%d.\n'
+                             % (options_storage.MIN_K, options_storage.MAX_K, options_storage.MIN_K))
+                    k_value = options_storage.MIN_K
+                if k_value > options_storage.MAX_K:
+                    log.info("\n" + 'Default k value (' + str(k_value) + ') is too large, all k values should be between %d and %d. Setting k=%d.\n'
+                             % (options_storage.MIN_K, options_storage.MAX_K, options_storage.MAX_K))
+                    k_value = options_storage.MAX_K
+            cfg["assembly"].__dict__["iterative_K"] = k_value
         else:
             cfg["assembly"].__dict__["iterative_K"] = options_storage.K_MERS_SHORT
         cfg["assembly"].__dict__["disable_rr"] = options_storage.disable_rr
         cfg["assembly"].__dict__["diploid_mode"] = options_storage.diploid_mode
         cfg["assembly"].__dict__["cov_cutoff"] = options_storage.cov_cutoff
+        cfg["assembly"].__dict__["lcer_cutoff"] = options_storage.lcer_cutoff
+        cfg["assembly"].__dict__["save_gp"] = options_storage.save_gp
         if options_storage.spades_heap_check:
             cfg["assembly"].__dict__["heap_check"] = options_storage.spades_heap_check
         if options_storage.read_buffer_size:
@@ -658,7 +698,8 @@ def main(args):
         if support.dataset_has_additional_contigs(dataset_data):
             dataset_data = support.process_Ns_in_additional_contigs(dataset_data, dir_for_split_reads, log)
         options_storage.dataset_yaml_filename = os.path.join(options_storage.output_dir, "input_dataset.yaml")
-        pyyaml.dump(dataset_data, open(options_storage.dataset_yaml_filename, 'w'))
+        pyyaml.dump(dataset_data, open(options_storage.dataset_yaml_filename, 'w'),
+                    default_flow_style=False, default_style='"', width=float("inf"))
         cfg["dataset"].yaml_filename = options_storage.dataset_yaml_filename
 
     try:
@@ -894,29 +935,37 @@ def main(args):
             if "assembly" in cfg and os.path.isfile(result_contigs_filename):
                 message = " * Assembled contigs are in " + support.process_spaces(result_contigs_filename)
                 log.info(message)
-            if options_storage.rna:
-                if "assembly" in cfg and os.path.isfile(result_transcripts_filename):
+            if options_storage.rna and "assembly" in cfg:
+                if os.path.isfile(result_transcripts_filename):
                     message = " * Assembled transcripts are in " + support.process_spaces(result_transcripts_filename)
                     log.info(message)
-                if "assembly" in cfg and os.path.isfile(result_transcripts_paths_filename):
+                if os.path.isfile(result_transcripts_paths_filename):
                     message = " * Paths in the assembly graph corresponding to the transcripts are in " + \
                               support.process_spaces(result_transcripts_paths_filename)
                     log.info(message)
-            else:
-                if "assembly" in cfg and os.path.isfile(result_scaffolds_filename):
+                for filtering_type in options_storage.filtering_types:
+                    result_filtered_transcripts_filename = os.path.join(cfg["common"].output_dir,
+                                                                        filtering_type + "_filtered_" +
+                                                                        options_storage.transcripts_name)
+                    if os.path.isfile(result_filtered_transcripts_filename):
+                        message = " * " + filtering_type.capitalize() + " filtered transcripts are in " + \
+                                  support.process_spaces(result_filtered_transcripts_filename)
+                        log.info(message)
+            elif "assembly" in cfg:
+                if os.path.isfile(result_scaffolds_filename):
                     message = " * Assembled scaffolds are in " + support.process_spaces(result_scaffolds_filename)
                     log.info(message)
-                if "assembly" in cfg and os.path.isfile(result_assembly_graph_filename):
+                if os.path.isfile(result_assembly_graph_filename):
                     message = " * Assembly graph is in " + support.process_spaces(result_assembly_graph_filename)
                     log.info(message)
-                if "assembly" in cfg and os.path.isfile(result_assembly_graph_filename_gfa):
+                if os.path.isfile(result_assembly_graph_filename_gfa):
                     message = " * Assembly graph in GFA format is in " + support.process_spaces(result_assembly_graph_filename_gfa)
                     log.info(message)
-                if "assembly" in cfg and os.path.isfile(result_contigs_paths_filename):
+                if os.path.isfile(result_contigs_paths_filename):
                     message = " * Paths in the assembly graph corresponding to the contigs are in " + \
                               support.process_spaces(result_contigs_paths_filename)
                     log.info(message)
-                if "assembly" in cfg and os.path.isfile(result_scaffolds_paths_filename):
+                if os.path.isfile(result_scaffolds_paths_filename):
                     message = " * Paths in the assembly graph corresponding to the scaffolds are in " + \
                               support.process_spaces(result_scaffolds_paths_filename)
                     log.info(message)
diff --git a/rnaspades_manual.html b/rnaspades_manual.html
index 5a23b1e..6cd729e 100644
--- a/rnaspades_manual.html
+++ b/rnaspades_manual.html
@@ -13,7 +13,9 @@
 1. <a href="#sec1">About rnaSPAdes</a><br>
 2. <a href="#sec2">rnaSPAdes specifics</a><br>
     2.1. <a href="#sec2.1">Running rnaSPAdes</a><br>
-    2.2. <a href="#sec2.2">rnaSPAdes output</a><br>
+    2.2. <a href="#sec2.2">rnaSPAdes-specific options</a><br>
+    2.3. <a href="#sec2.3">Assembling strand-specific RNA-Seq</a><br>
+    2.4. <a href="#sec2.4">rnaSPAdes output</a><br>
 3. <a href="#sec3">Assembly evaluation</a><br>
 4. <a href="#sec4">Citation</a><br>
 5. <a href="#sec5">Feedback and bug reports</a><br>
@@ -47,18 +49,50 @@ or
 
 Note that we assume that SPAdes installation directory is added to the <code>PATH</code> variable (provide full path to rnaSPAdes executable otherwise: <code><rnaspades installation dir>/rnaspades.py</code>). 
 
-<p>Here are several notes regarding options :
+<p>Here are several notes regarding rnaSPAdes options:
     <ul>
-        <li>rnaSPAdes can take as an input only one paired-end library and multiple single-end libraries.</li>
+        <li>rnaSPAdes can take as an input only paired-end and single-end libraries.</li>
         <li>rnaSPAdes does not support <code>--careful</code> and <code>--cov-cutoff</code> options.</li>
-        <li>rnaSPAdes is not compatible with other pipeline options such as <code>--meta</code>, <code>--sc</code> and <code>--plasmid</code>.</li>
-        <li>rnaSPAdes works using only a single k-mer size (55 by the default). We strongly recommend not to change this parameter. In case your RNA-Seq data set contains long Illumina reads (150 bp and longer) you may try to use longer k-mer size (approximately half of the read length). In case you have any doubts about your run, do not hesitate to contact us using e-mail given below.</li>
+        <li>rnaSPAdes is not compatible with other pipeline options such as <code>--meta</code>, <code>--sc</code> and <code>--plasmid</code>. If you wish to assemble metatranscriptomic data just run rnaSPAdes as it is.</li>
+        <li>rnaSPAdes works using only a single k-mer size (automatically detected using read length by the default). We strongly recommend not to change this parameter. In case you have any doubts about your run, do not hesitate to contact us using e-mail given below.</li>
     </ul>
 
 <a name="sec2.2"></a>
-<h3>2.2 rnaSPAdes output</h3>
+<h3>2.2 rnaSPAdes-specific options</h3>
 <p>
-rnaSPAdes outputs only one FASTA file named <code>transcripts.fasta</code>. The corresponding file with paths in the <code>assembly_graph.fastg</code> is <code>transcripts.paths</code>.
+    <code>--fast</code><br>
+        Speeds up isoform detection stage by removing short low-covered isolated edges from the graph. Note, that short low-expressed transcripts may be missing when this option is used.
+</p>
+
+<a name="sec2.3"></a>
+<h3>2.3 Assembling strand-specific RNA-Seq</h3>
+<p>rnaSPAdes now supports strand-specific RNA-Seq datasets. You can indicate that the dataset is strand-specific using one of the following options:
+
+<p>
+    <code>--ss-fr</code><br>
+        The data set is strand-specific and first read in pair corresponds to <b>actual</b> gene strand.
+</p>
+
+<p>
+    <code>--ss-rf</code><br>
+        The data set is strand-specific and first read in pair corresponds to <b>reverse</b> gene strand.
+</p>
+
+If the data set is single-end, use the <code>--ss-fr</code> option when reads correspond to the gene strand and <code>--ss-rf</code> otherwise.
+
+
+<a name="sec2.4"></a>
+<h3>2.4 rnaSPAdes output</h3>
+<p>
+rnaSPAdes outputs one main FASTA file named <code>transcripts.fasta</code>. The corresponding file with paths in the <code>assembly_graph.fastg</code> is <code>transcripts.paths</code>.
+
+<p> 
+In addition, rnaSPAdes outputs transcripts with different levels of filtering into <code><output_dir>/</code>: <br>
+    <ul>
+        <li><code>hard_filtered_transcripts.fasta</code> – includes only long and reliable transcripts with rather high expression.</li>
+        <li><code>soft_filtered_transcripts.fasta</code> – includes short and low-expressed transcripts, likely to contain junk sequences.</li>
+    </ul>
+We recommend using the main <code>transcripts.fasta</code> file in case you don't have any specific needs for your projects. Do not hesitate to contact us using e-mail given below.
 
 <p>
    Contigs/scaffolds names in rnaSPAdes output FASTA files have the following format: <br><code>>NODE_97_length_6237_cov_11.9819_g8_i2</code><br> Similarly to SPAdes, <code>97</code> is the number of the transcript, <code>6237</code> is its sequence length in nucleotides and <code>11.9819</code> is the k-mer coverage. Note that the k-mer coverage is always lower than the read (per-base) coverage. <code>g8_i2</code> correspond to the gene number 8 and isoform number 2 within this gene. Tr [...]
diff --git a/spades.py b/spades.py
index ff31c92..8c96a80 100755
--- a/spades.py
+++ b/spades.py
@@ -158,7 +158,7 @@ def print_used_values(cfg, log):
 
 
 def fill_cfg(options_to_parse, log, secondary_filling=False):
-    skip_output_dir=secondary_filling
+    skip_output_dir = secondary_filling
     skip_stop_after = secondary_filling
     load_processed_dataset=secondary_filling
 
@@ -211,6 +211,7 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             options_storage.configs_dir = support.check_dir_existence(arg)
         elif opt == "--reference":
             options_storage.reference = support.check_file_existence(arg, 'reference', log)
+            options_storage.developer_mode = True
         elif opt == "--series-analysis":
             options_storage.series_analysis = support.check_file_existence(arg, 'series-analysis', log)
         elif opt == "--dataset":
@@ -239,8 +240,19 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             options_storage.large_genome = True
         elif opt == "--plasmid":
             options_storage.plasmid = True
+
         elif opt == "--rna":
             options_storage.rna = True
+        elif opt.startswith("--ss-"):  # strand specificity, RNA-Seq only
+            if opt == "--ss-rf":
+                options_storage.strand_specific = True
+            elif opt == "--ss-fr":
+                options_storage.strand_specific = False
+        elif opt == "--fast":  # fast run, RNA-Seq only
+            options_storage.fast = True
+        elif opt == "--fast:false":
+            options_storage.fast = False
+
         elif opt == "--iontorrent":
             options_storage.iontorrent = True
         elif opt == "--disable-gzip-output":
@@ -295,6 +307,8 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             else:
                 support.error('wrong PHRED quality offset value: ' + arg +
                               ' (should be either 33, 64, or \'auto\')', log)
+        elif opt == "--save-gp":
+            options_storage.save_gp = True
         elif opt == "--cov-cutoff":
             if arg == 'auto' or arg == 'off':
                 options_storage.cov_cutoff = arg
@@ -303,6 +317,12 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             else:
                 support.error('wrong value for --cov-cutoff option: ' + arg +
                               ' (should be a positive float number, or \'auto\', or \'off\')', log)
+        elif opt == "--hidden-cov-cutoff":
+            if support.is_float(arg) and float(arg) > 0.0:
+                options_storage.lcer_cutoff = float(arg)
+            else:
+                support.error('wrong value for --hidden-cov-cutoff option: ' + arg +
+                              ' (should be a positive float number)', log)
         elif opt == '-i' or opt == "--iterations":
             options_storage.iterations = int(arg)
 
@@ -356,7 +376,7 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             support.error("the output_dir should exist for --continue and for --restart-from!", log)
         os.makedirs(options_storage.output_dir)
     if options_storage.restart_from:
-        if options_storage.continue_mode: # saving parameters specified with --restart-from
+        if options_storage.continue_mode:  # saving parameters specified with --restart-from
             if not support.dataset_is_empty(dataset_data):
                 support.error("you cannot specify reads with --restart-from option!", log)
             options_storage.save_restart_options(log)
@@ -370,6 +390,10 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
             support.error("you cannot specify --careful in RNA-Seq mode!", log)
         if options_storage.k_mers and options_storage.k_mers != 'auto' and len(options_storage.k_mers) > 1:
             support.error("you cannot specify multiple k-mer sizes in RNA-Seq mode!", log)
+    if [options_storage.meta, options_storage.large_genome, options_storage.truseq_mode,
+       options_storage.rna, options_storage.plasmid, options_storage.single_cell].count(True) > 1:
+        support.error("you cannot simultaneously use more than one mode out of "
+                      "Metagenomic, Large genome, Illumina TruSeq, RNA-Seq, Plasmid, and Single-cell!", log)
     if options_storage.continue_mode:
         return None, None
 
@@ -403,11 +427,12 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
         if len(dataset_data) != len(support.get_lib_ids_by_type(dataset_data, spades_logic.READS_TYPES_USED_IN_RNA_SEQ)):
             support.error('you cannot specify any data types except ' +
                           ', '.join(spades_logic.READS_TYPES_USED_IN_RNA_SEQ) + ' in RNA-Seq mode!')
-        if len(support.get_lib_ids_by_type(dataset_data, 'paired-end')) > 1:
-            support.error('you cannot specify more than one paired-end library in RNA-Seq mode!')
+        #if len(support.get_lib_ids_by_type(dataset_data, 'paired-end')) > 1:
+        #    support.error('you cannot specify more than one paired-end library in RNA-Seq mode!')
 
     if existing_dataset_data is None:
-        pyyaml.dump(dataset_data, open(options_storage.dataset_yaml_filename, 'w'))
+        pyyaml.dump(dataset_data, open(options_storage.dataset_yaml_filename, 'w'),
+                    default_flow_style=False, default_style='"', width=float("inf"))
 
     options_storage.set_default_values()
     ### FILLING cfg
@@ -454,12 +479,27 @@ def fill_cfg(options_to_parse, log, secondary_filling=False):
         if options_storage.k_mers:
             cfg["assembly"].__dict__["iterative_K"] = options_storage.k_mers
         elif options_storage.rna:
-            cfg["assembly"].__dict__["iterative_K"] = options_storage.K_MERS_RNA
+            k_value = options_storage.K_MERS_RNA[0]
+            if not options_storage.iontorrent:
+                k_value = int(support.get_reads_length(dataset_data, log) / 2) - 1
+                if k_value % 2 == 0:
+                    k_value -= 1
+                if k_value < options_storage.MIN_K:
+                    log.info("\n" + 'Default k value (' + str(k_value) + ') is too small, all k values should be between %d and %d. Setting k=%d.\n'
+                             % (options_storage.MIN_K, options_storage.MAX_K, options_storage.MIN_K))
+                    k_value = options_storage.MIN_K
+                if k_value > options_storage.MAX_K:
+                    log.info("\n" + 'Default k value (' + str(k_value) + ') is too large, all k values should be between %d and %d. Setting k=%d.\n'
+                             % (options_storage.MIN_K, options_storage.MAX_K, options_storage.MAX_K))
+                    k_value = options_storage.MAX_K
+            cfg["assembly"].__dict__["iterative_K"] = k_value
         else:
             cfg["assembly"].__dict__["iterative_K"] = options_storage.K_MERS_SHORT
         cfg["assembly"].__dict__["disable_rr"] = options_storage.disable_rr
         cfg["assembly"].__dict__["diploid_mode"] = options_storage.diploid_mode
         cfg["assembly"].__dict__["cov_cutoff"] = options_storage.cov_cutoff
+        cfg["assembly"].__dict__["lcer_cutoff"] = options_storage.lcer_cutoff
+        cfg["assembly"].__dict__["save_gp"] = options_storage.save_gp
         if options_storage.spades_heap_check:
             cfg["assembly"].__dict__["heap_check"] = options_storage.spades_heap_check
         if options_storage.read_buffer_size:
@@ -658,7 +698,8 @@ def main(args):
         if support.dataset_has_additional_contigs(dataset_data):
             dataset_data = support.process_Ns_in_additional_contigs(dataset_data, dir_for_split_reads, log)
         options_storage.dataset_yaml_filename = os.path.join(options_storage.output_dir, "input_dataset.yaml")
-        pyyaml.dump(dataset_data, open(options_storage.dataset_yaml_filename, 'w'))
+        pyyaml.dump(dataset_data, open(options_storage.dataset_yaml_filename, 'w'),
+                    default_flow_style=False, default_style='"', width=float("inf"))
         cfg["dataset"].yaml_filename = options_storage.dataset_yaml_filename
 
     try:
@@ -894,29 +935,37 @@ def main(args):
             if "assembly" in cfg and os.path.isfile(result_contigs_filename):
                 message = " * Assembled contigs are in " + support.process_spaces(result_contigs_filename)
                 log.info(message)
-            if options_storage.rna:
-                if "assembly" in cfg and os.path.isfile(result_transcripts_filename):
+            if options_storage.rna and "assembly" in cfg:
+                if os.path.isfile(result_transcripts_filename):
                     message = " * Assembled transcripts are in " + support.process_spaces(result_transcripts_filename)
                     log.info(message)
-                if "assembly" in cfg and os.path.isfile(result_transcripts_paths_filename):
+                if os.path.isfile(result_transcripts_paths_filename):
                     message = " * Paths in the assembly graph corresponding to the transcripts are in " + \
                               support.process_spaces(result_transcripts_paths_filename)
                     log.info(message)
-            else:
-                if "assembly" in cfg and os.path.isfile(result_scaffolds_filename):
+                for filtering_type in options_storage.filtering_types:
+                    result_filtered_transcripts_filename = os.path.join(cfg["common"].output_dir,
+                                                                        filtering_type + "_filtered_" +
+                                                                        options_storage.transcripts_name)
+                    if os.path.isfile(result_filtered_transcripts_filename):
+                        message = " * " + filtering_type.capitalize() + " filtered transcripts are in " + \
+                                  support.process_spaces(result_filtered_transcripts_filename)
+                        log.info(message)
+            elif "assembly" in cfg:
+                if os.path.isfile(result_scaffolds_filename):
                     message = " * Assembled scaffolds are in " + support.process_spaces(result_scaffolds_filename)
                     log.info(message)
-                if "assembly" in cfg and os.path.isfile(result_assembly_graph_filename):
+                if os.path.isfile(result_assembly_graph_filename):
                     message = " * Assembly graph is in " + support.process_spaces(result_assembly_graph_filename)
                     log.info(message)
-                if "assembly" in cfg and os.path.isfile(result_assembly_graph_filename_gfa):
+                if os.path.isfile(result_assembly_graph_filename_gfa):
                     message = " * Assembly graph in GFA format is in " + support.process_spaces(result_assembly_graph_filename_gfa)
                     log.info(message)
-                if "assembly" in cfg and os.path.isfile(result_contigs_paths_filename):
+                if os.path.isfile(result_contigs_paths_filename):
                     message = " * Paths in the assembly graph corresponding to the contigs are in " + \
                               support.process_spaces(result_contigs_paths_filename)
                     log.info(message)
-                if "assembly" in cfg and os.path.isfile(result_scaffolds_paths_filename):
+                if os.path.isfile(result_scaffolds_paths_filename):
                     message = " * Paths in the assembly graph corresponding to the scaffolds are in " + \
                               support.process_spaces(result_scaffolds_paths_filename)
                     log.info(message)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d539593..c87e8c6 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -29,6 +29,11 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${SPADES_TOOLS_BINARY_DIR})
 set(EXT_DIR "${CMAKE_SOURCE_DIR}/../ext")
 set(SPADES_CFG_DIR "${CMAKE_SOURCE_DIR}/../configs")
 
+# Uncomment for gprof profiling
+#SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pg")
+#SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pg")
+#SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -pg")
+
 # Everything option-dependent
 include(options)
 
diff --git a/src/cmake/options.cmake b/src/cmake/options.cmake
index 3bc0aef..0dbcdf2 100644
--- a/src/cmake/options.cmake
+++ b/src/cmake/options.cmake
@@ -41,6 +41,15 @@ if (SPADES_STATIC_BUILD)
   set(Boost_USE_STATIC_RUNTIME     ON)
 endif()
 
+option(SPADES_USE_GPROF "gprof profiler" OFF)
+
+if (SPADES_USE_GPROF)
+  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pg")
+  SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pg")
+  SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -pg")
+  SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pg") 
+endif()
+
 # Define minimum and maximum K
 set(SPADES_MIN_K 1 CACHE INTEGER "Minimum k-mer length")
 set(SPADES_MAX_K 128 CACHE INTEGER "Maximum k-mer length")
diff --git a/src/cmake/pack.cmake b/src/cmake/pack.cmake
index a121170..17f2eef 100644
--- a/src/cmake/pack.cmake
+++ b/src/cmake/pack.cmake
@@ -12,9 +12,9 @@ set(CPACK_PACKAGE_NAME "SPAdes")
 set(CPACK_PACKAGE_VENDOR "Saint Petersburg State University")
 set(CPACK_PACKAGE_DESCRIPTION_FILE "${SPADES_MAIN_SRC_DIR}/../README")
 set(CPACK_RESOURCE_FILE_LICENSE "${SPADES_MAIN_SRC_DIR}/../LICENSE")
-set(CPACK_PACKAGE_VERSION "3.10.1")
+set(CPACK_PACKAGE_VERSION "3.11.1")
 set(CPACK_PACKAGE_VERSION_MAJOR "3")
-set(CPACK_PACKAGE_VERSION_MINOR "10")
+set(CPACK_PACKAGE_VERSION_MINOR "11")
 set(CPACK_PACKAGE_VERSION_PATCH "1")
 set(CPACK_STRIP_FILES bin/spades bin/hammer bin/ionhammer bin/dipspades bin/spades-bwa bin/corrector bin/scaffold_correction)
 
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 52bd90a..22a95c4 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -11,12 +11,13 @@ add_subdirectory(pipeline)
 add_subdirectory(assembly_graph)
 add_subdirectory(modules/path_extend)
 add_subdirectory(modules)
+add_subdirectory(paired_info)
 add_subdirectory(stages)
 add_subdirectory(utils)
 add_subdirectory(io)
-add_subdirectory(utils/mph_index)
-add_subdirectory(utils/coverage_model)
+add_subdirectory(utils/kmer_mph)
+add_subdirectory(modules/coverage_model)
 
 add_library(common_modules STATIC empty.cpp)
 
-target_link_libraries(common_modules assembly_graph input pipeline coverage_model path_extend stages utils mph_index modules)
+target_link_libraries(common_modules assembly_graph input pipeline coverage_model paired_info path_extend stages utils mph_index modules)
diff --git a/src/common/adt/array_vector.hpp b/src/common/adt/array_vector.hpp
index 819aa49..9e59460 100644
--- a/src/common/adt/array_vector.hpp
+++ b/src/common/adt/array_vector.hpp
@@ -15,45 +15,48 @@
 #include <cstring>
 #include <cstddef>
 
+namespace adt {
+
 template<class _Cp, bool _IsConst>
-class __array_vector_iterator;
+class array_vector_iterator;
 
 template<class _Cp>
-class __array_reference;
+class array_reference;
 
 template<class _Cp>
-class __array_const_reference;
+class array_const_reference;
 
 template<typename ElTy>
 struct array_equal_to;
 
+
 template<class _Cp>
-class __array {
-    typedef typename _Cp::__storage_type __storage_type;
-    typedef typename _Cp::__storage_pointer __storage_pointer;
-    typedef typename _Cp::__const_storage_pointer __const_storage_pointer;
-    typedef typename _Cp::size_type __size_type;
+class array {
+    typedef typename _Cp::storage_type storage_type;
+    typedef typename _Cp::storage_pointer storage_pointer;
+    typedef typename _Cp::const_storage_pointer const_storage_pointer;
+    typedef typename _Cp::size_type size_type;
 
 #if defined(__clang__)
-  friend typename _Cp::__self;
+    friend typename _Cp::self;
 #else
 
-    friend class _Cp::__self;
+    friend class _Cp::self;
 
 #endif
 
-    friend class __array_vector_iterator<_Cp, false>;
+    friend class array_vector_iterator<_Cp, false>;
 
-    friend class __array_reference<_Cp>;
+    friend class array_reference<_Cp>;
 
-    friend class __array_const_reference<_Cp>;
+    friend class array_const_reference<_Cp>;
 
-    __storage_pointer ptr_;
-    __size_type size_;
+    storage_pointer ptr_;
+    size_type size_;
     bool allocated;
 
 public:
-    ~__array() {
+    ~array() {
         if (allocated)
             delete[] ptr_;
     }
@@ -63,53 +66,53 @@ public:
     }
 
     size_t data_size() const {
-        return size_ * sizeof(__storage_type);
+        return size_ * sizeof(storage_type);
     }
 
-    __storage_pointer data() const {
+    storage_pointer data() const {
         return ptr_;
     }
 
-    __array(const __array &that) {
+    array(const array &that) {
         size_ = that.size_;
-        ptr_ = new __storage_type[size_];
+        ptr_ = new storage_type[size_];
         allocated = true;
         memcpy(ptr_, that.ptr_, data_size());
     }
 
-    __array(const __array_reference<_Cp> that) {
+    array(const array_reference<_Cp> that) {
         size_ = that.size();
-        ptr_ = new __storage_type[size_];
+        ptr_ = new storage_type[size_];
         allocated = true;
         memcpy(ptr_, that.data(), data_size());
     }
 
-    __array &operator=(const __array &that) {
-        __storage_pointer this_ptr = data(), that_ptr = that.data();
+    array &operator=(const array &that) {
+        storage_pointer this_ptr = data(), that_ptr = that.data();
         if (this_ptr != that_ptr)
             memcpy(this_ptr, that_ptr, data_size());
 
         return *this;
     }
 
-    __array &operator=(const __array_reference<_Cp> that) {
-        __storage_pointer this_ptr = data(), that_ptr = that.data();
+    array &operator=(const array_reference<_Cp> that) {
+        storage_pointer this_ptr = data(), that_ptr = that.data();
         if (this_ptr != that_ptr)
             memcpy(this_ptr, that_ptr, data_size());
 
         return *this;
     }
 
-    __array &operator=(__const_storage_pointer that_ptr) {
-        __storage_pointer this_ptr = data();
+    array &operator=(const_storage_pointer that_ptr) {
+        storage_pointer this_ptr = data();
         if (this_ptr != that_ptr)
             memcpy(this_ptr, that_ptr, data_size());
 
         return *this;
     }
 
-    bool operator<(const __array &that) const {
-        __storage_pointer this_ptr = data(), that_ptr = that.data();
+    bool operator<(const array &that) const {
+        storage_pointer this_ptr = data(), that_ptr = that.data();
 
         for (size_t i = 0; i < size(); ++i) {
             if (this_ptr[i] != that_ptr[i])
@@ -119,8 +122,8 @@ public:
         return false;
     }
 
-    bool operator<(const __array_reference<_Cp> that) const {
-        __storage_pointer this_ptr = data(), that_ptr = that.data();
+    bool operator<(const array_reference<_Cp> that) const {
+        storage_pointer this_ptr = data(), that_ptr = that.data();
 
         for (size_t i = 0; i < size(); ++i) {
             if (this_ptr[i] != that_ptr[i])
@@ -130,8 +133,8 @@ public:
         return false;
     }
 
-    bool operator==(const __array &that) const {
-        __storage_pointer this_ptr = data(), that_ptr = that.data();
+    bool operator==(const array &that) const {
+        storage_pointer this_ptr = data(), that_ptr = that.data();
 
         for (size_t i = 0; i < size(); ++i) {
             if (this_ptr[i] != that_ptr[i])
@@ -141,8 +144,8 @@ public:
         return true;
     }
 
-    bool operator==(const __array_reference<_Cp> that) const {
-        __storage_pointer this_ptr = data(), that_ptr = that.data();
+    bool operator==(const array_reference<_Cp> that) const {
+        storage_pointer this_ptr = data(), that_ptr = that.data();
 
         for (size_t i = 0; i < size(); ++i) {
             if (this_ptr[i] != that_ptr[i])
@@ -152,42 +155,43 @@ public:
         return true;
     }
 
-    bool operator!=(const __array &that) const {
+    bool operator!=(const array &that) const {
         return !operator==(that);
     }
 
-    bool operator!=(const __array_reference<_Cp> that) const {
+    bool operator!=(const array_reference<_Cp> that) const {
         return !operator==(that);
     }
 
 private:
-    __array(__storage_pointer p, __size_type sz) :
+    array(storage_pointer p, size_type sz) :
             ptr_(p), size_(sz), allocated(false) { }
 };
 
+
 template<class _Cp>
-class __array_reference {
-    typedef typename _Cp::__storage_type __storage_type;
-    typedef typename _Cp::__storage_pointer __storage_pointer;
-    typedef typename _Cp::__const_storage_pointer __const_storage_pointer;
-    typedef typename _Cp::size_type __size_type;
+class array_reference {
+    typedef typename _Cp::storage_type storage_type;
+    typedef typename _Cp::storage_pointer storage_pointer;
+    typedef typename _Cp::const_storage_pointer const_storage_pointer;
+    typedef typename _Cp::size_type size_type;
 
 #if defined(__clang__)
-  friend typename _Cp::__self;
+    friend typename _Cp::self;
 #else
 
-    friend class _Cp::__self;
+    friend class _Cp::self;
 
 #endif
 
-    friend class __array_vector_iterator<_Cp, false>;
+    friend class array_vector_iterator<_Cp, false>;
 
-    friend class __array<_Cp>;
+    friend class array<_Cp>;
 
-    friend struct array_equal_to<__storage_type>;
+    friend struct adt::array_equal_to<storage_type>;
 
-    __storage_pointer ptr_;
-    __size_type size_;
+    storage_pointer ptr_;
+    size_type size_;
 
 public:
     size_t size() const {
@@ -195,39 +199,39 @@ public:
     }
 
     size_t data_size() const {
-        return size() * sizeof(__storage_type);
+        return size() * sizeof(storage_type);
     }
 
-    __storage_pointer data() const {
+    storage_pointer data() const {
         return ptr_;
     }
 
-    __array_reference &operator=(const __array<_Cp> &that) {
-        __storage_pointer this_ptr = data(), that_ptr = that.data();
+    array_reference &operator=(const array<_Cp> &that) {
+        storage_pointer this_ptr = data(), that_ptr = that.data();
         if (this_ptr != that_ptr)
             memcpy(this_ptr, that_ptr, data_size());
 
         return *this;
     }
 
-    __array_reference &operator=(__const_storage_pointer that_ptr) {
-        __storage_pointer this_ptr = data();
+    array_reference &operator=(const_storage_pointer that_ptr) {
+        storage_pointer this_ptr = data();
         if (this_ptr != that_ptr)
             memcpy(this_ptr, that_ptr, data_size());
 
         return *this;
     }
 
-    __array_reference &operator=(const __array_reference that) {
-        __storage_pointer this_ptr = data(), that_ptr = that.data();
+    array_reference &operator=(const array_reference that) {
+        storage_pointer this_ptr = data(), that_ptr = that.data();
         if (this_ptr != that_ptr)
             memcpy(this_ptr, that_ptr, data_size());
 
         return *this;
     }
 
-    bool operator<(const __array<_Cp> &that) const {
-        __storage_pointer this_ptr = data(), that_ptr = that.data();
+    bool operator<(const array<_Cp> &that) const {
+        storage_pointer this_ptr = data(), that_ptr = that.data();
 
         for (size_t i = 0; i < size(); ++i) {
             if (this_ptr[i] != that_ptr[i])
@@ -237,8 +241,8 @@ public:
         return false;
     }
 
-    bool operator<(const __array_reference that) const {
-        __storage_pointer this_ptr = data(), that_ptr = that.data();
+    bool operator<(const array_reference that) const {
+        storage_pointer this_ptr = data(), that_ptr = that.data();
 
         for (size_t i = 0; i < size(); ++i) {
             if (this_ptr[i] != that_ptr[i])
@@ -248,8 +252,8 @@ public:
         return false;
     }
 
-    bool operator==(const __array<_Cp> &that) const {
-        __storage_pointer this_ptr = data(), that_ptr = that.data();
+    bool operator==(const array<_Cp> &that) const {
+        storage_pointer this_ptr = data(), that_ptr = that.data();
 
         for (size_t i = 0; i < size(); ++i) {
             if (this_ptr[i] != that_ptr[i])
@@ -259,8 +263,8 @@ public:
         return true;
     }
 
-    bool operator==(const __array_reference that) const {
-        __storage_pointer this_ptr = data(), that_ptr = that.data();
+    bool operator==(const array_reference that) const {
+        storage_pointer this_ptr = data(), that_ptr = that.data();
 
         for (size_t i = 0; i < size(); ++i) {
             if (this_ptr[i] != that_ptr[i])
@@ -270,40 +274,40 @@ public:
         return true;
     }
 
-    bool operator!=(const __array_reference that) const {
+    bool operator!=(const array_reference that) const {
         return !operator==(that);
     }
 
-    bool operator!=(const __array<_Cp> &that) const {
+    bool operator!=(const array<_Cp> &that) const {
         return !operator==(that);
     }
 
 private:
-    __array_reference(__storage_pointer p, __size_type sz) :
+    array_reference(storage_pointer p, size_type sz) :
             ptr_(p), size_(sz) { }
 };
 
 template<class _Cp>
-class __array_const_reference {
-    typedef typename _Cp::__storage_type __storage_type;
-    typedef typename _Cp::__storage_pointer __storage_pointer;
-    typedef typename _Cp::__const_storage_pointer __const_storage_pointer;
-    typedef typename _Cp::size_type __size_type;
+class array_const_reference {
+    typedef typename _Cp::storage_type storage_type;
+    typedef typename _Cp::storage_pointer storage_pointer;
+    typedef typename _Cp::const_storage_pointer const_storage_pointer;
+    typedef typename _Cp::size_type size_type;
 
 #if defined(__clang__)
-  friend typename _Cp::__self;
+    friend typename _Cp::self;
 #else
 
-    friend class _Cp::__self;
+    friend class _Cp::self;
 
 #endif
 
-    friend class __array_vector_iterator<_Cp, true>;
+    friend class array_vector_iterator<_Cp, true>;
 
-    friend struct array_equal_to<__storage_type>;
+    friend struct adt::array_equal_to<storage_type>;
 
-    __const_storage_pointer ptr_;
-    __size_type size_;
+    const_storage_pointer ptr_;
+    size_type size_;
 
 public:
     size_t size() const {
@@ -311,18 +315,18 @@ public:
     }
 
     size_t data_size() const {
-        return size() * sizeof(__storage_type);
+        return size() * sizeof(storage_type);
     }
 
-    __const_storage_pointer data() const {
+    const_storage_pointer data() const {
         return ptr_;
     }
 
-    __array_const_reference(const __array_const_reference &that)
+    array_const_reference(const array_const_reference &that)
             : ptr_(that.ptr_), size_(that.size_) { }
 
-    bool operator<(__array_const_reference that) const {
-        const __storage_pointer this_ptr = data(), that_ptr = that.data();
+    bool operator<(array_const_reference that) const {
+        const storage_pointer this_ptr = data(), that_ptr = that.data();
 
         for (size_t i = 0; i < size(); ++i) {
             if (this_ptr[i] != that_ptr[i])
@@ -332,8 +336,8 @@ public:
         return false;
     }
 
-    bool operator==(__array_const_reference that) const {
-        const __storage_pointer this_ptr = data(), that_ptr = that.data();
+    bool operator==(array_const_reference that) const {
+        const storage_pointer this_ptr = data(), that_ptr = that.data();
 
         for (size_t i = 0; i < size(); ++i) {
             if (this_ptr[i] != that_ptr[i])
@@ -343,9 +347,9 @@ public:
         return true;
     }
 
-    bool operator==(const __array_reference<_Cp> that) const {
-        __const_storage_pointer this_ptr = data();
-        const __storage_pointer that_ptr = that.data();
+    bool operator==(const array_reference<_Cp> that) const {
+        const_storage_pointer this_ptr = data();
+        const storage_pointer that_ptr = that.data();
 
         for (size_t i = 0; i < size(); ++i) {
             if (this_ptr[i] != that_ptr[i])
@@ -355,26 +359,27 @@ public:
         return true;
     }
 
-    bool operator!=(const __array_const_reference that) const {
+    bool operator!=(const array_const_reference that) const {
         return !operator==(that);
     }
 
-    bool operator!=(const __array_reference<_Cp> that) const {
+    bool operator!=(const array_reference<_Cp> that) const {
         return !operator==(that);
     }
 
 private:
-    __array_const_reference(__const_storage_pointer p, __size_type sz) :
+    array_const_reference(const_storage_pointer p, size_type sz) :
             ptr_(p), size_(sz) { }
 
-    __array_const_reference &operator=(const __array_const_reference &that);
+    array_const_reference &operator=(const array_const_reference &that);
 };
 
+}
 // This is hack. Never do this again!
 #ifdef __GLIBCXX__
 namespace std {
     template<typename _Cp>
-    struct __are_same<__array_reference<_Cp>, __array<_Cp> &> {
+    struct __are_same<adt::array_reference<_Cp>, adt::array<_Cp> &> {
         enum {
             __value = 1
         };
@@ -382,7 +387,7 @@ namespace std {
     };
 
     template<typename _Cp>
-    struct __are_same<__array<_Cp> &, __array_reference<_Cp> > {
+    struct __are_same<adt::array<_Cp> &, adt::array_reference<_Cp> > {
         enum {
             __value = 1
         };
@@ -390,50 +395,51 @@ namespace std {
     };
 }
 #endif
+namespace adt {
 
 template<typename _Cp>
-void swap(__array_reference<_Cp> lhs, __array_reference<_Cp> rhs) {
+void swap(array_reference<_Cp> lhs, array_reference<_Cp> rhs) {
     std::swap_ranges(lhs.data(), lhs.data() + lhs.size(), rhs.data());
 }
 
 template<typename _Cp>
-void swap(__array<_Cp> &lhs, __array_reference<_Cp> rhs) {
+void swap(array<_Cp> &lhs, array_reference<_Cp> rhs) {
     std::swap_ranges(lhs.data(), lhs.data() + lhs.size(), rhs.data());
 }
 
 template<typename _Cp>
-void swap(__array_reference<_Cp> lhs, __array<_Cp> &rhs) {
+void swap(array_reference<_Cp> lhs, array<_Cp> &rhs) {
     std::swap_ranges(lhs.data(), lhs.data() + lhs.size(), rhs.data());
 }
 
 template<typename _Cp, bool _IsConst>
-class __array_vector_iterator {
+class array_vector_iterator {
 public:
     typedef typename _Cp::difference_type difference_type;
-    typedef __array_vector_iterator pointer;
-    typedef typename std::conditional<_IsConst, __array_const_reference<_Cp>, __array_reference<_Cp> >::type reference;
-    typedef __array<_Cp> value_type;
+    typedef array_vector_iterator pointer;
+    typedef typename std::conditional<_IsConst, array_const_reference<_Cp>, array_reference<_Cp> >::type reference;
+    typedef array<_Cp> value_type;
 
     typedef std::random_access_iterator_tag iterator_category;
 
 private:
-    typedef typename _Cp::__storage_type __storage_type;
-    typedef typename _Cp::__storage_pointer __storage_pointer;
-    typedef typename _Cp::size_type __size_type;
+    typedef typename _Cp::storage_type storage_type;
+    typedef typename _Cp::storage_pointer storage_pointer;
+    typedef typename _Cp::size_type size_type;
 
 #if defined(__clang__)
-  friend typename _Cp::__self;
+  friend typename _Cp::self;
 #else
 
-    friend class _Cp::__self;
+    friend class _Cp::self;
 
 #endif
 
-    __storage_pointer data_;
-    __size_type el_sz_;
+    storage_pointer data_;
+    size_type el_sz_;
 
 public:
-    __array_vector_iterator(__storage_pointer data, __size_type el_sz)
+    array_vector_iterator(storage_pointer data, size_type el_sz)
             : data_(data), el_sz_(el_sz) { }
 
     size_t size() const {
@@ -441,10 +447,10 @@ public:
     }
 
     size_t data_size() const {
-        return el_sz_ * sizeof(__storage_type);
+        return el_sz_ * sizeof(storage_type);
     }
 
-    __storage_pointer data() const {
+    storage_pointer data() const {
         return data_;
     }
 
@@ -456,86 +462,86 @@ public:
         return *(*this + n);
     }
 
-    __array_vector_iterator &operator++() {
+    array_vector_iterator &operator++() {
         data_ += el_sz_;
         return *this;
     }
 
-    __array_vector_iterator &operator--() {
+    array_vector_iterator &operator--() {
         data_ -= el_sz_;
         return *this;
     }
 
-    __array_vector_iterator operator++(int) {
-        __array_vector_iterator res = *this;
+    array_vector_iterator operator++(int) {
+        array_vector_iterator res = *this;
         data_ += el_sz_;
         return res;
     }
 
-    __array_vector_iterator operator--(int) {
-        __array_vector_iterator res = *this;
+    array_vector_iterator operator--(int) {
+        array_vector_iterator res = *this;
         data_ -= el_sz_;
         return res;
     }
 
-    __array_vector_iterator operator+(const difference_type &n) const {
-        return __array_vector_iterator(data_ + n * el_sz_, el_sz_);
+    array_vector_iterator operator+(const difference_type &n) const {
+        return array_vector_iterator(data_ + n * el_sz_, el_sz_);
     }
 
-    __array_vector_iterator &operator+=(const difference_type &n) {
+    array_vector_iterator &operator+=(const difference_type &n) {
         data_ += n * el_sz_;
         return *this;
     }
 
-    __array_vector_iterator operator-(const difference_type &n) const {
-        return __array_vector_iterator(data_ - n * el_sz_, el_sz_);
+    array_vector_iterator operator-(const difference_type &n) const {
+        return array_vector_iterator(data_ - n * el_sz_, el_sz_);
     }
 
-    __array_vector_iterator &operator-=(const difference_type &n) {
+    array_vector_iterator &operator-=(const difference_type &n) {
         data_ -= n * el_sz_;
         return *this;
     }
 
-    friend bool operator==(const __array_vector_iterator &r1,
-                           const __array_vector_iterator &r2) {
+    friend bool operator==(const array_vector_iterator &r1,
+                           const array_vector_iterator &r2) {
         return r1.data_ == r2.data_;
     }
 
-    friend bool operator!=(const __array_vector_iterator &r1,
-                           const __array_vector_iterator &r2) {
+    friend bool operator!=(const array_vector_iterator &r1,
+                           const array_vector_iterator &r2) {
         return r1.data_ != r2.data_;
     }
 
-    friend bool operator<(const __array_vector_iterator &r1,
-                          const __array_vector_iterator &r2) {
+    friend bool operator<(const array_vector_iterator &r1,
+                          const array_vector_iterator &r2) {
         return r1.data_ < r2.data_;
     }
 
-    friend bool operator<=(const __array_vector_iterator &r1,
-                           const __array_vector_iterator &r2) {
+    friend bool operator<=(const array_vector_iterator &r1,
+                           const array_vector_iterator &r2) {
         return r1.data_ <= r2.data_;
     }
 
-    friend bool operator>(const __array_vector_iterator &r1,
-                          const __array_vector_iterator &r2) {
+    friend bool operator>(const array_vector_iterator &r1,
+                          const array_vector_iterator &r2) {
         return r1.data_ > r2.data_;
     }
 
-    friend bool operator>=(const __array_vector_iterator &r1,
-                           const __array_vector_iterator &r2) {
+    friend bool operator>=(const array_vector_iterator &r1,
+                           const array_vector_iterator &r2) {
         return r1.data_ >= r2.data_;
     }
 
 
-    friend __array_vector_iterator
+    friend array_vector_iterator
     operator+(difference_type n,
-              const __array_vector_iterator &r2) {
+              const array_vector_iterator &r2) {
         return r2 + n;
     }
 
     friend difference_type
-    operator-(const __array_vector_iterator &r1,
-              const __array_vector_iterator &r2) {
+    operator-(const array_vector_iterator &r1,
+              const array_vector_iterator &r2) {
         return (r1.data_ - r2.data_) / r1.el_sz_;
     }
 };
@@ -546,34 +552,34 @@ public:
     typedef size_t size_type;
     typedef ptrdiff_t difference_type;
 
-    typedef __array_reference<array_vector> reference;
-    typedef __array_const_reference<array_vector> const_reference;
-    typedef __array<array_vector> value_type;
-    typedef __array_vector_iterator<array_vector, false> iterator;
-    typedef __array_vector_iterator<array_vector, true> const_iterator;
+    typedef array_reference<array_vector> reference;
+    typedef array_const_reference<array_vector> const_reference;
+    typedef array<array_vector> value_type;
+    typedef array_vector_iterator<array_vector, false> iterator;
+    typedef array_vector_iterator<array_vector, true> const_iterator;
 
 private:
-    typedef ElTy __storage_type;
-    typedef array_vector __self;
-    typedef __storage_type *__storage_pointer;
-    typedef const __storage_type *__const_storage_pointer;
+    typedef ElTy storage_type;
+    typedef array_vector self;
+    typedef storage_type *storage_pointer;
+    typedef const storage_type *const_storage_pointer;
 
-    friend class __array<__self>;
+    friend class array<self>;
 
-    friend class __array_reference<__self>;
+    friend class array_reference<self>;
 
-    friend class __array_const_reference<__self>;
+    friend class array_const_reference<self>;
 
-    friend class __array_vector_iterator<__self, true>;
+    friend class array_vector_iterator<self, true>;
 
-    friend class __array_vector_iterator<__self, false>;
+    friend class array_vector_iterator<self, false>;
 
-    __storage_pointer data_;
+    storage_pointer data_;
     size_type size_;
     size_type el_sz_;
 
 public:
-    array_vector(__storage_pointer data, size_type sz, size_type el_sz)
+    array_vector(storage_pointer data, size_type sz, size_type el_sz)
             : data_(data), size_(sz), el_sz_(el_sz) { }
 
     reference operator[](size_t pos) {
@@ -610,13 +616,13 @@ public:
 
     size_t size() const { return size_; }
 
-    __storage_pointer data() const { return data_; }
+    storage_pointer data() const { return data_; }
 
     void set_size(size_t size) {
         size_ = size;
     }
 
-    void set_data(__storage_pointer data) {
+    void set_data(storage_pointer data) {
         data_ = data;
     }
 };
@@ -674,4 +680,5 @@ struct array_equal_to {
     }
 };
 
+} //adt
 #endif
diff --git a/src/common/adt/bag.hpp b/src/common/adt/bag.hpp
index 47d58ad..5cded0e 100644
--- a/src/common/adt/bag.hpp
+++ b/src/common/adt/bag.hpp
@@ -9,29 +9,31 @@
 
 #include "utils/verify.hpp"
 
+namespace adt {
+
 template<class T, class hash = std::hash<T>>
 class bag {
     typedef std::unordered_map<T, size_t, hash> Data;
     Data data_;
     size_t size_;
 public:
-    
+
     bag() : size_(0) {
     }
 
     typedef typename Data::const_iterator const_iterator;
 
-    void put(const T& t, size_t mult) {
+    void put(const T &t, size_t mult) {
         VERIFY(mult > 0);
         data_[t] += mult;
         size_ += mult;
     }
 
-    void put(const T& t) {
+    void put(const T &t) {
         put(t, 1);
     }
 
-    bool take(const T& t, size_t mult) {
+    bool take(const T &t, size_t mult) {
         VERIFY(mult > 0);
         /*typename map<T, size_t>::iterator*/auto it = data_.find(t);
         if (it == data_.end()) {
@@ -54,11 +56,11 @@ public:
         }
     }
 
-    bool take(const T& t) {
+    bool take(const T &t) {
         return take(t, 1);
     }
 
-    size_t mult(const T& t) const {
+    size_t mult(const T &t) const {
         auto it = data_.find(t);
         if (it == data_.end()) {
             return 0;
@@ -85,3 +87,5 @@ public:
     }
 
 };
+
+} //adt
\ No newline at end of file
diff --git a/src/common/adt/bf.hpp b/src/common/adt/bf.hpp
index 1c9ef92..7b2254e 100644
--- a/src/common/adt/bf.hpp
+++ b/src/common/adt/bf.hpp
@@ -11,164 +11,167 @@ namespace bf {
 /// The counting Bloom filter.
 template<class T, unsigned width_ = 4>
 class counting_bloom_filter {
-  counting_bloom_filter(const counting_bloom_filter&) = delete;
-  counting_bloom_filter& operator=(const counting_bloom_filter&) = delete;
+    counting_bloom_filter(const counting_bloom_filter &) = delete;
+    counting_bloom_filter &operator=(const counting_bloom_filter &) = delete;
 
 protected:
-  static constexpr uint64_t cell_mask_ = (1ull << width_) - 1;
-  static constexpr size_t cells_per_entry_ = 8 * sizeof(uint64_t) / width_;
+    static constexpr uint64_t cell_mask_ = (1ull << width_) - 1;
+    static constexpr size_t cells_per_entry_ = 8 * sizeof(uint64_t) / width_;
 
 public:
-  /// The hash digest type.
-  typedef size_t digest;
-
-  /// The hash function type.
-  typedef std::function<digest(const T&, uint64_t seed)> hasher;
-
-  counting_bloom_filter() = default;
-  ~counting_bloom_filter() = default;
-
-  /// Constructs a counting Bloom filter.
-  /// @param h The hasher.
-  /// @param cells The number of cells.
-  /// @param num_hashes The number of hash functions to use
-  /// The memory consumption will be cells * width bits
-  counting_bloom_filter(hasher h,
-                        size_t cells, size_t num_hashes = 3)
-    : hasher_(std::move(h)),
-      num_hashes_(num_hashes),
-      cells_(cells),
-      data_((cells * width_ + 8 * sizeof(uint64_t) - 1)/ 8 / sizeof(uint64_t)) {
-    static_assert((width_ & (width_ - 1)) == 0, "Width must be power of two");
-  }
-
-  /// Move-constructs a counting Bloom filter.
-  counting_bloom_filter(counting_bloom_filter&&) = default;
-
-  /// Adds an element to the Bloom filter.
-  /// @tparam T The type of the element to insert.
-  /// @param x An instance of type `T`.
-  void add(const T &o) {
-    for (size_t i = 0; i < num_hashes_; ++i) {
-      digest d = hasher_(o, i);
-      size_t cell_id = d - cells_ * (d / cells_); // Use division here in order to test stuff like libidivide
-      size_t pos = cell_id / cells_per_entry_;
-      size_t epos = cell_id - pos * cells_per_entry_;
-      auto &entry = data_[pos];
-      uint64_t mask = cell_mask_ << (width_ * epos);
-
-      // Add counter
-      while (true) {
-        uint64_t val = entry.load();
-
-        // Overflow, do nothing
-        if ((val & mask) == mask)
-          break;
-
-        uint64_t newval = val + (1ull << (width_ * epos));
-        if (!entry.compare_exchange_strong(val, newval))
-          continue;
-
-        break;
-      }
-
+    /// The hash digest type.
+    typedef size_t digest;
+
+    /// The hash function type.
+    typedef std::function<digest(const T &, uint64_t seed)> hasher;
+
+    counting_bloom_filter() = default;
+
+    ~counting_bloom_filter() = default;
+
+    /// Constructs a counting Bloom filter.
+    /// @param h The hasher.
+    /// @param cells The number of cells.
+    /// @param num_hashes The number of hash functions to use
+    /// The memory consumption will be cells * width bits
+    counting_bloom_filter(hasher h,
+                          size_t cells, size_t num_hashes = 3)
+            : hasher_(std::move(h)),
+              num_hashes_(num_hashes),
+              cells_(cells),
+              data_((cells * width_ + 8 * sizeof(uint64_t) - 1) / 8 / sizeof(uint64_t)) {
+        static_assert((width_ & (width_ - 1)) == 0, "Width must be power of two");
     }
-  }
-
-  /// Retrieves the count of an element.
-  /// @tparam T The type of the element to query.
-  /// @param x An instance of type `T`.
-  /// @return A frequency estimate for *x*.
-  size_t lookup(const T &o) const {
-    size_t val = (1ull << width_) - 1;
-    for (size_t i = 0; i < num_hashes_; ++i) {
-      digest d = hasher_(o, i);
-      size_t cell_id = d - cells_ * (d / cells_); // Use division here in order to test stuff like libidivide
-      size_t pos = cell_id / cells_per_entry_;
-      size_t epos = cell_id - pos * cells_per_entry_;
-      size_t cval = (data_[pos] >> (width_ * epos)) & cell_mask_;
-      if (val > cval)
-        val = cval;
+
+    /// Move-constructs a counting Bloom filter.
+    counting_bloom_filter(counting_bloom_filter &&) = default;
+
+    /// Adds an element to the Bloom filter.
+    /// @tparam T The type of the element to insert.
+    /// @param x An instance of type `T`.
+    void add(const T &o) {
+        for (size_t i = 0; i < num_hashes_; ++i) {
+            digest d = hasher_(o, i);
+            size_t cell_id = d - cells_ * (d / cells_); // Use division here in order to test stuff like libidivide
+            size_t pos = cell_id / cells_per_entry_;
+            size_t epos = cell_id - pos * cells_per_entry_;
+            auto &entry = data_[pos];
+            uint64_t mask = cell_mask_ << (width_ * epos);
+
+            // Add counter
+            while (true) {
+                uint64_t val = entry.load();
+
+                // Overflow, do nothing
+                if ((val & mask) == mask)
+                    break;
+
+                uint64_t newval = val + (1ull << (width_ * epos));
+                if (!entry.compare_exchange_strong(val, newval))
+                    continue;
+
+                break;
+            }
+
+        }
     }
 
-    return val;
-  }
+    /// Retrieves the count of an element.
+    /// @tparam T The type of the element to query.
+    /// @param x An instance of type `T`.
+    /// @return A frequency estimate for *x*.
+    size_t lookup(const T &o) const {
+        size_t val = (1ull << width_) - 1;
+        for (size_t i = 0; i < num_hashes_; ++i) {
+            digest d = hasher_(o, i);
+            size_t cell_id = d - cells_ * (d / cells_); // Use division here in order to test stuff like libidivide
+            size_t pos = cell_id / cells_per_entry_;
+            size_t epos = cell_id - pos * cells_per_entry_;
+            size_t cval = (data_[pos] >> (width_ * epos)) & cell_mask_;
+            if (val > cval)
+                val = cval;
+        }
+
+        return val;
+    }
 
-  /// Removes all items from the Bloom filter.
-  void clear() {
-    std::fill(data_.begin(), data_.end(), 0);
-  }
+    /// Removes all items from the Bloom filter.
+    void clear() {
+        std::fill(data_.begin(), data_.end(), 0);
+    }
 
 protected:
-  hasher hasher_;
-  size_t num_hashes_;
-  size_t cells_;
-  std::vector<std::atomic<uint64_t>> data_;
+    hasher hasher_;
+    size_t num_hashes_;
+    size_t cells_;
+    std::vector<std::atomic<uint64_t>> data_;
 };
 
 /// The counting Bloom filter.
 template<class T, unsigned width_ = 4>
 class bitcounting_bloom_filter : public counting_bloom_filter<T, width_> {
-  using typename counting_bloom_filter<T, width_>::digest;
-  using typename counting_bloom_filter<T, width_>::hasher;
-  
- public:
-  bitcounting_bloom_filter(hasher h,
-                           size_t cells, size_t num_hashes = 3)
-    : counting_bloom_filter<T, width_>(h, cells, num_hashes) {}
-
-  /// Adds an element to the Bloom filter.
-  /// @tparam T The type of the element to insert.
-  /// @param x An instance of type `T`.
-  void add(const T &o) {
-    for (size_t i = 0; i < this->num_hashes_; ++i) {
-      digest d = this->hasher_(o, i);
-      size_t cell_id = d - this->cells_ * (d / this->cells_); // Use division here in order to test stuff like libidivide
-      size_t pos = cell_id / this->cells_per_entry_;
-      size_t epos = cell_id - pos * this->cells_per_entry_;
-      auto &entry = this->data_[pos];
-      uint64_t mask = this->cell_mask_ << (width_ * epos);
-
-      // Add counter
-      while (true) {
-        uint64_t val = entry.load() & mask;
-
-        // Overflow, do nothing
-        if (val == mask)
-          break;
-
-        uint64_t cellval = val >> width_ * epos;
-        size_t cnt = (cellval == 0 ? 0 : 64 - __builtin_clzll(cellval)) + width_ * epos;
-        
-        if ((std::atomic_fetch_or(&entry, uint64_t(1) << cnt) & mask) != val)
-          continue;
-
-        break;
-      }
-    }
-  }
-
-  /// Retrieves the count of an element.
-  /// @tparam T The type of the element to query.
-  /// @param x An instance of type `T`.
-  /// @return A frequency estimate for *x*.
-  size_t lookup(const T &o) const {
-    size_t val = (1ull << width_) - 1;
-    for (size_t i = 0; i < this->num_hashes_; ++i) {
-      digest d = this->hasher_(o, i);
-      size_t cell_id = d - this->cells_ * (d / this->cells_); // Use division here in order to test stuff like libidivide
-      size_t pos = cell_id / this->cells_per_entry_;
-      size_t epos = cell_id - pos * this->cells_per_entry_;
-      uint64_t entry = (this->data_[pos] >> (width_ * epos)) & this->cell_mask_;
-      size_t cval = (entry == 0 ? 0 : 64 - __builtin_clzll(entry));
-      
-      if (val > cval)
-        val = cval;
+    using typename counting_bloom_filter<T, width_>::digest;
+    using typename counting_bloom_filter<T, width_>::hasher;
+
+public:
+    bitcounting_bloom_filter(hasher h,
+                             size_t cells, size_t num_hashes = 3)
+            : counting_bloom_filter<T, width_>(h, cells, num_hashes) { }
+
+    /// Adds an element to the Bloom filter.
+    /// @tparam T The type of the element to insert.
+    /// @param x An instance of type `T`.
+    void add(const T &o) {
+        for (size_t i = 0; i < this->num_hashes_; ++i) {
+            digest d = this->hasher_(o, i);
+            size_t cell_id = d - this->cells_ *
+                                 (d / this->cells_); // Use division here in order to test stuff like libidivide
+            size_t pos = cell_id / this->cells_per_entry_;
+            size_t epos = cell_id - pos * this->cells_per_entry_;
+            auto &entry = this->data_[pos];
+            uint64_t mask = this->cell_mask_ << (width_ * epos);
+
+            // Add counter
+            while (true) {
+                uint64_t val = entry.load() & mask;
+
+                // Overflow, do nothing
+                if (val == mask)
+                    break;
+
+                uint64_t cellval = val >> width_ * epos;
+                size_t cnt = (cellval == 0 ? 0 : 64 - __builtin_clzll(cellval)) + width_ * epos;
+
+                if ((std::atomic_fetch_or(&entry, uint64_t(1) << cnt) & mask) != val)
+                    continue;
+
+                break;
+            }
+        }
     }
 
-    return val;
-  }
+    /// Retrieves the count of an element.
+    /// @tparam T The type of the element to query.
+    /// @param x An instance of type `T`.
+    /// @return A frequency estimate for *x*.
+    size_t lookup(const T &o) const {
+        size_t val = (1ull << width_) - 1;
+        for (size_t i = 0; i < this->num_hashes_; ++i) {
+            digest d = this->hasher_(o, i);
+            size_t cell_id = d - this->cells_ *
+                                 (d / this->cells_); // Use division here in order to test stuff like libidivide
+            size_t pos = cell_id / this->cells_per_entry_;
+            size_t epos = cell_id - pos * this->cells_per_entry_;
+            uint64_t entry = (this->data_[pos] >> (width_ * epos)) & this->cell_mask_;
+            size_t cval = (entry == 0 ? 0 : 64 - __builtin_clzll(entry));
+
+            if (val > cval)
+                val = cval;
+        }
+
+        return val;
+    }
 };
 
 
-} // namespace bf
+} // namespace bf
\ No newline at end of file
diff --git a/src/common/adt/chained_iterator.hpp b/src/common/adt/chained_iterator.hpp
index c7ef9d2..744c5d9 100644
--- a/src/common/adt/chained_iterator.hpp
+++ b/src/common/adt/chained_iterator.hpp
@@ -13,6 +13,8 @@
 #include <iterator>
 #include <vector>
 
+namespace adt {
+
 template<class It>
 class chained_iterator :
         public boost::iterator_facade<chained_iterator<It>,
@@ -72,5 +74,5 @@ private:
     std::vector<It> ends_;
 };
 
-
+} //adt
 #endif
diff --git a/src/common/adt/concurrent_dsu.hpp b/src/common/adt/concurrent_dsu.hpp
index b45445c..1ff93a6 100644
--- a/src/common/adt/concurrent_dsu.hpp
+++ b/src/common/adt/concurrent_dsu.hpp
@@ -26,6 +26,8 @@
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wconversion"
 
+namespace dsu {
+
 class ConcurrentDSU {
     struct atomic_set_t {
         uint64_t data  : 61;
@@ -209,12 +211,12 @@ public:
         std::unordered_map<size_t, size_t> sizes;
 
 #if 0
-    for (size_t x = 0; x < size; ++x) {
-        if (data_[x].parent != x) {
-            size_t t = data_[x].parent;
-            VERIFY(data_[t].parent == t)
+        for (size_t x = 0; x < size; ++x) {
+            if (data_[x].parent != x) {
+                size_t t = data_[x].parent;
+                VERIFY(data_[t].parent == t)
+            }
         }
-    }
 #endif
 
         // Insert all the root elements into the map
@@ -256,7 +258,7 @@ public:
         os.close();
 
         // Write down the sizes
-        MMappedRecordWriter<size_t> index(Prefix + ".idx");
+        MMappedRecordWriter <size_t> index(Prefix + ".idx");
         index.reserve(sizes.size());
         size_t *idx = index.data();
         for (size_t x = 0, i = 0, sz = 0; x < data_.size(); ++x) {
@@ -292,6 +294,7 @@ private:
     mutable std::vector<std::atomic<atomic_set_t> > data_;
 };
 
+} //dsu
 #pragma GCC diagnostic pop
 
 #endif /* CONCURRENTDSU_HPP_ */
diff --git a/src/common/adt/filter_iterator.hpp b/src/common/adt/filter_iterator.hpp
index fc5293a..5cac88a 100644
--- a/src/common/adt/filter_iterator.hpp
+++ b/src/common/adt/filter_iterator.hpp
@@ -8,42 +8,44 @@
 #ifndef FILTER_ITERATOR_H_
 #define FILTER_ITERATOR_H_
 
+namespace adt {
+
 /**
- * Iterator with some predicate -- iterates only on elements with predicate(item) == true
- */
+* Iterator with some predicate -- iterates only on elements with predicate(item) == true
+*/
 template<typename iterator_type, typename predicate_type>
 class filter_iterator {
 public:
     typedef typename iterator_type::value_type value_type;
 
-    filter_iterator(const iterator_type& begin, const iterator_type& end, const predicate_type& pred):
-        current_(begin), end_(end), pred_(pred)
-    {
-        while((current_ != end_) && (!pred_(*current_))) // why do we need here? DRY, see method advance() below.
+    filter_iterator(const iterator_type &begin, const iterator_type &end, const predicate_type &pred) :
+            current_(begin), end_(end), pred_(pred) {
+        while ((current_ != end_) && (!pred_(*current_))) // why do we need here? DRY, see method advance() below.
             ++current_;
     } // filter_iterator
 
     value_type operator*() const { return *current_; }
     value_type operator->() const { return *current_; }
 
-    filter_iterator& operator++() { advance(); return *this; }
+    filter_iterator &operator++() {
+        advance();
+        return *this;
+    }
 
-    bool operator==(const filter_iterator& rhs) const { return current_ == rhs.current_; }
-    bool operator!=(const filter_iterator& rhs) const { return !(operator==(rhs)); }
+    bool operator==(const filter_iterator &rhs) const { return current_ == rhs.current_; }
+    bool operator!=(const filter_iterator &rhs) const { return !(operator==(rhs)); }
 
 private:
-    void advance()
-    {
-        do
-        {
+    void advance() {
+        do {
             ++current_;
         }
-        while((current_ != end_) && (!pred_(*current_)));
+        while ((current_ != end_) && (!pred_(*current_)));
     } // advance
 
     iterator_type current_;
     iterator_type end_;
     predicate_type pred_;
 };
-
+} //adt
 #endif /* FILTER_ITERATOR_H_ */
diff --git a/src/common/adt/flat_map.hpp b/src/common/adt/flat_map.hpp
index a12e1a8..677e8eb 100644
--- a/src/common/adt/flat_map.hpp
+++ b/src/common/adt/flat_map.hpp
@@ -15,14 +15,16 @@ struct flat_map {
     typedef V mapped_type;
     typedef std::pair<K, V> value_type;
     typedef Comp key_compare;
+
     struct value_compare : std::binary_function<value_type, value_type, bool> {
-        bool operator()(const value_type & lhs, const value_type & rhs) const {
+        bool operator()(const value_type &lhs, const value_type &rhs) const {
             return key_compare()(lhs.first, rhs.first);
         }
     };
+
     typedef Allocator allocator_type;
-    typedef V& reference;
-    typedef const V& const_reference;
+    typedef V &reference;
+    typedef const V &const_reference;
     typedef typename std::allocator_traits<allocator_type>::pointer pointer;
     typedef typename std::allocator_traits<allocator_type>::const_pointer const_pointer;
     typedef std::vector<value_type, allocator_type> container_type;
@@ -34,10 +36,12 @@ struct flat_map {
     typedef typename container_type::size_type size_type;
 
     flat_map() = default;
+
     template<typename It>
     flat_map(It begin, It end) { insert(begin, end); }
+
     flat_map(std::initializer_list<value_type> init)
-            : flat_map(init.begin(), init.end()) {}
+            : flat_map(init.begin(), init.end()) { }
 
     iterator                begin()              {    return data_.begin();    }
     iterator                end()                {    return data_.end();      }
@@ -56,11 +60,11 @@ struct flat_map {
     size_type size() const { return data_.size(); }
     size_type max_size() const { return data_.max_size(); }
     size_type capacity() const { return data_.capacity(); }
-    void reserve(size_type size) {data_.reserve(size); }
+    void reserve(size_type size) { data_.reserve(size); }
     void shrink_to_fit() { data_.shrink_to_fit(); }
     size_type bytes_used() const { return capacity() * sizeof(value_type) + sizeof(data_); }
 
-    mapped_type & operator[](const key_type &key) {
+    mapped_type &operator[](const key_type &key) {
         KeyOrValueCompare comp;
         auto lower = lower_bound(key);
         if (lower == end() || comp(key, *lower))
@@ -68,7 +72,8 @@ struct flat_map {
         else
             return lower->second;
     }
-    mapped_type & operator[](key_type &&key) {
+
+    mapped_type &operator[](key_type &&key) {
         KeyOrValueCompare comp;
         auto lower = lower_bound(key);
         if (lower == end() || comp(key, *lower))
@@ -80,12 +85,15 @@ struct flat_map {
     std::pair<iterator, bool> insert(value_type &&value) {
         return emplace(std::move(value));
     }
+
     std::pair<iterator, bool> insert(const value_type &value) {
         return emplace(value);
     }
+
     iterator insert(const_iterator hint, value_type &&value) {
         return emplace_hint(hint, std::move(value));
     }
+
     iterator insert(const_iterator hint, const value_type &value) {
         return emplace_hint(hint, value);
     }
@@ -99,7 +107,7 @@ struct flat_map {
         }
         if (begin == end)
             return;
-        
+
         // If we don't need to increase capacity, then we can use a more efficient
         // insert method where everything is just put in the same vector
         // and then merge in place.
@@ -108,7 +116,7 @@ struct flat_map {
             for (size_t i = capacity(); i > size_before && begin != end; --i, ++begin) {
                 data_.emplace_back(*begin);
             }
-        } catch(...) {
+        } catch (...) {
             // If emplace_back throws an exception, the easiest way to make sure
             // that our invariants are still in place is to resize to the state
             // we were in before
@@ -139,15 +147,19 @@ struct flat_map {
         // Insert the remaining elements that didn't fit by calling this function recursively.
         return insert(begin, end);
     }
+
     void insert(std::initializer_list<value_type> il) {
         insert(il.begin(), il.end());
     }
+
     iterator erase(iterator it) {
         return data_.erase(it);
     }
+
     iterator erase(const_iterator it) {
         return erase(iterator_const_cast(it));
     }
+
     size_type erase(const key_type &key) {
         auto found = find(key);
         if (found == end())
@@ -155,33 +167,40 @@ struct flat_map {
         erase(found);
         return 1;
     }
+
     iterator erase(const_iterator first, const_iterator last) {
         return data_.erase(iterator_const_cast(first), iterator_const_cast(last));
     }
-    void swap(flat_map & other) {
+
+    void swap(flat_map &other) {
         data_.swap(other.data);
     }
+
     void clear() {
         data_.clear();
     }
+
     template<typename First, typename... Args>
     std::pair<iterator, bool> emplace(First &&first, Args &&... args) {
         KeyOrValueCompare comp;
         auto lower_bound = std::lower_bound(data_.begin(), data_.end(), first, comp);
         if (lower_bound == data_.end() || comp(first, *lower_bound))
-            return { data_.emplace(lower_bound, std::forward<First>(first), std::forward<Args>(args)...), true };
+            return {data_.emplace(lower_bound, std::forward<First>(first), std::forward<Args>(args)...), true};
         else
-            return { lower_bound, false };
+            return {lower_bound, false};
     }
+
     std::pair<iterator, bool> emplace() {
         return emplace(value_type());
     }
+
     template<typename First, typename... Args>
     iterator emplace_hint(const_iterator hint, First &&first, Args &&... args) {
         KeyOrValueCompare comp;
         if (hint == cend() || comp(first, *hint)) {
             if (hint == cbegin() || comp(*(hint - 1), first))
-                return data_.emplace(iterator_const_cast(hint), std::forward<First>(first), std::forward<Args>(args)...);
+                return data_.emplace(iterator_const_cast(hint), std::forward<First>(first),
+                                     std::forward<Args>(args)...);
             else
                 return emplace(std::forward<First>(first), std::forward<Args>(args)...).first;
         } else if (!comp(*hint, first)) {
@@ -190,6 +209,7 @@ struct flat_map {
             return emplace(std::forward<First>(first), std::forward<Args>(args)...).first;
         }
     }
+
     iterator emplace_hint(const_iterator hint) {
         return emplace_hint(hint, value_type());
     }
@@ -218,11 +238,11 @@ struct flat_map {
         return std::lower_bound(begin(), end(), key, KeyOrValueCompare());
     }
     template<typename T>
-    const_iterator lower_bound(const T & key) const {
+    const_iterator lower_bound(const T &key) const {
         return std::lower_bound(begin(), end(), key, KeyOrValueCompare());
     }
     template<typename T>
-    iterator upper_bound(const T & key) {
+    iterator upper_bound(const T &key) {
         return std::upper_bound(begin(), end(), key, KeyOrValueCompare());
     }
     template<typename T>
@@ -260,7 +280,7 @@ struct flat_map {
         return !(*this < other);
     }
 
-  private:
+private:
     container_type data_;
 
     iterator iterator_const_cast(const_iterator it) {
@@ -301,7 +321,7 @@ struct flat_map {
     // like std::binary_search, but returns the iterator to the element
     // if it was found, and returns end otherwise
     template<typename It, typename T, typename Compare>
-    static It binary_find(It begin, It end, const T & value, const Compare & cmp) {
+    static It binary_find(It begin, It end, const T &value, const Compare &cmp) {
         auto lower_bound = std::lower_bound(begin, end, value, cmp);
         if (lower_bound == end || cmp(value, *lower_bound))
             return end;
@@ -311,10 +331,9 @@ struct flat_map {
 };
 
 template<typename K, typename V, typename C, typename A>
-void swap(flat_map<K, V, C, A> & lhs, flat_map<K, V, C, A> & rhs) {
+void swap(flat_map<K, V, C, A> &lhs, flat_map<K, V, C, A> &rhs) {
     lhs.swap(rhs);
 }
 
-}
-
+} //adt
 #endif
diff --git a/src/common/adt/flat_set.hpp b/src/common/adt/flat_set.hpp
index b4ee8e0..d6b13b6 100644
--- a/src/common/adt/flat_set.hpp
+++ b/src/common/adt/flat_set.hpp
@@ -7,17 +7,16 @@
 #include <algorithm>
 #include <type_traits>
 #include <functional>
-
 namespace adt {
 
-template<typename T, typename Comp = std::less<T>, template<typename, typename...> class Container = std::vector >
+template<typename T, typename Comp = std::less<T>, template<typename, typename...> class Container = std::vector>
 struct flat_set {
     typedef T key_type;
     typedef T value_type;
     typedef Comp key_compare;
     typedef Comp value_compare;
-    typedef value_type& reference;
-    typedef const value_type& const_reference;
+    typedef value_type &reference;
+    typedef const value_type &const_reference;
     typedef Container<value_type> container_type;
     typedef typename container_type::pointer pointer;
     typedef typename container_type::const_pointer const_pointer;
@@ -29,10 +28,12 @@ struct flat_set {
     typedef typename container_type::size_type size_type;
 
     flat_set() = default;
+
     template<typename It>
     flat_set(It begin, It end) {
         insert(begin, end);
     }
+
     flat_set(std::initializer_list<value_type> init)
             : flat_set(init.begin(), init.end()) { }
 
@@ -57,8 +58,8 @@ struct flat_set {
     void shrink_to_fit() { data_.shrink_to_fit(); }
     size_type bytes_used() const { return capacity() * sizeof(value_type) + sizeof(data_); }
 
-    std::pair<iterator, bool> insert(value_type && value) { return emplace(std::move(value)); }
-    std::pair<iterator, bool> insert(const value_type & value) { return emplace(value); }
+    std::pair<iterator, bool> insert(value_type &&value) { return emplace(std::move(value)); }
+    std::pair<iterator, bool> insert(const value_type &value) { return emplace(value); }
     iterator insert(const_iterator hint, value_type && value) { return emplace_hint(hint, std::move(value)); }
     iterator insert(const_iterator hint, const value_type & value) { return emplace_hint(hint, value); }
     void insert(std::initializer_list<value_type> il) { insert(il.begin(), il.end()); }
@@ -80,7 +81,7 @@ struct flat_set {
             for (size_t i = capacity(); i > size_before && begin != end; --i, ++begin) {
                 data_.emplace_back(*begin);
             }
-        } catch(...) {
+        } catch (...) {
             // If emplace_back throws an exception, the easiest way to make sure
             // that our invariants are still in place is to resize to the state
             // we were in before
@@ -110,6 +111,7 @@ struct flat_set {
         // this will recurse log(n) times where n is std::distance(begin, end)
         return insert(begin, end);
     }
+
     iterator erase(iterator it) { return data_.erase(it); }
     iterator erase(const_iterator it) { return erase(iterator_const_cast(it)); }
     size_type erase(const value_type &val) {
@@ -122,25 +124,30 @@ struct flat_set {
         return data_.erase(iterator_const_cast(first), iterator_const_cast(last));
     }
 
-    void swap(flat_set & other) { data_.swap(other.data); }
+    void swap(flat_set &other) { data_.swap(other.data); }
+
     void clear() { data_.clear(); }
 
     template<typename First, typename... Args>
-    std::pair<iterator, bool> emplace(First && first, Args &&... args) {
+    std::pair<iterator, bool> emplace(First &&first, Args &&... args) {
         Comp comp;
         auto lower_bound = std::lower_bound(data_.begin(), data_.end(), first, comp);
         if (lower_bound == data_.end() || comp(first, *lower_bound))
-            return { data_.emplace(lower_bound, std::forward<First>(first), std::forward<Args>(args)...), true };
+            return {data_.emplace(lower_bound, std::forward<First>(first), std::forward<Args>(args)...),
+                    true};
         else
-            return { lower_bound, false };
+            return {lower_bound, false};
     }
+
     std::pair<iterator, bool> emplace() { return emplace(value_type()); }
+
     template<typename First, typename... Args>
-    iterator emplace_hint(const_iterator hint, First && first, Args &&... args) {
+    iterator emplace_hint(const_iterator hint, First &&first, Args &&... args) {
         Comp comp;
         if (hint == cend() || comp(first, *hint)) {
             if (hint == cbegin() || comp(*(hint - 1), first))
-                return data_.emplace(iterator_const_cast(hint), std::forward<First>(first), std::forward<Args>(args)...);
+                return data_.emplace(iterator_const_cast(hint), std::forward<First>(first),
+                                     std::forward<Args>(args)...);
             else
                 return emplace(std::forward<First>(first), std::forward<Args>(args)...).first;
         } else if (!comp(*hint, first)) {
@@ -201,7 +208,7 @@ struct flat_set {
         return !(*this < other);
     }
 
-  private:
+private:
     container_type data_;
 
     iterator iterator_const_cast(const_iterator it) {
@@ -221,10 +228,9 @@ struct flat_set {
 };
 
 template<typename V, typename C, template<typename, typename...> class Container>
-void swap(flat_set<V, C, Container> & lhs, flat_set<V, C, Container> & rhs) {
+void swap(flat_set<V, C, Container> &lhs, flat_set<V, C, Container> &rhs) {
     lhs.swap(rhs);
 }
 
-}
-
+} //adt
 #endif // __ADT_FLAT_SET_HPP__
diff --git a/src/common/adt/hll.hpp b/src/common/adt/hll.hpp
index ab24fbe..bc77cab 100644
--- a/src/common/adt/hll.hpp
+++ b/src/common/adt/hll.hpp
@@ -4,21 +4,21 @@
 #include <functional>
 #include <numeric>
 #include <cmath>
-
 namespace hll {
-  template<class T, unsigned precision = 24>
-  class hll {
+
+template<class T, unsigned precision = 24>
+class hll {
     static constexpr uint64_t m_ = 1ull << precision;
     static constexpr uint64_t mask_ = (m_ - 1) << (64 - precision);
-    
+
     constexpr double alpha(unsigned p) const {
       // constexpr switches are C++14 only :(
       return (p > 6 ?
               0.7213 / (1.0 + 1.079 / double(1ull << p)) :
               p == 6 ? 0.709 : p == 5 ? 0.697 : 0.673);
     }
-    
-   public:
+
+public:
     /// The hash digest type.
     typedef uint64_t digest;
 
@@ -26,8 +26,7 @@ namespace hll {
     typedef std::function<digest(const T)> hasher;
 
     hll(hasher h)
-      : hasher_(std::move(h)), data_(1ull << precision, 0)
-    { }
+            : hasher_(std::move(h)), data_(1ull << precision, 0) { }
 
 
     /// @tparam T The type of the element to insert.
@@ -46,24 +45,24 @@ namespace hll {
       for (size_t i = 0; i < data_.size(); ++i)
         data_[i] = std::max(data_[i], other.data_[i]);
     }
-    
+
     std::pair<double, bool> cardinality() const {
       // FIXME: Precision loss?
       // FIXME: Bias correction!
       double res = alpha(precision) * m_ * m_;
       double E = std::accumulate(data_.begin(), data_.end(),
-                                 0.0, [](double a, uint8_t b) { return a + exp2(-(double)b); });
+                                 0.0, [](double a, uint8_t b) { return a + exp2(-(double) b); });
       res /= E;
-      return { res, res > 5.0 * m_/2 };
+      return {res, res > 5.0 * m_ / 2};
     }
 
     void clear() {
-        std::fill(data_.begin(), data_.end(), 0);
+      std::fill(data_.begin(), data_.end(), 0);
     }
-      
-   private:
+
+private:
     hasher hasher_;
     std::vector<uint8_t> data_;
-  };
+};
 
-} //namespace hll
+} // hll
\ No newline at end of file
diff --git a/src/common/adt/iterator_range.hpp b/src/common/adt/iterator_range.hpp
index 7b5db6b..8f9f17a 100644
--- a/src/common/adt/iterator_range.hpp
+++ b/src/common/adt/iterator_range.hpp
@@ -9,7 +9,6 @@
 
 #include <utility>
 #include <iterator>
-
 namespace adt {
 
 template<typename IteratorT>
@@ -26,9 +25,10 @@ public:
             : begin_iterator(std::move(begin_iterator)),
               end_iterator(std::move(end_iterator)) { }
 
-    IteratorT begin() const { return begin_iterator; }
-
-    IteratorT end() const { return end_iterator; }
+    const IteratorT& begin() const { return begin_iterator; }
+    const IteratorT& end() const { return end_iterator; }
+    IteratorT& begin() { return begin_iterator; }
+    IteratorT& end() { return end_iterator; }
 };
 
 template<class T>
@@ -45,6 +45,7 @@ template<typename T>
 iterator_range<decltype(begin(std::declval<T>()))> drop_begin(T &&t, int n) {
     return make_range(std::next(begin(t), n), end(t));
 }
-}
 
+
+} //adt
 #endif
diff --git a/src/common/adt/kmer_hash_vector.hpp b/src/common/adt/kmer_hash_vector.hpp
deleted file mode 100644
index fcc486f..0000000
--- a/src/common/adt/kmer_hash_vector.hpp
+++ /dev/null
@@ -1,370 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-/*
- * kmer_hash_vector.hpp
- *
- *  Created on: Jul 19, 2012
- *      Author: alex
- */
-
-#ifndef KMER_HASH_VECTOR_HPP_
-#define KMER_HASH_VECTOR_HPP_
-
-
-#include "sequence/runtime_k.hpp"
-#include "kmer_map.hpp"
-
-
-namespace runtime_k {
-
-class IKmerHashVector {
-
-protected:
-    static const size_t LOAD_OVERHEAD = 1000;
-
-    size_t      nthreads_;
-
-    size_t      cell_size_;
-
-public:
-    typedef RtSeq input_value_type;
-
-    IKmerHashVector(size_t nthreads)
-        : nthreads_     (nthreads)
-        , cell_size_    (LOAD_OVERHEAD) {
-    }
-
-    virtual ~IKmerHashVector() {
-
-    }
-
-    virtual IKmerHashVector * copy() const = 0;
-
-    virtual void clear() = 0;
-
-    virtual void clear(size_t i) = 0;
-
-    virtual bool is_full() const = 0;
-
-    virtual bool is_presisely_full() const = 0;
-
-    virtual size_t capacity(size_t i) const = 0;
-
-    virtual size_t size(size_t i) const = 0;
-
-
-    virtual void insert(const input_value_type& value) = 0;
-
-    virtual void reserve(size_t cell_size) = 0;
-
-
-    virtual size_t get_k() const = 0;
-
-    size_t get_threads_num() const
-    {
-        return nthreads_;
-    }
-
-    virtual void dump (KmerMap<int>& destination, size_t bucketNum) = 0;
-};
-
-
-
-class KmerHashVector {
-
-public:
-
-    typedef IKmerHashVector base_vector_type;
-
-private:
-
-    base_vector_type * data_;
-
-public:
-
-    typedef KmerHashVector vector_type;
-
-    typedef base_vector_type::input_value_type input_value_type;
-
-
-    KmerHashVector(size_t k, size_t nthreads);
-
-    KmerHashVector(base_vector_type * vec): data_(vec) {
-    }
-
-    KmerHashVector(const vector_type& vec) {
-        data_ = vec.data_->copy();
-    }
-
-    vector_type& operator=(const vector_type& vec) {
-        if (vec.data_ != data_) {
-            delete data_;
-            data_ = vec.data_->copy();
-        }
-
-        return *this;
-    }
-
-    ~KmerHashVector() {
-       delete data_;
-    }
-
-
-
-    bool is_full() const {
-        return data_->is_full();
-    }
-
-    bool is_presisely_full() const {
-        return data_->is_presisely_full();
-    }
-
-    size_t get_threads_num() const
-    {
-        return data_->get_threads_num();
-    }
-
-
-    void insert(const input_value_type& value) {
-        data_->insert(value);
-    }
-
-    void clear() {
-        data_->clear();
-    }
-
-
-    void clear(size_t i) {
-        data_->clear(i);
-    }
-
-    size_t get_k() const {
-        return data_->get_k();
-    }
-
-    size_t capacity(size_t i) const {
-        return data_->capacity(i);
-    }
-
-    void reserve(size_t cell_size) {
-        data_->reserve(cell_size);
-    }
-
-    base_vector_type * get_data() const {
-        return data_;
-    }
-
-    void print_sizes() {
-        for (size_t i = 0; i < data_->get_threads_num(); ++i) {
-            INFO("Size " << i << ": " << data_->size(i));
-        }
-    }
-
-    void dump (KmerMap<int>& destination, size_t bucketNum) {
-        data_->dump(destination, bucketNum);
-    }
-};
-
-
-// ================================= VECTOR IMPLEMENTATION =================================
-
-template <size_t size_>
-class KmerHashVectorImpl: public IKmerHashVector {
-
-public:
-
-    typedef TypeContainerImpl<size_> type_container;
-
-    typedef typename type_container::Kmer Kmer;
-
-    typedef typename type_container::vector_type vector_type;
-
-    typedef std::vector<vector_type> data_type;
-
-    typedef IKmerHashVector base_type;
-
-    typedef typename base_type::input_value_type input_value_type;
-
-private:
-
-    data_type data_;
-
-    size_t k_;
-
-public:
-
-    KmerHashVectorImpl(size_t k, size_t nthreads):
-        IKmerHashVector(nthreads)
-        , data_      (nthreads)
-        , k_         (k)   {
-    }
-
-    virtual base_type * copy() const {
-        return new KmerHashVectorImpl<size_>(*this);
-    }
-
-    virtual bool is_full() const {
-        return data_[0].size() >= cell_size_;
-    }
-
-    virtual bool is_presisely_full() const {
-        for (size_t i = 0; i < nthreads_; ++i) {
-            if (data_[i].size() >= cell_size_)
-                return true;
-        }
-        return false;
-    }
-
-    virtual void insert(const input_value_type& value) {
-        Kmer kmer = type_container::from_sequence(value);
-        data_[kmer.GetHash() % nthreads_].push_back(kmer);
-    }
-
-    virtual void clear() {
-        for (size_t i = 0; i < nthreads_; ++i) {
-            data_[i].clear();
-        }
-    }
-
-    virtual void clear(size_t i) {
-        data_[i].clear();
-    }
-
-    virtual size_t get_k() const {
-        return k_;
-    }
-
-    virtual size_t capacity(size_t i) const {
-        return data_[i].capacity();
-    }
-
-    virtual size_t size(size_t i) const {
-        return data_[i].size();
-    }
-
-    virtual void reserve(size_t cell_size) {
-        cell_size_ = cell_size;
-        for (size_t i = 0; i < nthreads_; ++i) {
-            data_[i].reserve(cell_size_ + LOAD_OVERHEAD);
-        }
-    }
-
-    const data_type& get_data() const {
-        return data_;
-    }
-
-    virtual void dump (KmerMap<int>& destination, size_t bucketNum) {
-        KmerMapImpl<size_, int>& destImpl = dynamic_cast<KmerMapImpl<size_, int>&>(destination.get_data());
-
-        for (auto it = data_[bucketNum].begin(), end = data_[bucketNum].end(); it != end; ++it) {
-            ++destImpl[*it];
-        }
-    }
-};
-
-
-// ================================= VECTOR FACTORIES =================================
-// Single factory interface
-class SingleKmerHashVectorFactory {
-
-public:
-
-    virtual IKmerHashVector * GetHashVector(size_t k, size_t nthreads) const = 0;
-
-    virtual ~SingleKmerHashVectorFactory() {
-
-    }
-};
-
-
-// Single factory for specific k and value
-template <size_t ts_>
-class SingleKmerHashVectorFactoryImpl: public SingleKmerHashVectorFactory {
-
-public:
-
-    virtual IKmerHashVector * GetHashVector(size_t k, size_t nthreads) const {
-        VERIFY_MSG(GET_UPPER_BOUND(k) == GET_K_BY_TS(ts_), k << " -> " << GET_UPPER_BOUND(k) << ", " << ts_ << " -> " << GET_K_BY_TS(ts_));
-        //INFO(k << " -> " << GET_UPPER_BOUND(k) << ", " << ts_ << " -> " << GET_K_BY_TS(ts_));
-
-        return new KmerHashVectorImpl< GET_K_BY_TS(ts_) >(k, nthreads);
-    }
-
-};
-
-//Factory genetator
-template<size_t ts_>
-class HashVectorGenerator {
-
-public:
-
-    static void GenerateHashVectors(std::vector< SingleKmerHashVectorFactory* > & factories) {
-        factories[ts_] = new SingleKmerHashVectorFactoryImpl<ts_>();
-        HashVectorGenerator<ts_ - 1> :: GenerateHashVectors (factories);
-    }
-};
-
-//Terminating factory generator
-template<>
-class HashVectorGenerator<MIN_TS> {
-
-public:
-
-    static void GenerateHashVectors(std::vector< SingleKmerHashVectorFactory* > & factories) {
-        factories[MIN_TS] = new SingleKmerHashVectorFactoryImpl<MIN_TS>;
-    }
-};
-
-
-//Lazy singleton for factory for every required value
-class KmerHashVectorFactory {
-
-private:
-
-    std::vector < SingleKmerHashVectorFactory* > single_factories_;
-
-    KmerHashVectorFactory() {
-        VERIFY_MSG(MIN_K <= MAX_K, "Invalid K value range");
-
-        single_factories_ = std::vector < SingleKmerHashVectorFactory* >(MAX_TS + 1);
-        HashVectorGenerator<MAX_TS>::GenerateHashVectors(single_factories_);
-    }
-
-public:
-
-    static KmerHashVectorFactory& GetInstance() {
-        static KmerHashVectorFactory instance;
-
-        return instance;
-    }
-
-    KmerHashVector GetHashVector(size_t k, size_t nthreads) {
-        VERIFY_MSG(k >= MIN_K && k <= MAX_K, "K value " + ToString(k) + " is not supported, should be >= " +
-                ToString(MIN_K) + " and <= " + ToString(MAX_K));
-
-        return KmerHashVector(single_factories_[GET_T_ELEMENTS_NUMBER(k)]->GetHashVector(k, nthreads));
-    }
-
-    IKmerHashVector * GetRawHashVector(size_t k, size_t nthreads) {
-        VERIFY_MSG(k >= MIN_K && k <= MAX_K, "K value " + ToString(k) + " is not supported, should be >= " +
-                ToString(MIN_K) + " and <= " + ToString(MAX_K));
-
-        return single_factories_[GET_T_ELEMENTS_NUMBER(k)]->GetHashVector(k, nthreads);
-    }
-};
-
-KmerHashVector GetHashVector(size_t k, size_t nthreads) {
-    return KmerHashVectorFactory::GetInstance().GetHashVector(k, nthreads);
-}
-
-KmerHashVector::KmerHashVector(size_t k, size_t nthreads): data_(KmerHashVectorFactory::GetInstance().GetRawHashVector(k, nthreads)) {
-}
-
-} //namespace runtime_k
-
-#endif /* KMER_HASH_VECTOR_HPP_ */
diff --git a/src/common/adt/kmer_vector.hpp b/src/common/adt/kmer_vector.hpp
index 2be2fb2..dc01fdf 100644
--- a/src/common/adt/kmer_vector.hpp
+++ b/src/common/adt/kmer_vector.hpp
@@ -17,6 +17,8 @@
 
 #endif
 
+namespace adt {
+
 template<class Seq>
 class KMerVector {
 private:
@@ -37,13 +39,13 @@ private:
         je_free(storage_);
         storage_ = res;
 #else
-    // No JEMalloc, no cookies
-    ElTy *res = new ElTy[capacity_ * el_sz_];
-    if (storage_)
-      std::memcpy(res, storage_, size_ * sizeof(ElTy) * el_sz_);
+        // No JEMalloc, no cookies
+        ElTy *res = new ElTy[capacity_ * el_sz_];
+        if (storage_)
+            std::memcpy(res, storage_, size_ * sizeof(ElTy) * el_sz_);
 
-    delete[] storage_;
-    storage_ = res;
+        delete[] storage_;
+        storage_ = res;
 #endif
 
         return storage_;
@@ -59,12 +61,14 @@ public:
     typedef array_equal_to<ElTy> equal_to;
 
     explicit KMerVector(unsigned K, size_t capacity = 1)
-            : K_(K), size_(0), capacity_(std::max(capacity, (size_t) 1)), el_sz_(Seq::GetDataSize(K)), storage_(NULL),
+            : K_(K), size_(0), capacity_(std::max(capacity, (size_t) 1)), el_sz_(Seq::GetDataSize(K)),
+              storage_(NULL),
               vector_(realloc(), size_, el_sz_) {
     }
 
     KMerVector(KMerVector &&that)
-            : K_(that.K_), size_(that.size_), capacity_(that.capacity_), el_sz_(that.el_sz_), storage_(that.storage_),
+            : K_(that.K_), size_(that.size_), capacity_(that.capacity_), el_sz_(that.el_sz_),
+              storage_(that.storage_),
               vector_(storage_, size_, el_sz_) {
         that.storage_ = NULL;
     }
@@ -188,5 +192,5 @@ private:
     array_vector<ElTy> vector_;
 };
 
-
+} //adt
 #endif /* __KMER_VECTOR_HPP */
diff --git a/src/common/adt/loser_tree.hpp b/src/common/adt/loser_tree.hpp
index 7dbab36..797cdf1 100644
--- a/src/common/adt/loser_tree.hpp
+++ b/src/common/adt/loser_tree.hpp
@@ -8,7 +8,10 @@ namespace adt {
 template<typename IntegerType>
 IntegerType ilog2(IntegerType x) {
     IntegerType lg = 0;
-    while (x >= 256) { x >>= 8; lg += 8; }
+    while (x >= 256) {
+        x >>= 8;
+        lg += 8;
+    }
     while (x >>= 1) lg += 1;
 
     return lg;
@@ -20,9 +23,9 @@ IntegerType ilog2ceil(IntegerType x) {
 }
 
 template<class It, class Cmp>
-class loser_tree  {
+class loser_tree {
     typedef typename std::iterator_traits<It>::value_type value_type;
-    
+
     size_t log_k_;
     size_t k_;
     std::vector<size_t> entry_;
@@ -34,10 +37,10 @@ class loser_tree  {
             return true;
         if (a.end() == a.begin())
             return false;
-        
+
         return inner_cmp_(*a.begin(), *b.begin());
     }
-    
+
     size_t init_winner(size_t root) {
         if (root >= k_)
             return root - k_;
@@ -53,7 +56,7 @@ class loser_tree  {
         }
     }
 
-  public:
+public:
     loser_tree(const std::vector<adt::iterator_range<It>> &runs,
                Cmp inner_cmp = Cmp())
             : inner_cmp_(inner_cmp), runs_(runs) {
@@ -61,11 +64,11 @@ class loser_tree  {
         k_ = (size_t(1) << log_k_);
 
         // fprintf(stderr, "k: %zu, logK: %zu, nruns: %zu\n", k_, log_k_, runs.size());
-        
+
         entry_.resize(2 * k_);
         for (size_t i = 0; i < k_; ++i)
             entry_[k_ + i] = i;
-        
+
         // Insert sentinels
         for (size_t i = runs.size(); i < k_; ++i)
             runs_.emplace_back(adt::make_range(runs_[0].end(), runs_[0].end()));
@@ -96,7 +99,7 @@ class loser_tree  {
         const auto &winner = runs_[winner_index];
         return (winner.begin() == winner.end());
     }
-    
+
 
     template<class It2>
     size_t multi_merge(It2 out, size_t amount = -1ULL) {
@@ -112,7 +115,7 @@ class loser_tree  {
 
             winner_index = replay(winner_index);
         }
-        
+
         entry_[0] = winner_index;
 
         return cnt;
@@ -125,10 +128,10 @@ class loser_tree  {
 
         return res;
     }
-    
 
-  private:
+
+private:
     std::vector<adt::iterator_range<It>> runs_;
 };
 
-}
\ No newline at end of file
+} //adt
\ No newline at end of file
diff --git a/src/common/adt/parallel_seq_vector.hpp b/src/common/adt/parallel_seq_vector.hpp
deleted file mode 100644
index 44c8d6c..0000000
--- a/src/common/adt/parallel_seq_vector.hpp
+++ /dev/null
@@ -1,110 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#pragma once
-
-#include "parallel_unordered_map.hpp"
-#include "utils/openmp_wrapper.h"
-
-#include "sequence/runtime_k.hpp"
-#include "kmer_map.hpp"
-#include "kmer_hash_vector.hpp"
-
-class ParallelSeqVector {
-
-public:
-    typedef runtime_k::KmerHashVector par_container_t;
-
-    typedef runtime_k::KmerMap<int> destination_container_t;
-
-    typedef RtSeq Kmer;
-
-private:
-
-    size_t k_;
-
-    size_t nthreads_;
-
-    std::vector<par_container_t> nodes_;
-
-public:
-
-    ParallelSeqVector(size_t k, size_t nthreads, size_t cell_size) :
-        k_(k),
-        nthreads_(nthreads),
-        nodes_()
-
-    {
-        for (size_t i = 0; i < nthreads_; ++i) {
-            nodes_.push_back(runtime_k::GetHashVector(k_, nthreads_));
-        }
-
-        for (size_t i = 0; i < nthreads_; ++i) {
-            nodes_[i].reserve(cell_size);
-        }
-    }
-
-
-    void AddEdge(const Kmer &kmer, size_t thread_number) {
-        nodes_[thread_number].insert(kmer);
-    }
-
-    void CountSequence(const Sequence& s, size_t thread_number) {
-        if (s.size() < k_)
-            return;
-
-        Kmer kmer = s.start<Kmer>(k_);
-
-        AddEdge(kmer, thread_number);
-        for (size_t j = k_; j < s.size(); ++j) {
-            kmer <<= s[j];
-            AddEdge(kmer, thread_number);
-        }
-
-    }
-//
-//    void MergeMaps(destination_container_t & dest_container, size_t i) {
-//        for (size_t j = 0; j < nthreads_; ++j) {
-//            dest_container.transfer(nodes_[j], i);
-//        }
-//    }
-
-    void Dump(destination_container_t & bucket, size_t bucket_number) {
-        for (size_t i = 0; i < nodes_.size(); ++i) {
-            nodes_[i].dump(bucket, bucket_number);
-            nodes_[i].clear(bucket_number);
-        }
-    }
-
-
-    size_t SingleBucketCount() const {
-        return nodes_[0].capacity(0);
-    }
-
-    bool IsFull(size_t i) const {
-        return nodes_[i].is_full();
-    }
-
-    void Clear(size_t i) {
-        nodes_[i].clear();
-    }
-
-    void Clear() {
-        for (size_t i = 0; i < nthreads_; ++i) {
-            nodes_[i].clear();
-        }
-    }
-
-    void print_sizes() {
-        for (size_t i = 0; i < nodes_.size(); ++i) {
-            INFO("Size " << i << "::: ");
-            nodes_[i].print_sizes();
-        }
-    }
-
-
-};
diff --git a/src/common/adt/parallel_unordered_map.hpp b/src/common/adt/parallel_unordered_map.hpp
deleted file mode 100644
index 5faf990..0000000
--- a/src/common/adt/parallel_unordered_map.hpp
+++ /dev/null
@@ -1,137 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#pragma once 
-
-#include <unordered_set>
-
-template<class T, class Hash, class KeyEqual>
-struct parallel_unordered_set
-{
-private:
-
-    typedef std::unordered_set<T, Hash, KeyEqual>                      origin_container_t;
-
-    typedef std::vector<origin_container_t>                                 container_arr_t;
-
-    typedef typename origin_container_t::value_type                         value_type;
-
-    public:
-        parallel_unordered_set(size_t nthreads, size_t cell_size = 100000)
-            : nthreads_     (nthreads)
-            , buckets_      (nthreads, origin_container_t(cell_size)) {
-
-        }
-
-        void insert(const value_type& value, size_t bucket_num)
-        {
-            buckets_[bucket_num].insert(value);
-        }
-
-        const origin_container_t & operator[](size_t i) const
-        {
-            return buckets_[i];
-        }
-
-        size_t get_threads_num() const
-        {
-            return nthreads_;
-        }
-
-        const container_arr_t & get_buckets() const
-        {
-            return buckets_;
-        }
-
-        void clear() {
-            for (size_t i = 0; i < nthreads_; ++i) {
-                buckets_[i].clear();
-            }
-        }
-
-    private:
-        parallel_unordered_set& operator=(const parallel_unordered_set&);
-
-    private:
-        size_t      nthreads_;
-        container_arr_t     buckets_;
-};
-
-
-
-template<class T>
-struct parallel_vector
-{
-    private:
-        static const size_t LOAD_OVERHEAD = 1000;
-
-        typedef std::vector<T>                                                  origin_container_t;
-        typedef std::vector<origin_container_t>                                 container_arr_t;
-        typedef typename origin_container_t::value_type                         value_type;
-
-    public:
-        parallel_vector(size_t nthreads, size_t cell_size = 100000)
-            : nthreads_     (nthreads)
-            , cell_size_    (cell_size)
-            , buckets_      (nthreads) {
-
-            for (size_t i = 0; i < nthreads_; ++i) {
-                buckets_[i].reserve(cell_size + LOAD_OVERHEAD);
-            }
-        }
-
-        void insert(const value_type& value, size_t bucket_num)
-        {
-            buckets_[bucket_num].push_back(value);
-        }
-
-        const origin_container_t & operator[](size_t i) const
-        {
-            return buckets_[i];
-        }
-
-        origin_container_t & operator[](size_t i)  {
-            return buckets_[i];
-        }
-
-        size_t get_threads_num() const
-        {
-            return nthreads_;
-        }
-
-        const container_arr_t & get_buckets() const
-        {
-            return buckets_;
-        }
-
-        bool is_full() const {
-            return buckets_[0].size() >= cell_size_;
-        }
-
-        bool is_presisely_full() const {
-            for (size_t i = 0; i < nthreads_; ++i) {
-                if (buckets_[i].size() >= cell_size_)
-                    return true;
-            }
-            return false;
-        }
-
-        void clear() {
-            for (size_t i = 0; i < nthreads_; ++i) {
-                buckets_[i].clear();
-            }
-        }
-
-
-    private:
-        parallel_vector& operator=(const parallel_vector&);
-
-    private:
-        size_t      nthreads_;
-        size_t      cell_size_;
-        container_arr_t     buckets_;
-};
diff --git a/src/common/adt/pointer_iterator.hpp b/src/common/adt/pointer_iterator.hpp
index 3f2e5a2..1b304d0 100644
--- a/src/common/adt/pointer_iterator.hpp
+++ b/src/common/adt/pointer_iterator.hpp
@@ -14,6 +14,8 @@
 #include <stdexcept>
 #include <vector>
 
+namespace adt {
+
 template<typename T>
 class pointer_iterator : public std::iterator<std::random_access_iterator_tag, T> {
 protected:
@@ -171,4 +173,6 @@ operator-(const pointer_iterator<T> &r1,
     return (r1.data_ - r2.data_);
 }
 
+} //adt
+
 #endif // __HAMMER_POINTER_ITERATOR_HPP__
diff --git a/src/common/adt/queue_iterator.hpp b/src/common/adt/queue_iterator.hpp
index 5a867af..c5f6fb5 100644
--- a/src/common/adt/queue_iterator.hpp
+++ b/src/common/adt/queue_iterator.hpp
@@ -11,6 +11,10 @@
 #include "utils/verify.hpp"
 #include <set>
 
+
+namespace adt {
+
+
 template<typename T, typename Comparator>
 class erasable_priority_queue {
 private:
@@ -20,13 +24,13 @@ public:
      * Be careful! This constructor requires Comparator to have default constructor even if you call it with
      * specified comparator. In this case just create default constructor with VERIFY(false) inside it.
      */
-    erasable_priority_queue(const Comparator& comparator = Comparator()) :
+    erasable_priority_queue(const Comparator &comparator = Comparator()) :
         storage_(comparator) {
     }
 
     template<typename InputIterator>
     erasable_priority_queue(InputIterator begin, InputIterator end,
-            const Comparator& comparator = Comparator()) :
+            const Comparator &comparator = Comparator()) :
         storage_(begin, end, comparator) {
     }
 
@@ -40,11 +44,11 @@ public:
         return *(storage_.begin());
     }
 
-    void push(const T& key) {
+    void push(const T &key) {
         storage_.insert(key);
     }
 
-    bool erase(const T& key) {
+    bool erase(const T &key) {
         bool res = storage_.erase(key) > 0;
         return res;
     }
@@ -78,7 +82,7 @@ class DynamicQueueIterator {
 
 public:
 
-    DynamicQueueIterator(const Comparator& comparator = Comparator()) :
+    DynamicQueueIterator(const Comparator &comparator = Comparator()) :
         current_actual_(false), current_deleted_(false), queue_(comparator) {
     }
 
@@ -87,11 +91,11 @@ public:
         queue_.insert(begin, end);
     }
 
-    void push(const T& to_add) {
+    void push(const T &to_add) {
         queue_.push(to_add);
     }
 
-    void erase(const T& to_remove) {
+    void erase(const T &to_remove) {
         if (current_actual_ && to_remove == current_) {
             current_deleted_ = true;
         }
@@ -114,7 +118,7 @@ public:
 
     const T& operator*() {
         VERIFY(!queue_.empty());
-        if(!current_actual_ || current_deleted_) {
+        if (!current_actual_ || current_deleted_) {
             current_ = queue_.top();
             current_actual_ = true;
             current_deleted_ = false;
@@ -137,7 +141,7 @@ public:
     }
 
 };
-
+} //adt
 
 #endif /* QUEUE_ITERATOR_HPP_ */
 
diff --git a/src/common/adt/small_pod_vector.hpp b/src/common/adt/small_pod_vector.hpp
index 184e1bc..25cfd45 100644
--- a/src/common/adt/small_pod_vector.hpp
+++ b/src/common/adt/small_pod_vector.hpp
@@ -62,7 +62,8 @@ public:
 #define IS_TRIVIALLY_COPYABLE(T) std::is_trivially_copyable<T>::value
 #endif
 
-    static_assert(IS_TRIVIALLY_COPYABLE(value_type), "Value type for SmallPODVector should be trivially copyable");
+    static_assert(IS_TRIVIALLY_COPYABLE(value_type),
+                  "Value type for SmallPODVector should be trivially copyable");
 
 #undef IS_TRIVIALLY_COPYABLE
 
@@ -394,6 +395,5 @@ public:
 #undef LIKELY
 #undef UNLIKELY
 
-}
-
+} //adt
 #endif // __ADT_SMALL_POD_VECTOR__
diff --git a/src/common/assembly_graph/components/connected_component.cpp b/src/common/assembly_graph/components/connected_component.cpp
index 69a9dce..1ce680d 100644
--- a/src/common/assembly_graph/components/connected_component.cpp
+++ b/src/common/assembly_graph/components/connected_component.cpp
@@ -65,11 +65,11 @@ void ConnectedComponentCounter::CalculateComponents() const {
     return;
 }
 
-size_t ConnectedComponentCounter::GetComponent(EdgeId & e) const {
+size_t ConnectedComponentCounter::GetComponent(EdgeId e) const {
     if (component_ids_.size() == 0) {
         CalculateComponents();
     }
-    return component_ids_[e];
+    return component_ids_.at(e);
 }
 
 
diff --git a/src/common/assembly_graph/components/connected_component.hpp b/src/common/assembly_graph/components/connected_component.hpp
index 2fa958f..6289dd1 100644
--- a/src/common/assembly_graph/components/connected_component.hpp
+++ b/src/common/assembly_graph/components/connected_component.hpp
@@ -17,7 +17,7 @@ public:
     ConnectedComponentCounter(const Graph &g):g_(g) {}
     void CalculateComponents() const;
 //    size_t GetComponent(path_extend::BidirectionalPath * p) const;
-    size_t GetComponent(EdgeId & e) const;
+    size_t GetComponent(EdgeId e) const;
     bool IsFilled() const {
         return (component_ids_.size() != 0);
     }
diff --git a/src/common/assembly_graph/components/splitters.hpp b/src/common/assembly_graph/components/splitters.hpp
index 9aa5d0d..b3e37dd 100644
--- a/src/common/assembly_graph/components/splitters.hpp
+++ b/src/common/assembly_graph/components/splitters.hpp
@@ -277,8 +277,14 @@ private:
         const double coverage_bound_;
         const size_t edge_limit_;
         mutable size_t edge_summary_length_;
+        const size_t edge_summary_length_limit_;
 
         void Find(EdgeId edge, std::set<EdgeId> &result) const {
+
+            if (edge_summary_length_ > edge_summary_length_limit_) {
+                return;
+            }
+
             if (result.size() > edge_limit_) {
                 return;
             }
@@ -307,12 +313,13 @@ private:
         }
 
     public:
-        CoverageBoundedDFS(const Graph &graph, double coverage_bound,
-                           size_t edge_limit = 10000)
+        CoverageBoundedDFS(const Graph &graph, double coverage_bound, size_t edge_summary_limit,
+                           size_t edge_limit = 500)
                 : graph_(graph),
                   coverage_bound_(coverage_bound),
                   edge_limit_(edge_limit),
-                  edge_summary_length_(0) {
+                  edge_summary_length_(0),
+                  edge_summary_length_limit_(edge_summary_limit) {
         }
 
         std::set<EdgeId> Find(VertexId v) const {
@@ -337,11 +344,15 @@ private:
     CoverageBoundedDFS dfs_helper;
 
 public:
-    HighCoverageComponentFinder(const Graph &graph, double max_coverage)
-    : AbstractNeighbourhoodFinder<Graph>(graph), coverage_bound_(max_coverage), dfs_helper(graph, max_coverage) {
+    HighCoverageComponentFinder(const Graph &graph,
+                                double max_coverage,
+                                size_t edge_sum_limit = std::numeric_limits<size_t>::max())
+            : AbstractNeighbourhoodFinder<Graph>(graph),
+              coverage_bound_(max_coverage),
+              dfs_helper(graph, max_coverage, edge_sum_limit) {
     }
 
-    GraphComponent<Graph> Find(typename Graph::VertexId v) const {
+    GraphComponent<Graph> Find(VertexId v) const {
         std::set<EdgeId> result = dfs_helper.Find(v);
         return GraphComponent<Graph>::FromEdges(this->graph(), result, false);
     }
@@ -388,8 +399,8 @@ private:
 
     set<VertexId> FindBorder(const GraphComponent<Graph>& component) const {
         set<VertexId> result;
-        insert_all(result, component.entrances());
-        insert_all(result, component.exits());
+        utils::insert_all(result, component.entrances());
+        utils::insert_all(result, component.exits());
         return result;
     }
 
@@ -784,7 +795,7 @@ shared_ptr<GraphSplitter<Graph>> ReliableSplitter(const Graph &graph,
                             size_t edge_length_bound = ReliableNeighbourhoodFinder<Graph>::DEFAULT_EDGE_LENGTH_BOUND,
                             size_t max_size = ReliableNeighbourhoodFinder<Graph>::DEFAULT_MAX_SIZE) {
     typedef typename Graph::VertexId VertexId;
-    shared_ptr<RelaxingIterator<VertexId>> inner_iterator = make_shared<CollectionIterator<set<VertexId>>>(graph.begin(), graph.end());
+    shared_ptr<RelaxingIterator<VertexId>> inner_iterator = make_shared<CollectionIterator<typename Graph::VertexContainer>>(graph.begin(), graph.end());
     shared_ptr<AbstractNeighbourhoodFinder<Graph>> nf = make_shared<ReliableNeighbourhoodFinder<Graph>>(graph, edge_length_bound, max_size);
     return make_shared<NeighbourhoodFindingSplitter<Graph>>(graph,
             inner_iterator, nf);
@@ -795,7 +806,7 @@ shared_ptr<GraphSplitter<Graph>> ConnectedSplitter(const Graph &graph,
                             size_t edge_length_bound = 1000000,
                             size_t max_size = 1000000) {
     typedef typename Graph::VertexId VertexId;
-    shared_ptr<RelaxingIterator<VertexId>> inner_iterator = make_shared<CollectionIterator<set<VertexId>>>(graph.begin(), graph.end());
+    shared_ptr<RelaxingIterator<VertexId>> inner_iterator = make_shared<CollectionIterator<typename Graph::VertexContainer>>(graph.begin(), graph.end());
     shared_ptr<AbstractNeighbourhoodFinder<Graph>> nf = make_shared<ReliableNeighbourhoodFinder<Graph>>(graph, edge_length_bound, max_size);
     return make_shared<NeighbourhoodFindingSplitter<Graph>>(graph,
             inner_iterator, nf);
@@ -822,7 +833,7 @@ shared_ptr<GraphSplitter<Graph>> LongEdgesExclusiveSplitter(
                 ReliableNeighbourhoodFinder<Graph>::DEFAULT_EDGE_LENGTH_BOUND) {
     typedef typename Graph::VertexId VertexId;
     shared_ptr<RelaxingIterator<VertexId>> inner_iterator = make_shared<
-            CollectionIterator<set<VertexId>>>(graph.begin(), graph.end());
+            CollectionIterator<typename Graph::VertexContainer>>(graph.begin(), graph.end());
     shared_ptr<AbstractNeighbourhoodFinder<Graph>> nf = make_shared<
             ShortEdgeComponentFinder<Graph>>(graph, bound);
     return make_shared<NeighbourhoodFindingSplitter<Graph>>(graph,
diff --git a/src/common/utils/debruijn_graph/debruijn_graph_constructor.hpp b/src/common/assembly_graph/construction/debruijn_graph_constructor.hpp
similarity index 61%
rename from src/common/utils/debruijn_graph/debruijn_graph_constructor.hpp
rename to src/common/assembly_graph/construction/debruijn_graph_constructor.hpp
index 47aed1d..4f14047 100644
--- a/src/common/utils/debruijn_graph/debruijn_graph_constructor.hpp
+++ b/src/common/assembly_graph/construction/debruijn_graph_constructor.hpp
@@ -9,9 +9,10 @@
 #include "assembly_graph/core/graph.hpp"
 #include "assembly_graph/core/construction_helper.hpp"
 #include "utils/standard_base.hpp"
-#include "utils/indices/kmer_extension_index.hpp"
-#include "utils/openmp_wrapper.h"
-#include "utils/parallel_wrapper.hpp"
+#include "utils/extension_index/kmer_extension_index.hpp"
+#include "utils/parallel/openmp_wrapper.h"
+#include "utils/parallel/parallel_wrapper.hpp"
+#include <numeric>
 
 namespace debruijn_graph {
 
@@ -142,11 +143,11 @@ private:
     }
 
     void CalculateSequences(std::vector<KeyWithHash> &kwh_list,
-            std::vector<Sequence> &sequences) {
+                            std::vector<Sequence> &sequences) {
         size_t size = kwh_list.size();
         sequences.resize(size);
 
-#   pragma omp parallel for schedule(guided)
+#       pragma omp parallel for schedule(guided)
         for (size_t i = 0; i < size; ++i) {
             sequences[i] = ConstructSequenceWithEdge(kwh_list[i]);
         }
@@ -158,7 +159,7 @@ public:
     }
 
     void ConstructGraph(size_t queueMinSize, size_t queueMaxSize,
-            double queueGrowthRate) {
+                        double queueGrowthRate) {
         kmer_iterator it = origin_.kmer_begin();
         kmer_iterator end = origin_.kmer_end();
         size_t queueSize = queueMinSize;
@@ -179,114 +180,109 @@ private:
     DECL_LOGGER("DeBruijnGraphConstructor")
 };
 
-class UnbranchingPathFinder {
+class UnbranchingPathExtractor {
 private:
-    typedef DeBruijnExtensionIndex<> Index;
+    typedef utils::DeBruijnExtensionIndex<> Index;
     typedef RtSeq Kmer;
     typedef Index::kmer_iterator kmer_iterator;
-    typedef Index::KeyWithHash KeyWithHash;
     typedef Index::DeEdge DeEdge;
+    typedef Index::KeyWithHash KeyWithHash;
 
     Index &origin_;
     size_t kmer_size_;
 
-public:
-    UnbranchingPathFinder(Index &origin, size_t kmer_size) : origin_(origin), kmer_size_(kmer_size) {
+    bool IsJunction(KeyWithHash kwh) const {
+        return IsJunction(origin_.get_value(kwh));
+    }
+
+    bool IsJunction(utils::InOutMask mask) const {
+        return !mask.CheckUniqueOutgoing() || !mask.CheckUniqueIncoming();
+    }
+
+    void AddStartDeEdgesForVertex(KeyWithHash kh, utils::InOutMask mask,
+                                  std::vector<DeEdge>& start_edges) const {
+        for (char next = 0; next < 4; next++) {
+            if (!mask.CheckOutgoing(next))
+                continue;
+
+            start_edges.emplace_back(kh, origin_.GetOutgoing(kh, next));
+            TRACE("Added to queue " << start_edges.back() << " " << mask);
+        }
+    }
+
+    void AddStartDeEdges(KeyWithHash kh, std::vector<DeEdge>& start_edges) const {
+        start_edges.clear();
+        auto extensions = origin_.get_value(kh);
+        if (!IsJunction(extensions))
+            return;
+
+        AddStartDeEdgesForVertex(kh, extensions, start_edges);
+        KeyWithHash kh_inv = !kh;
+        if (!kh_inv.is_minimal()) {
+            AddStartDeEdgesForVertex(kh_inv, origin_.get_value(kh_inv),
+                                     start_edges);
+        }
     }
 
-    bool StepRightIfPossible(DeEdge &edge) {
-        if (origin_.CheckUniqueOutgoing(edge.end) && origin_.CheckUniqueIncoming(edge.end)) {
-            edge = DeEdge(edge.end, origin_.GetUniqueOutgoing(edge.end));
+    bool StepRightIfPossible(DeEdge &edge) const {
+        utils::InOutMask mask = origin_.get_value(edge.end);
+        if (mask.CheckUniqueOutgoing() && mask.CheckUniqueIncoming()) {
+            edge = DeEdge(edge.end,
+                          origin_.GetOutgoing(edge.end, mask.GetUniqueOutgoing()));
             return true;
         }
         return false;
     }
 
-    Sequence ConstructSeqGoingRight(DeEdge edge) {
-        SequenceBuilder s;
-        s.append(edge.start.key());
-        s.append(edge.end[kmer_size_ - 1]);
+    Sequence ConstructSequenceWithEdge(DeEdge edge, SequenceBuilder &builder) const {
+        builder.clear(); // We reuse the buffer to reduce malloc traffic
+        builder.append(edge.start.key());
+        builder.append(edge.end[kmer_size_ - 1]);
         DeEdge initial = edge;
         while (StepRightIfPossible(edge) && edge != initial) {
-            s.append(edge.end[kmer_size_ - 1]);
+            builder.append(edge.end[kmer_size_ - 1]);
         }
-        return s.BuildSequence();
-    }
-
-    Sequence ConstructSequenceWithEdge(DeEdge edge) {
-        return ConstructSeqGoingRight(edge);
+        return builder.BuildSequence();
     }
 
-    //Loop consists of 4 parts: 2 selfRC k+1-mers and two sequences of arbitrary length RC to each other; pos is a position of one of selfRC edges
-    vector<Sequence> SplitLoop(Sequence s, size_t pos) {
-        return {s.Subseq(pos, pos + kmer_size_ + 1), s.Subseq(pos + 1, s.size() - kmer_size_) + s.Subseq(0, pos + kmer_size_)};
+    // Loop consists of 4 parts: 2 selfRC k+1-mers and two sequences of arbitrary length RC to each other; pos is a position of one of selfRC edges
+    std::vector<Sequence> SplitLoop(const Sequence &s, size_t pos) const {
+        return { s.Subseq(pos, pos + kmer_size_ + 1),
+                 s.Subseq(pos + 1, s.size() - kmer_size_) + s.Subseq(0, pos + kmer_size_) };
 
     }
 
-//TODO Think about what happends to self rc perfect loops
-    vector<Sequence> ConstructLoopFromVertex(const KeyWithHash &kh) {
+//  TODO Think about what happends to self rc perfect loops
+    std::vector<Sequence> ConstructLoopFromVertex(const KeyWithHash &kh, SequenceBuilder &builder) const {
         DeEdge break_point(kh, origin_.GetUniqueOutgoing(kh));
-        Sequence s = ConstructSequenceWithEdge(break_point);
+        Sequence s = ConstructSequenceWithEdge(break_point, builder);
         Kmer kmer = s.start<Kmer>(kmer_size_ + 1) >> 'A';
-        for(size_t i = kmer_size_; i < s.size(); i++) {
+        for (size_t i = kmer_size_; i < s.size(); i++) {
             kmer = kmer << s[i];
-            if (kmer == !kmer) {
+            if (kmer == !kmer)
                 return SplitLoop(s, i - kmer_size_);
-            }
         }
         return {s};
     }
-};
-
-class UnbranchingPathExtractor {
-private:
-    typedef DeBruijnExtensionIndex<> Index;
-    typedef RtSeq Kmer;
-    typedef Index::kmer_iterator kmer_iterator;
-    typedef Index::DeEdge DeEdge;
-    typedef Index::KeyWithHash KeyWithHash;
-
-    Index &origin_;
-    size_t kmer_size_;
-
-    bool IsJunction(KeyWithHash kh) const {
-        return !(origin_.CheckUniqueOutgoing(kh) && origin_.CheckUniqueIncoming(kh));
-    }
 
-    void AddStartDeEdgesForVertex(KeyWithHash kh, std::vector<DeEdge>& start_edges) const {
-        for (char next = 0; next < 4; next++) {
-            if (origin_.CheckOutgoing(kh, next)) {
-                TRACE("Added to queue " << DeEdge(kh, origin_.GetOutgoing(kh, next)));
-                start_edges.push_back(DeEdge(kh, origin_.GetOutgoing(kh, next)));
-            }
-        }
-    }
+    void CalculateSequences(kmer_iterator &it,
+                            std::vector<Sequence> &sequences) const {
+        SequenceBuilder builder;
+        std::vector<DeEdge> start_edges;
+        start_edges.reserve(8);
 
-    void AddStartDeEdges(kmer_iterator &it, size_t queueSize,
-                  std::vector<DeEdge>& start_edges) const {
-        for (; start_edges.size() < queueSize && it.good(); ++it) {
+        for ( ; it.good(); ++it) {
             KeyWithHash kh = origin_.ConstructKWH(Kmer(kmer_size_, *it));
-            if (IsJunction(kh)) {
-                AddStartDeEdgesForVertex(kh, start_edges);
-                KeyWithHash kh_inv = !kh;
-                if(!(kh_inv.is_minimal())) {
-                    AddStartDeEdgesForVertex(kh_inv, start_edges);
-                }
-            }
-        }
-    }
+            AddStartDeEdges(kh, start_edges);
 
-    void CalculateSequences(std::vector<DeEdge> &edges,
-                            std::vector<Sequence> &sequences, UnbranchingPathFinder &finder) const {
-        size_t size = edges.size();
-        size_t start = sequences.size();
-        sequences.resize(start + size);
+            for (auto edge : start_edges) {
+                Sequence s = ConstructSequenceWithEdge(edge, builder);
+                if (s < !s)
+                    continue;
 
-#   pragma omp parallel for schedule(guided)
-        for (size_t i = 0; i < size; ++i) {
-            sequences[start + i] = finder.ConstructSequenceWithEdge(edges[i]);
-            TRACE("From " << edges[i] << " calculated sequence");
-            TRACE(sequences[start + i]);
+                sequences.push_back(s);
+                TRACE("From " << edge << " calculated sequence\n" << s);
+            }
         }
     }
 
@@ -294,35 +290,53 @@ private:
         Kmer kmer = sequence.start<Kmer>(kmer_size_);
         KeyWithHash kwh = origin_.ConstructKWH(kmer);
         origin_.IsolateVertex(kwh);
-        for(size_t pos = kmer_size_; pos < sequence.size(); pos++) {
+        for (size_t pos = kmer_size_; pos < sequence.size(); pos++) {
             kwh = kwh << sequence[pos];
             origin_.IsolateVertex(kwh);
         }
     }
 
     void CleanCondensed(const std::vector<Sequence> &sequences) {
-#   pragma omp parallel for schedule(guided)
+#       pragma omp parallel for schedule(guided)
         for (size_t i = 0; i < sequences.size(); ++i) {
             CleanCondensed(sequences[i]);
+            CleanCondensed(!sequences[i]);
         }
     }
 
-    //This methods collects all loops that were not extracted by finding unbranching paths because there are no junctions on loops.
-    //TODO make parallel
-    const std::vector<Sequence> CollectLoops() {
+    // This methods collects all loops that were not extracted by finding
+    // unbranching paths because there are no junctions on loops.
+    const std::vector<Sequence> CollectLoops(unsigned nchunks) {
         INFO("Collecting perfect loops");
-        UnbranchingPathFinder finder(origin_, kmer_size_);
+        auto its = origin_.kmer_begin(nchunks);
+        std::vector<std::vector<KeyWithHash> > starts(its.size());
+
+#       pragma omp parallel for schedule(guided)
+        for (size_t i = 0; i < its.size(); ++i) {
+            auto &it = its[i];
+            for (; it.good(); ++it) {
+                KeyWithHash kh = origin_.ConstructKWH(Kmer(kmer_size_, *it));
+                if (!IsJunction(kh))
+                    starts[i].push_back(kh);
+            }
+        }
+
         std::vector<Sequence> result;
-        for (kmer_iterator it = origin_.kmer_begin(); it.good(); ++it) {
-            KeyWithHash kh = origin_.ConstructKWH(Kmer(kmer_size_, *it));
-            if (!IsJunction(kh)) {
-                vector<Sequence> loop = finder.ConstructLoopFromVertex(kh);
-                for(Sequence s: loop) {
-                    result.push_back(s);
+        SequenceBuilder builder;
+        for (const auto& entry : starts) {
+            for (const auto& kwh : entry) {
+                if (IsJunction(kwh))
+                    continue;
+
+                for (Sequence s : ConstructLoopFromVertex(kwh, builder)) {
+                    Sequence s_rc = !s;
+                    if (s < s_rc)
+                        result.push_back(s_rc);
+                    else
+                        result.push_back(s);
+
                     CleanCondensed(s);
-                    if(s != (!s)) {
-                        result.push_back(!s);
-                    }
+                    CleanCondensed(s_rc);
                 }
             }
         }
@@ -331,38 +345,42 @@ private:
     }
 
 public:
-    UnbranchingPathExtractor(Index &origin, size_t k) : origin_(origin), kmer_size_(k) {
-    }
+    UnbranchingPathExtractor(Index &origin, size_t k)
+            : origin_(origin), kmer_size_(k) {}
 
     //TODO very large vector is returned. But I hate to make all those artificial changes that can fix it.
-    const std::vector<Sequence> ExtractUnbranchingPaths(size_t queueMinSize, size_t queueMaxSize,
-                                                        double queueGrowthRate) {
-        INFO("Extracting unbranching paths");
-        UnbranchingPathFinder finder(origin_, kmer_size_);
-        std::vector<Sequence> result;
-        size_t queueSize = queueMinSize;
-        std::vector<DeEdge> start_edges;
-        std::vector<Sequence> sequences;
-        start_edges.reserve(queueSize);
-        auto it = origin_.kmer_begin();
-        while (it.good()) {
-            AddStartDeEdges(it, queueSize, start_edges); // format a queue of junction kmers
-            CalculateSequences(start_edges, sequences, finder); // in parallel
-            start_edges.clear();
-            queueSize = min((size_t) ((double) queueSize * queueGrowthRate), queueMaxSize);
-        }
-        INFO("Extracting unbranching paths finished. " << sequences.size() << " sequences extracted");
-        return sequences;
-    }
+    const std::vector<Sequence> ExtractUnbranchingPaths(unsigned nchunks) const {
+        auto its = origin_.kmer_begin(nchunks);
 
-    const std::vector<Sequence> ExtractUnbranchingPathsAndLoops(size_t queueMinSize, size_t queueMaxSize,
-                                                                double queueGrowthRate) {
-        std::vector<Sequence> result = ExtractUnbranchingPaths(queueMinSize, queueMaxSize, queueGrowthRate);
+        INFO("Extracting unbranching paths");
+        std::vector<std::vector<Sequence>> sequences(its.size());
+#       pragma omp parallel for schedule(guided)
+        for (size_t i = 0; i < its.size(); ++i)
+            CalculateSequences(its[i], sequences[i]);
+
+        size_t snum = std::accumulate(sequences.begin(), sequences.end(),
+                                      0,
+                                      [](size_t val, const std::vector<Sequence> &s) {
+                                          return val + s.size();
+                                      });
+        sequences[0].reserve(snum);
+        for (size_t i = 1; i < sequences.size(); ++i) {
+            sequences[0].insert(sequences[0].end(),
+                                std::make_move_iterator(sequences[i].begin()), std::make_move_iterator(sequences[i].end()));
+            sequences[i].clear();
+            sequences[i].shrink_to_fit();
+        }
+
+        INFO("Extracting unbranching paths finished. " << sequences[0].size() << " sequences extracted");
+        return sequences[0];
+    }
+
+    const std::vector<Sequence> ExtractUnbranchingPathsAndLoops(unsigned nchunks) {
+        std::vector<Sequence> result = ExtractUnbranchingPaths(nchunks);
         CleanCondensed(result);
-        std::vector<Sequence> loops = CollectLoops();
-        for(auto it = loops.begin(); it != loops.end(); ++it) {
-            result.push_back(*it);
-        }
+        std::vector<Sequence> loops = CollectLoops(nchunks);
+        result.insert(result.end(),
+                      std::make_move_iterator(loops.begin()), std::make_move_iterator(loops.end()));
         return result;
     }
 
@@ -370,16 +388,13 @@ private:
     DECL_LOGGER("UnbranchingPathExtractor")
 };
 
-/*
- * Only works for Conjugate dbg
- */
 template<class Graph>
 class FastGraphFromSequencesConstructor {
 private:
     typedef typename Graph::EdgeId EdgeId;
     typedef typename Graph::VertexId VertexId;
     typedef RtSeq Kmer;
-    typedef DeBruijnExtensionIndex<> Index;
+    typedef utils::DeBruijnExtensionIndex<> Index;
     size_t kmer_size_;
     Index &origin_;
 
@@ -389,43 +404,27 @@ private:
         EdgeId edge_;
 
         size_t BitBool(bool flag) const {
-            if(flag)
+            if (flag)
                 return 1;
             return 0;
         }
 
     public:
-        size_t GetHash() const {
-            return hash_and_mask_ >> 2;
-        }
+        size_t GetHash() const { return hash_and_mask_ >> 2; }
+        bool IsRC() const { return hash_and_mask_ & 2; }
+        bool IsStart() const { return hash_and_mask_ & 1; }
+        EdgeId GetEdge() const { return edge_; }
+        bool IsInvalid() { return hash_and_mask_ + 1 == 0 && edge_ == EdgeId(0); }
 
-        bool IsRC() const {
-            return hash_and_mask_ & 2;
-        }
+        LinkRecord(size_t hash, EdgeId edge, bool is_start, bool is_rc)
+                : hash_and_mask_((hash << 2) | (BitBool(is_rc) << 1)| BitBool(is_start)), edge_(edge) { }
 
-        bool IsStart() const {
-            return hash_and_mask_ & 1;
-        }
+        LinkRecord()
+                : hash_and_mask_(-1ul), edge_(0) {}
 
 
-        EdgeId GetEdge() const {
-            return edge_;
-        }
-
-        LinkRecord(size_t hash, EdgeId edge, bool is_start, bool is_rc) :
-                hash_and_mask_((hash << 2) | (BitBool(is_rc) << 1)| BitBool(is_start)), edge_(edge) {
-        }
-
-        LinkRecord() :
-                hash_and_mask_(-1ul), edge_(0) {
-        }
-
-        bool IsInvalid() {
-            return hash_and_mask_ + 1 == 0 && edge_ == EdgeId(0);
-        }
-
         bool operator<(const LinkRecord &other) const {
-            if(this->hash_and_mask_ == other.hash_and_mask_)
+            if (this->hash_and_mask_ == other.hash_and_mask_)
                 return this->edge_ < other.edge_;
             return this->hash_and_mask_ < other.hash_and_mask_;
         }
@@ -434,7 +433,7 @@ private:
     LinkRecord StartLink(const EdgeId &edge, const Sequence &sequence) const {
         Kmer kmer(kmer_size_, sequence);
         Kmer kmer_rc = !kmer;
-        if(kmer < kmer_rc)
+        if (kmer < kmer_rc)
             return LinkRecord(origin_.ConstructKWH(kmer).idx(), edge, true, false);
         else
             return LinkRecord(origin_.ConstructKWH(kmer_rc).idx(), edge, true, true);
@@ -443,23 +442,23 @@ private:
     LinkRecord EndLink(const EdgeId &edge, const Sequence &sequence) const {
         Kmer kmer(kmer_size_, sequence, sequence.size() - kmer_size_);
         Kmer kmer_rc = !kmer;
-        if(kmer < kmer_rc)
+        if (kmer < kmer_rc)
             return LinkRecord(origin_.ConstructKWH(kmer).idx(), edge, false, false);
         else
             return LinkRecord(origin_.ConstructKWH(kmer_rc).idx(), edge, false, true);
     }
 
-    void CollectLinkRecords(typename Graph::HelperT &helper, const Graph &graph, vector<LinkRecord> &records, const vector<Sequence> &sequences) const {
+    void CollectLinkRecords(typename Graph::HelperT &helper, const Graph &graph, std::vector<LinkRecord> &records, const vector<Sequence> &sequences) const {
         size_t size = sequences.size();
         records.resize(size * 2, LinkRecord(0, EdgeId(0), false, false));
         restricted::IdSegmentStorage id_storage = helper.graph().GetGraphIdDistributor().Reserve(size * 2);
-#   pragma omp parallel for schedule(guided)
+#       pragma omp parallel for schedule(guided)
         for (size_t i = 0; i < size; ++i) {
             size_t j = i << 1;
             auto id_distributor = id_storage.GetSegmentIdDistributor(j, j + 2);//indices for two edges are required
             EdgeId edge = helper.AddEdge(DeBruijnEdgeData(sequences[i]), id_distributor);
             records[j] = StartLink(edge, sequences[i]);
-            if(graph.conjugate(edge) != edge)
+            if (graph.conjugate(edge) != edge)
                 records[j + 1] = EndLink(edge, sequences[i]);
             else
                 records[j + 1] = LinkRecord();
@@ -468,19 +467,18 @@ private:
 
     void LinkEdge(typename Graph::HelperT &helper, const Graph &graph, const VertexId v, const EdgeId edge, const bool is_start, const bool is_rc) const {
         VertexId v1 = v;
-        if(is_rc) {
+        if (is_rc)
             v1 = graph.conjugate(v);
-        }
-        if(is_start) {
+
+        if (is_start)
             helper.LinkOutgoingEdge(v1, edge);
-        } else {
+        else
             helper.LinkIncomingEdge(v1, edge);
-        }
     }
 
 public:
-    FastGraphFromSequencesConstructor(size_t k, Index &origin) : kmer_size_(k), origin_(origin) {
-    }
+    FastGraphFromSequencesConstructor(size_t k, Index &origin)
+            : kmer_size_(k), origin_(origin) {}
 
     void ConstructGraph(Graph &graph, const vector<Sequence> &sequences) const {
         typename Graph::HelperT helper = graph.GetConstructionHelper();
@@ -490,21 +488,22 @@ public:
         size_t size = records.size();
         vector<vector<VertexId>> vertices_list(omp_get_max_threads());
         restricted::IdSegmentStorage id_storage = helper.graph().GetGraphIdDistributor().Reserve(size * 2);
-#   pragma omp parallel for schedule(guided)
-        for(size_t i = 0; i < size; i++) {
-            if(i != 0 && records[i].GetHash() == records[i - 1].GetHash()) {
+#       pragma omp parallel for schedule(guided)
+        for (size_t i = 0; i < size; i++) {
+            if (i != 0 && records[i].GetHash() == records[i - 1].GetHash())
                 continue;
-            }
-            if(records[i].IsInvalid())
+            if (records[i].IsInvalid())
                 continue;
+
             auto id_distributor = id_storage.GetSegmentIdDistributor(i << 1, (i << 1) + 2);
             VertexId v = helper.CreateVertex(DeBruijnVertexData(), id_distributor);
             vertices_list[omp_get_thread_num()].push_back(v);
-            for(size_t j = i; j < size && records[j].GetHash() == records[i].GetHash(); j++) {
+            for (size_t j = i; j < size && records[j].GetHash() == records[i].GetHash(); j++) {
                 LinkEdge(helper, graph, v, records[j].GetEdge(), records[j].IsStart(), records[j].IsRC());
             }
         }
-        for(size_t i = 0; i < vertices_list.size(); i++)
+
+        for (size_t i = 0; i < vertices_list.size(); i++)
             helper.AddVerticesToGraph(vertices_list[i].begin(), vertices_list[i].end());
     }
 };
@@ -516,7 +515,7 @@ template<class Graph>
 class DeBruijnGraphExtentionConstructor {
 private:
     typedef typename Graph::EdgeId EdgeId;
-    typedef DeBruijnExtensionIndex<> DeBruijn;
+    typedef utils::DeBruijnExtensionIndex<> DeBruijn;
     typedef typename Graph::VertexId VertexId;
     typedef RtSeq Kmer;
 
@@ -524,30 +523,18 @@ private:
     DeBruijn &origin_;
     size_t kmer_size_;
 
-    void FilterRC(std::vector<Sequence> &edge_sequences) {
-        size_t size = 0;
-        for(size_t i = 0; i < edge_sequences.size(); i++) {
-            if(!(edge_sequences[i] < !edge_sequences[i])) {
-                edge_sequences[size] = edge_sequences[i];
-                size++;
-            }
-        }
-        edge_sequences.resize(size);
-    }
-
 public:
     DeBruijnGraphExtentionConstructor(Graph& graph, DeBruijn &origin) :
             graph_(graph), origin_(origin), kmer_size_(graph.k()) {
     }
 
-    void ConstructGraph(size_t queueMinSize, size_t queueMaxSize,
-            double queueGrowthRate, bool keep_perfect_loops) {
+    void ConstructGraph(bool keep_perfect_loops) {
         std::vector<Sequence> edge_sequences;
-        if(keep_perfect_loops)
-            edge_sequences = UnbranchingPathExtractor(origin_, kmer_size_).ExtractUnbranchingPathsAndLoops(queueMinSize, queueMaxSize, queueGrowthRate);
+        unsigned nchunks = 16 * omp_get_max_threads();
+        if (keep_perfect_loops)
+            edge_sequences = UnbranchingPathExtractor(origin_, kmer_size_).ExtractUnbranchingPathsAndLoops(nchunks);
         else
-            edge_sequences = UnbranchingPathExtractor(origin_, kmer_size_).ExtractUnbranchingPaths(queueMinSize, queueMaxSize, queueGrowthRate);
-        FilterRC(edge_sequences);
+            edge_sequences = UnbranchingPathExtractor(origin_, kmer_size_).ExtractUnbranchingPaths(nchunks);
         FastGraphFromSequencesConstructor<Graph>(kmer_size_, origin_).ConstructGraph(graph_, edge_sequences);
     }
 
diff --git a/src/common/utils/debruijn_graph/early_simplification.hpp b/src/common/assembly_graph/construction/early_simplification.hpp
similarity index 97%
rename from src/common/utils/debruijn_graph/early_simplification.hpp
rename to src/common/assembly_graph/construction/early_simplification.hpp
index d85649f..36fa329 100644
--- a/src/common/utils/debruijn_graph/early_simplification.hpp
+++ b/src/common/assembly_graph/construction/early_simplification.hpp
@@ -7,14 +7,14 @@
 
 #pragma once
 #include "utils/standard_base.hpp"
-#include "utils/indices/perfect_hash_map.hpp"
-#include "utils/mph_index/kmer_index.hpp"
+#include "utils/ph_map/perfect_hash_map.hpp"
+#include "utils/kmer_mph/kmer_index.hpp"
 
 namespace debruijn_graph {
 
 class LinkCleaner {
 private:
-    typedef DeBruijnExtensionIndex<> Index;
+    typedef utils::DeBruijnExtensionIndex<> Index;
     typedef Index::KMer Kmer;
     typedef Index::KeyWithHash KeyWithHash;
     Index &index_;
@@ -61,7 +61,7 @@ public:
 
 class AlternativeEarlyTipClipper {
 private:
-    typedef DeBruijnExtensionIndex<> Index;
+    typedef utils::DeBruijnExtensionIndex<> Index;
     typedef Index::KMer Kmer;
     typedef Index::KeyWithHash KeyWithHash;
     Index &index_;
diff --git a/src/common/assembly_graph/core/graph_core.hpp b/src/common/assembly_graph/core/graph_core.hpp
index 71dd589..42f4e6a 100644
--- a/src/common/assembly_graph/core/graph_core.hpp
+++ b/src/common/assembly_graph/core/graph_core.hpp
@@ -11,12 +11,15 @@
 #include "utils/verify.hpp"
 #include "utils/logger/logger.hpp"
 #include "order_and_law.hpp"
+#include "utils/stl_utils.hpp"
+
+#include "adt/small_pod_vector.hpp"
+
 #include <boost/iterator/iterator_facade.hpp>
-#include "utils/simple_tools.hpp"
+#include <btree/safe_btree_set.h>
 
 namespace omnigraph {
 
-using std::vector;
 template<class DataMaster>
 class GraphCore;
 
@@ -92,7 +95,7 @@ private:
     typedef typename DataMaster::VertexData VertexData;
     typedef restricted::pure_pointer<PairedEdge<DataMaster>> EdgeId;
     typedef restricted::pure_pointer<PairedVertex<DataMaster>> VertexId;
-    typedef typename std::vector<EdgeId>::const_iterator edge_raw_iterator;
+    typedef typename adt::SmallPODVector<EdgeId>::const_iterator edge_raw_iterator;
 
     class conjugate_iterator : public boost::iterator_facade<conjugate_iterator,
             EdgeId, boost::forward_traversal_tag, EdgeId> {
@@ -137,7 +140,7 @@ private:
     friend class PairedElementManipulationHelper<VertexId>;
     friend class conjugate_iterator;
 
-    std::vector<EdgeId> outgoing_edges_;
+    adt::SmallPODVector<EdgeId> outgoing_edges_;
 
     VertexId conjugate_;
 
@@ -196,7 +199,7 @@ private:
     }
 
     const std::vector<EdgeId> OutgoingEdgesTo(VertexId v) const {
-        vector<EdgeId> result;
+        std::vector<EdgeId> result;
         for (auto it = outgoing_edges_.begin(); it != outgoing_edges_.end(); ++it) {
             if ((*it)->end() == v) {
                 result.push_back(*it);
@@ -232,13 +235,14 @@ public:
     typedef typename DataMasterT::EdgeData EdgeData;
     typedef restricted::pure_pointer<PairedEdge<DataMaster>> EdgeId;
     typedef restricted::pure_pointer<PairedVertex<DataMaster>> VertexId;
-    typedef typename std::set<VertexId>::const_iterator VertexIt;
+    typedef btree::safe_btree_set<VertexId> VertexContainer;
+    typedef typename VertexContainer::const_iterator VertexIt;
     typedef typename PairedVertex<DataMaster>::edge_const_iterator edge_const_iterator;
 
 private:
    restricted::LocalIdDistributor id_distributor_;
    DataMaster master_;
-   std::set<VertexId> vertices_;
+   VertexContainer vertices_;
 
    friend class ConstructionHelper<DataMaster>;
 public:
@@ -506,20 +510,20 @@ public:
         return edge->conjugate();
     }
 
-    size_t length(const EdgeId edge) const {
+    size_t length(EdgeId edge) const {
         return master_.length(data(edge));
     }
 
-    size_t length(const VertexId v) const {
+    size_t length(VertexId v) const {
         return master_.length(data(v));
     }
 
     //////////////////////shortcut methods
 
     std::vector<EdgeId> IncidentEdges(VertexId v) const {
-        vector<EdgeId> answer;
-        push_back_all(answer, IncomingEdges(v));
-        push_back_all(answer, OutgoingEdges(v));
+        std::vector<EdgeId> answer;
+        utils::push_back_all(answer, IncomingEdges(v));
+        utils::push_back_all(answer, OutgoingEdges(v));
         return answer;
     }
 
@@ -574,7 +578,7 @@ public:
 
     std::string str(const VertexId v) const {
 //      return master_.str(data(v));
-        return ToString(int_id(v));
+        return std::to_string(int_id(v));
     }
 
     std::string detailed_str(const VertexId v) const {
diff --git a/src/common/assembly_graph/core/graph_iterators.hpp b/src/common/assembly_graph/core/graph_iterators.hpp
index 4edb985..96997af 100644
--- a/src/common/assembly_graph/core/graph_iterators.hpp
+++ b/src/common/assembly_graph/core/graph_iterators.hpp
@@ -7,10 +7,10 @@
 
 #pragma once
 
-#include "common/adt/queue_iterator.hpp"
+#include "adt/queue_iterator.hpp"
 #include "func/pred.hpp"
 #include "action_handlers.hpp"
-#include "utils/simple_tools.hpp"
+#include "utils/stl_utils.hpp"
 #include <boost/iterator/iterator_facade.hpp>
 
 namespace omnigraph {
@@ -23,7 +23,8 @@ namespace omnigraph {
 template<class Graph, typename ElementId, typename Comparator = std::less<ElementId>>
 class SmartIterator : public GraphActionHandler<Graph> {
     typedef GraphActionHandler<Graph> base;
-    DynamicQueueIterator<ElementId, Comparator> inner_it_;
+    typedef adt::DynamicQueueIterator<ElementId, Comparator> DynamicQueueIterator;
+    DynamicQueueIterator inner_it_;
     bool add_new_;
     bool canonical_only_;
     //todo think of checking it in HandleAdd
@@ -120,7 +121,7 @@ public:
                      const Comparator& comparator = Comparator(),
                      bool canonical_only = false,
                      func::TypedPredicate<ElementId> add_condition = func::AlwaysTrue<ElementId>())
-            : base(g, "SmartSet " + ToString(this), add_new, comparator, canonical_only, add_condition) {
+            : base(g, "SmartSet", add_new, comparator, canonical_only, add_condition) {
     }
 
     template<class Iterator>
@@ -169,7 +170,7 @@ class SmartVertexIterator : public SmartIterator<Graph,
     SmartVertexIterator(const Graph &g, const Comparator& comparator =
                         Comparator(), bool canonical_only = false)
             : SmartIterator<Graph, VertexId, Comparator>(
-                g, "SmartVertexIterator " + ToString(get_id()), true,
+                g, "SmartVertexIterator " + std::to_string(get_id()), true,
                 comparator, canonical_only) {
         this->insert(g.begin(), g.end());
     }
@@ -297,7 +298,7 @@ class SmartEdgeIterator : public SmartIterator<Graph, typename Graph::EdgeId, Co
     SmartEdgeIterator(const Graph &g, Comparator comparator = Comparator(),
                       bool canonical_only = false)
             : SmartIterator<Graph, EdgeId, Comparator>(
-                g, "SmartEdgeIterator " + ToString(get_id()), true,
+                g, "SmartEdgeIterator " + std::to_string(get_id()), true,
                 comparator, canonical_only) {
         this->insert(EdgeIt(g, g.begin()), EdgeIt(g, g.end()));
 
diff --git a/src/common/assembly_graph/core/order_and_law.hpp b/src/common/assembly_graph/core/order_and_law.hpp
index 1f0373c..f21c97b 100644
--- a/src/common/assembly_graph/core/order_and_law.hpp
+++ b/src/common/assembly_graph/core/order_and_law.hpp
@@ -15,7 +15,7 @@
 #include "utils/stacktrace.hpp"
 #include <algorithm>
 #include <map>
-#include "utils/openmp_wrapper.h"
+#include "utils/parallel/openmp_wrapper.h"
 #include "folly/PackedSyncPtr.h"
 
 
diff --git a/src/common/assembly_graph/dijkstra/dijkstra_algorithm.hpp b/src/common/assembly_graph/dijkstra/dijkstra_algorithm.hpp
index 536e4ed..b19b435 100644
--- a/src/common/assembly_graph/dijkstra/dijkstra_algorithm.hpp
+++ b/src/common/assembly_graph/dijkstra/dijkstra_algorithm.hpp
@@ -6,7 +6,7 @@
 //***************************************************************************
 #pragma once
 
-#include "utils/simple_tools.hpp"
+#include "utils/stl_utils.hpp"
 #include "dijkstra_settings.hpp"
 
 #include <queue>
@@ -203,8 +203,8 @@ public:
             return path;
 
         VertexId curr_vertex = vertex;
-        VertexId prev_vertex = get(prev_vert_map_, vertex).first;
-        EdgeId edge = get(prev_vert_map_, curr_vertex).second;
+        VertexId prev_vertex = utils::get(prev_vert_map_, vertex).first;
+        EdgeId edge = utils::get(prev_vert_map_, curr_vertex).second;
 
         while (prev_vertex != VertexId(0)) {
             if (graph_.EdgeStart(edge) == prev_vertex)
@@ -212,7 +212,7 @@ public:
             else
                 path.push_back(edge);
             curr_vertex = prev_vertex;
-            const auto& prev_v_e = get(prev_vert_map_, curr_vertex);
+            const auto& prev_v_e = utils::get(prev_vert_map_, curr_vertex);
             prev_vertex = prev_v_e.first;
             edge = prev_v_e.second;
         }
diff --git a/src/common/assembly_graph/graph_support/contig_output.hpp b/src/common/assembly_graph/graph_support/contig_output.hpp
index a0daf1f..b8de48d 100644
--- a/src/common/assembly_graph/graph_support/contig_output.hpp
+++ b/src/common/assembly_graph/graph_support/contig_output.hpp
@@ -13,590 +13,20 @@
 #include "assembly_graph/stats/statistics.hpp"
 #include "assembly_graph/paths/path_finders.hpp"
 #include "assembly_graph/paths/path_utils.hpp"
+#include "assembly_graph/paths/bidirectional_path_io/io_support.hpp"
 
 namespace debruijn_graph {
 
-//This class corrects mismatches or masks repeat differences or other such things with the sequence of an edge
-template<class Graph>
-class ContigCorrector {
-private:
-    typedef typename Graph::EdgeId EdgeId;
-    const Graph &graph_;
-protected:
-    const Graph &graph() const {
-        return graph_;
-    }
-
-public:
-    ContigCorrector(const Graph &graph) : graph_(graph) {
-    }
-
-    virtual string correct(EdgeId e) = 0;
-
-    virtual ~ContigCorrector() {
-    }
-};
-
-template<class Graph>
-class DefaultContigCorrector : public ContigCorrector<Graph> {
-private:
-    typedef typename Graph::EdgeId EdgeId;
-public:
-    DefaultContigCorrector(const Graph &graph) : ContigCorrector<Graph>(graph) {
-    }
-
-    string correct(EdgeId e) {
-        return this->graph().EdgeNucls(e).str();
-    }
-};
-
-
-class GFASegmentWriter {
-private:
-    std::ostream &ostream_;
-
-
-public:
-
-    GFASegmentWriter(std::ostream &stream) : ostream_(stream)  {
-    }
-
-    void Write(size_t edge_id, const Sequence &seq, double cov) {
-        ostream_ << "S\t" << edge_id << "\t";
-        ostream_ << seq.str() << "\t";
-        ostream_ << "KC:i:" << int(cov) << std::endl;
-    }
-};
-
-class GFALinkWriter {
-private:
-    std::ostream &ostream_;
-    size_t overlap_size_;
-
-public:
-
-    GFALinkWriter(std::ostream &stream, size_t overlap_size) : ostream_(stream), overlap_size_(overlap_size)  {
-    }
-
-    void Write(size_t first_segment, std::string &first_orientation, size_t second_segment, std::string &second_orientation) {
-        ostream_ << "L\t" << first_segment << "\t" << first_orientation << "\t" ;
-        ostream_ << second_segment << "\t" << second_orientation << "\t" << overlap_size_ << "M";
-        ostream_ << std::endl;
-
-    }
-};
-
-
-struct PathSegmentSequence {
-    size_t path_id_;
-    size_t segment_number_;
-    std::vector<std::string> segment_sequence_;
-    PathSegmentSequence(size_t path_id, std::vector<std::string> &segment_sequence)
-    : path_id_(path_id), segment_number_(1), segment_sequence_(segment_sequence) {
-    }
-
-    PathSegmentSequence()
-    : path_id_(0), segment_number_(1), segment_sequence_(){
-    }
-    void Reset() {
-        segment_sequence_.clear();
-    }
-};
-
-class GFAPathWriter {
-private:
-    std::ostream &ostream_;
-
-public:
-
-    GFAPathWriter(std::ostream &stream)
-    : ostream_(stream)  {
-    }
-
-    void Write(const PathSegmentSequence &path_segment_sequence) {
-        ostream_ << "P" << "\t" ;
-        ostream_ << path_segment_sequence.path_id_ << "_" << path_segment_sequence.segment_number_ << "\t";
-        std::string delimeter = "";
-        for (size_t i = 0; i < path_segment_sequence.segment_sequence_.size() - 1; ++i) {
-            ostream_ << delimeter << path_segment_sequence.segment_sequence_[i];
-            delimeter = ",";
-        }
-        ostream_ << "\t";
-        std::string delimeter2 = "";
-        for (size_t i = 0; i < path_segment_sequence.segment_sequence_.size() - 1; ++i) {
-                ostream_ << delimeter2 << "*";
-                delimeter2 = ",";
-        }
-        ostream_ << std::endl;
-    }
-
-};
-
-template<class Graph>
-class GFAWriter {
-private:
-    typedef typename Graph::EdgeId EdgeId;
-    const Graph &graph_;
-    const path_extend::PathContainer &paths_;
-    const string filename_;
-    std::set<EdgeId> set_of_authentic_edges_;
-
-    bool IsCanonical(EdgeId e) const {
-        if (e <= graph_.conjugate(e)) {
-            return true;
-        } else {
-            return false;
-        }
-    }
-
-    std::string GetOrientation(EdgeId e) const {
-        return IsCanonical(e) ? "+" : "-";
-    }
-
-    void WriteSegments(std::ofstream &stream) {
-        GFASegmentWriter segment_writer(stream);
-        for (auto it = graph_.ConstEdgeBegin(true); !it.IsEnd(); ++it) {
-            segment_writer.Write((*it).int_id(), graph_.EdgeNucls(*it), graph_.coverage(*it) * double(graph_.length(*it)));
-        }
-    }
-
-    void WriteLinks(std::ofstream &stream) {
-        GFALinkWriter link_writer(stream, graph_.k());
-        for (auto it = graph_.SmartVertexBegin(); !it.IsEnd(); ++it) {
-            for (auto inc_edge : graph_.IncomingEdges(*it)) {
-                std::string orientation_first = GetOrientation(inc_edge);
-                size_t segment_first = IsCanonical(inc_edge) ? inc_edge.int_id() : graph_.conjugate(inc_edge).int_id();
-                for (auto out_edge : graph_.OutgoingEdges(*it)) {
-                    size_t segment_second = IsCanonical(out_edge) ? out_edge.int_id() : graph_.conjugate(out_edge).int_id();
-                    std::string orientation_second = GetOrientation(out_edge);
-                    link_writer.Write(segment_first, orientation_first, segment_second, orientation_second);
-                }
-            }
-        }
-    }
-
-    void UpdateSegmentedPath(PathSegmentSequence &segmented_path, EdgeId e) {
-        std::string segment_id = IsCanonical(e) ? ToString(e.int_id()) : ToString(graph_.conjugate(e).int_id());
-        std::string orientation = GetOrientation(e);
-        segmented_path.segment_sequence_.push_back(segment_id + orientation);
-    }
-
-    void WritePaths(std::ofstream &stream) {
-        GFAPathWriter path_writer(stream);
-        for (const auto &path_pair : paths_) {
-            const path_extend::BidirectionalPath &p = (*path_pair.first);
-            if (p.Size() == 0) {
-                continue;
-            }
-            PathSegmentSequence segmented_path;
-            segmented_path.path_id_ = p.GetId();
-            for (size_t i = 0; i < p.Size() - 1; ++i) {
-                EdgeId e = p[i];
-                UpdateSegmentedPath(segmented_path, e);
-                if (graph_.EdgeEnd(e) != graph_.EdgeStart(p[i+1])) {
-                    path_writer.Write(segmented_path);
-                    segmented_path.segment_number_++;
-                    segmented_path.Reset();
-                }
-            }
-            UpdateSegmentedPath(segmented_path, p.Back());
-            path_writer.Write(segmented_path);
-
-        }
-    }
-
-public:
-    GFAWriter(const Graph &graph, const path_extend::PathContainer &paths, const string &filename)
-    : graph_(graph), paths_(paths), filename_(filename) {
-    }
-
-    void Write() {
-        std::ofstream stream;
-        stream.open(filename_);
-        WriteSegments(stream);
-        WriteLinks(stream);
-        WritePaths(stream);
-    }
-};
-
-//This class uses corrected sequences to construct contig (just return as is, find unipath, trim contig)
-template<class Graph>
-class ContigConstructor {
-private:
-    typedef typename Graph::EdgeId EdgeId;
-    const Graph &graph_;
-    ContigCorrector<Graph> &corrector_;
-protected:
-    string correct(EdgeId e) {
-        return corrector_.correct(e);
-    }
-
-    const Graph &graph() const {
-        return graph_;
-    }
-
-public:
-
-    ContigConstructor(const Graph &graph, ContigCorrector<Graph> &corrector) : graph_(graph), corrector_(corrector) {
-    }
-
-    virtual pair<string, double> construct(EdgeId e) = 0;
-
-    virtual ~ContigConstructor(){
-    }
-};
-
-template<class Graph>
-class DefaultContigConstructor : public ContigConstructor<Graph> {
-private:
-    typedef typename Graph::EdgeId EdgeId;
-public:
-
-    DefaultContigConstructor(const Graph &graph, ContigCorrector<Graph> &corrector) : ContigConstructor<Graph>(graph, corrector) {
-    }
-
-    pair<string, double> construct(EdgeId e) {
-        return make_pair(this->correct(e), this->graph().coverage(e));
-    }
-};
-
-template<class Graph>
-vector<typename Graph::EdgeId> Unipath(const Graph& g, typename Graph::EdgeId e) {
-    omnigraph::UniquePathFinder<Graph> unipath_finder(g);
-    vector<typename Graph::EdgeId> answer = unipath_finder.UniquePathBackward(e);
-    const vector<typename Graph::EdgeId>& forward = unipath_finder.UniquePathForward(e);
-    for (size_t i = 1; i < forward.size(); ++i) {
-        answer.push_back(forward[i]);
-    }
-    return answer;
-}
-
-template<class Graph>
-class UnipathConstructor : public ContigConstructor<Graph> {
-private:
-    typedef typename Graph::EdgeId EdgeId;
-
-
-
-    string MergeOverlappingSequences(std::vector<string>& ss, size_t overlap) {
-        if (ss.empty()) {
-            return "";
-        }
-        stringstream result;
-        result << ss.front().substr(0, overlap);
-//        prev_end = ss.front().substr(0, overlap);
-        for (auto it = ss.begin(); it != ss.end(); ++it) {
-//            VERIFY(prev_end == it->substr(0, overlap));
-            result << it->substr(overlap);
-//            prev_end = it->substr(it->size() - overlap);
-        }
-        return result.str();
-    }
-
-
-    string MergeSequences(const Graph& g,
-            const vector<typename Graph::EdgeId>& continuous_path) {
-        vector<string> path_sequences;
-        for (size_t i = 0; i < continuous_path.size(); ++i) {
-            if(i > 0)
-                VERIFY(
-                    g.EdgeEnd(continuous_path[i - 1])
-                            == g.EdgeStart(continuous_path[i]));
-            path_sequences.push_back(this->correct(continuous_path[i]));
-        }
-        return MergeOverlappingSequences(path_sequences, g.k());
-    }
-
-public:
-
-    UnipathConstructor(const Graph &graph, ContigCorrector<Graph> &corrector) : ContigConstructor<Graph>(graph, corrector) {
-    }
-
-    pair<string, double> construct(EdgeId e) {
-        vector<EdgeId> unipath = Unipath(this->graph(), e);
-        return make_pair(MergeSequences(this->graph(), unipath), stats::AvgCoverage(this->graph(), unipath));
-    }
-};
-
-template<class Graph>
-class CuttingContigConstructor : public ContigConstructor<Graph> {
-private:
-    typedef typename Graph::EdgeId EdgeId;
-
-    bool ShouldCut(VertexId v) const {
-        const Graph &g = this->graph();
-        vector<EdgeId> edges;
-        push_back_all(edges, g.OutgoingEdges(v));
-        if(edges.size() == 0)
-            return false;
-        for(size_t i = 1; i < edges.size(); i++) {
-            if(g.EdgeNucls(edges[i])[g.k()] != g.EdgeNucls(edges[0])[g.k()])
-                return false;
-        }
-        edges.clear();
-        push_back_all(edges, g.IncomingEdges(v));
-        for(size_t i = 0; i < edges.size(); i++)
-            for(size_t j = i + 1; j < edges.size(); j++) {
-                if(g.EdgeNucls(edges[i])[g.length(edges[i]) - 1] != g.EdgeNucls(edges[j])[g.length(edges[j]) - 1])
-                    return true;
-            }
-        return false;
-    }
-
-public:
-
-    CuttingContigConstructor(const Graph &graph, ContigCorrector<Graph> &corrector) : ContigConstructor<Graph>(graph, corrector) {
-    }
-
-    pair<string, double> construct(EdgeId e) {
-        string result = this->correct(e);
-        if(result.size() > this->graph().k() && ShouldCut(this->graph().EdgeEnd(e))) {
-            result = result.substr(0, result.size() - this->graph().k());
-        }
-        if(result.size() > this->graph().k() && ShouldCut(this->graph().conjugate(this->graph().EdgeStart(e)))) {
-            result = result.substr(this->graph().k(), result.size());
-        }
-        return make_pair(result, this->graph().coverage(e));
-    }
-};
-
-struct ExtendedContigIdT {
-    string full_id_;
-    string short_id_;
-
-    ExtendedContigIdT(): full_id_(""), short_id_("") {}
-
-    ExtendedContigIdT(string full_id, string short_id): full_id_(full_id), short_id_(short_id) {}
-};
-
-template <class Graph>
-void MakeContigIdMap(const Graph& graph, map<EdgeId, ExtendedContigIdT>& ids, const ConnectedComponentCounter &cc_counter_, string prefix) {
-    int counter = 0;
-    for (auto it = graph.ConstEdgeBegin(true); !it.IsEnd(); ++it) {
-        EdgeId e = *it;
-        if (ids.count(e) == 0) {
-            string id;
-            if (cfg::get().pd) {
-                size_t c_id = cc_counter_.GetComponent(e);
-                id = io::MakeContigComponentId(++counter, graph.length(e) + graph.k(), graph.coverage(e), c_id, prefix);
-            }
-            else
-                id = io::MakeContigId(++counter, graph.length(e) + graph.k(), graph.coverage(e), prefix);
-            ids[e] = ExtendedContigIdT(id, ToString(counter) + "+");
-            if (e != graph.conjugate(e))
-                ids[graph.conjugate(e)] =  ExtendedContigIdT(id + "'", ToString(counter) + "-");
-        }
-    }
-}
-
-template<class Graph>
-class ContigPrinter {
-private:
-    const Graph &graph_;
-    ContigConstructor<Graph> &constructor_;
-    template<class sequence_stream>
-    void ReportEdge(sequence_stream& oss
-            , const pair<string, double> sequence_data) {
-        oss << sequence_data.second;
-        oss << sequence_data.first;
-    }
-
-    void ReportEdge(io::osequencestream_for_fastg& oss,
-            const string& sequence,
-            const string& id,
-            const set<string>& nex_ids) {
-        oss.set_header(id);
-        oss << nex_ids;
-        oss << sequence;
-    }
-
-public:
-    ContigPrinter(const Graph &graph, ContigConstructor<Graph> &constructor) : graph_(graph), constructor_(constructor) {
-    }
-
-    template<class sequence_stream>
-    void PrintContigs(sequence_stream &os) {
-        for (auto it = graph_.ConstEdgeBegin(true); !it.IsEnd(); ++it) {
-            ReportEdge<sequence_stream>(os, constructor_.construct(*it));
-        }
-    }
-
-    template<class sequence_stream>
-    void PrintContigsFASTG(sequence_stream &os, const ConnectedComponentCounter & cc_counter) {
-        map<EdgeId, ExtendedContigIdT> ids;
-        MakeContigIdMap(graph_, ids, cc_counter, "EDGE");
-        for (auto it = graph_.ConstEdgeBegin(true); !it.IsEnd(); ++it) {
-            set<string> next;
-            VertexId v = graph_.EdgeEnd(*it);
-            auto edges = graph_.OutgoingEdges(v);
-            for (auto next_it = edges.begin(); next_it != edges.end(); ++next_it) {
-                next.insert(ids[*next_it].full_id_);
-            }
-            ReportEdge(os, constructor_.construct(*it).first, ids[*it].full_id_, next);
-            if (*it != graph_.conjugate(*it))
-            {
-                set<string> next_conj;
-                v = graph_.EdgeEnd(graph_.conjugate(*it));
-                edges = graph_.OutgoingEdges(v);
-                for (auto next_it = edges.begin(); next_it != edges.end(); ++next_it) {
-                    next_conj.insert(ids[*next_it].full_id_);
-                }
-                ReportEdge(os, constructor_.construct(graph_.conjugate(*it)).first, ids[graph_.conjugate(*it)].full_id_, next_conj);               
-            }
-        }
-    }
-};
-
-template<class Graph>
-bool PossibleECSimpleCheck(const Graph& g
-        , typename Graph::EdgeId e) {
-    return g.OutgoingEdgeCount(g.EdgeStart(e)) > 1 && g.IncomingEdgeCount(g.EdgeEnd(e)) > 1;
-}
-
-template<class Graph>
-void ReportEdge(io::osequencestream_cov& oss
-        , const Graph& g
-        , typename Graph::EdgeId e
-        , bool output_unipath = false
-        , size_t solid_edge_length_bound = 0) {
-    typedef typename Graph::EdgeId EdgeId;
-    if (!output_unipath || (PossibleECSimpleCheck(g, e) && g.length(e) <= solid_edge_length_bound)) {
-        TRACE("Outputting edge " << g.str(e) << " as single edge");
-        oss << g.coverage(e);
-        oss << g.EdgeNucls(e);
-    } else {
-        TRACE("Outputting edge " << g.str(e) << " as part of unipath");
-        vector<EdgeId> unipath = Unipath(g, e);
-        TRACE("Unipath is " << g.str(unipath));
-        oss << stats::AvgCoverage(g, unipath);
-        TRACE("Merged sequence is of length " << MergeSequences(g, unipath).size());
-        oss << MergeSequences(g, unipath);
-    }
-}
-
-inline void OutputContigs(ConjugateDeBruijnGraph &g, const string &contigs_output_filename, bool output_unipath) {
+inline void OutputEdgeSequences(const Graph &g,
+                                const string &contigs_output_filename) {
     INFO("Outputting contigs to " << contigs_output_filename << ".fasta");
-    DefaultContigCorrector<ConjugateDeBruijnGraph> corrector(g);
     io::osequencestream_cov oss(contigs_output_filename + ".fasta");
 
-    if(!output_unipath) {
-        DefaultContigConstructor<ConjugateDeBruijnGraph> constructor(g, corrector);
-
-        ContigPrinter<ConjugateDeBruijnGraph>(g, constructor).PrintContigs(oss);
-    } else {
-        UnipathConstructor<ConjugateDeBruijnGraph> constructor(g, corrector);
-        ContigPrinter<ConjugateDeBruijnGraph>(g, constructor).PrintContigs(oss);
-    }
-
-//    {
-//        osequencestream_cov oss(contigs_output_filename);
-//        set<ConjugateDeBruijnGraph::EdgeId> edges;
-//        for (auto it = g.SmartEdgeBegin(); !it.IsEnd(); ++it) {
-//            if (edges.count(*it) == 0) {
-//                ReportEdge(oss, g, *it, output_unipath, solid_edge_length_bound + ".oppa.fasta");
-//                edges.insert(g.conjugate(*it));
-//            }
-//            //        oss << g.EdgeNucls(*it);
-//        }
-//        DEBUG("Contigs written");
-//    }
-//    if(!output_unipath) {
-//        OutputContigs(g, contigs_output_filename + ".2.fasta", true, solid_edge_length_bound);
-//    }
-}
-
-inline void OutputContigsToGFA(ConjugateDeBruijnGraph &g, path_extend::PathContainer &paths, const string &contigs_output_filename) {
-    INFO("Outputting graph to " << contigs_output_filename << ".gfa");
-    GFAWriter<ConjugateDeBruijnGraph> writer(g, paths, contigs_output_filename + ".gfa");
-    writer.Write();
-}
-
-
-inline void OutputContigsToFASTG(ConjugateDeBruijnGraph& g,
-                   const string& contigs_output_filename, const ConnectedComponentCounter & cc_counter) {
-
-    INFO("Outputting graph to " << contigs_output_filename << ".fastg");
-    DefaultContigCorrector<ConjugateDeBruijnGraph> corrector(g);
-    DefaultContigConstructor<ConjugateDeBruijnGraph> constructor(g, corrector);
-    io::osequencestream_for_fastg ossfg(contigs_output_filename + ".fastg");
-    ContigPrinter<ConjugateDeBruijnGraph>(g, constructor).PrintContigsFASTG(ossfg, cc_counter);
-}
-
-
-
-
-inline bool ShouldCut(ConjugateDeBruijnGraph& g, VertexId v) {
-    vector<EdgeId> edges;
-    push_back_all(edges, g.OutgoingEdges(v));
-
-    if(edges.size() == 0)
-        return false;
-    for(size_t i = 1; i < edges.size(); i++) {
-        if(g.EdgeNucls(edges[i])[g.k()] != g.EdgeNucls(edges[0])[g.k()])
-            return false;
-    }
-    edges.clear();
-    push_back_all(edges, g.IncomingEdges(v));
-    for(size_t i = 0; i < edges.size(); i++)
-        for(size_t j = i + 1; j < edges.size(); j++) {
-            if(g.EdgeNucls(edges[i])[g.length(edges[i]) - 1] != g.EdgeNucls(edges[j])[g.length(edges[j]) - 1])
-                return true;
-        }
-    return false;
-}
-
-inline void OutputCutContigs(ConjugateDeBruijnGraph& g,
-        const string& contigs_output_filename,
-        bool /*output_unipath*/ = false,
-        size_t /*solid_edge_length_bound*/ = 0) {
-    INFO("Outputting contigs to " << contigs_output_filename);
-    DefaultContigCorrector<ConjugateDeBruijnGraph> corrector(g);
-    io::osequencestream_cov oss(contigs_output_filename);
-    CuttingContigConstructor<ConjugateDeBruijnGraph> constructor(g, corrector);
-
-//    osequencestream_cov oss(contigs_output_filename);
-//    set<ConjugateDeBruijnGraph::EdgeId> edges;
-//    for (auto it = g.SmartEdgeBegin(); !it.IsEnd(); ++it) {
-//        EdgeId e = *it;
-//        cout << g.length(e) << endl;
-//        if (edges.count(e) == 0) {
-//            Sequence s = g.EdgeNucls(e);
-//            cout << s.size() << endl;
-//            cout << "oppa " << ShouldCut(g, g.EdgeEnd(e)) << endl;
-//            if(s.size() > g.k() && ShouldCut(g, g.EdgeEnd(e))) {
-//                s = s.Subseq(0, s.size() - g.k());
-//                cout << s.size() << endl;
-//            }
-//            cout << "oppa1 " << ShouldCut(g, g.conjugate(g.EdgeStart(e))) << endl;
-//            if(s.size() > g.k() && ShouldCut(g, g.conjugate(g.EdgeStart(e)))) {
-//                s = s.Subseq(g.k(), s.size());
-//                cout << s.size() << endl;
-//            }
-//            oss << g.coverage(e);
-//            oss << s;
-//            edges.insert(g.conjugate(*it));
-//        }
-//        //        oss << g.EdgeNucls(*it);
-//    }
-}
-
-inline void OutputSingleFileContigs(ConjugateDeBruijnGraph& g,
-        const string& contigs_output_dir) {
-    INFO("Outputting contigs to " << contigs_output_dir);
-    int n = 0;
-    make_dir(contigs_output_dir);
-    char n_str[20];
-    set<ConjugateDeBruijnGraph::EdgeId> edges;
-    for (auto it = g.SmartEdgeBegin(); !it.IsEnd(); ++it) {
-        if (edges.count(*it) == 0) {
-            sprintf(n_str, "%d.fa", n);
-            edges.insert(g.conjugate(*it));
-            io::osequencestream oss(contigs_output_dir + n_str);
-            oss << g.EdgeNucls(*it);
-            n++;
-        }
+    for (auto it = g.ConstEdgeBegin(true); !it.IsEnd(); ++it) {
+        EdgeId e = *it;
+        oss << g.coverage(e);
+        oss << g.EdgeNucls(e).str();
     }
-    DEBUG("SingleFileContigs(Conjugate) written");
 }
 
 }
diff --git a/src/common/assembly_graph/graph_support/coverage_filling.hpp b/src/common/assembly_graph/graph_support/coverage_filling.hpp
index ad2516e..5bbf31d 100644
--- a/src/common/assembly_graph/graph_support/coverage_filling.hpp
+++ b/src/common/assembly_graph/graph_support/coverage_filling.hpp
@@ -10,7 +10,7 @@ struct SimultaneousCoverageCollector {
 };
 
 template<>
-struct SimultaneousCoverageCollector<SimpleStoring> {
+struct SimultaneousCoverageCollector<utils::SimpleStoring> {
     template<class SimultaneousCoverageFiller, class Info>
     static void CollectCoverage(SimultaneousCoverageFiller& filler, const Info &edge_info) {
         filler.inc_coverage(edge_info);
@@ -18,7 +18,7 @@ struct SimultaneousCoverageCollector<SimpleStoring> {
 };
 
 template<>
-struct SimultaneousCoverageCollector<InvertableStoring> {
+struct SimultaneousCoverageCollector<utils::InvertableStoring> {
     template<class SimultaneousCoverageFiller, class Info>
     static void CollectCoverage(SimultaneousCoverageFiller& filler, const Info &edge_info) {
         filler.inc_coverage(edge_info);
diff --git a/src/common/assembly_graph/graph_support/detail_coverage.hpp b/src/common/assembly_graph/graph_support/detail_coverage.hpp
index 15600e2..5ca2884 100644
--- a/src/common/assembly_graph/graph_support/detail_coverage.hpp
+++ b/src/common/assembly_graph/graph_support/detail_coverage.hpp
@@ -7,7 +7,7 @@
 
 #pragma once
 
-#include "utils/indices/perfect_hash_map.hpp"
+#include "utils/ph_map/perfect_hash_map.hpp"
 #include "assembly_graph/core/coverage.hpp"
 #include "assembly_graph/core/action_handlers.hpp"
 #include "utils/verify.hpp"
@@ -80,10 +80,11 @@ public:
         for (auto I = count_index.value_cbegin(), E = count_index.value_cend();
                 I != E; ++I) {
             const auto& edge_info = *I;
+            if (!edge_info.valid())
+                continue;
             EdgeId e = edge_info.edge_id;
             unsigned offset = edge_info.offset;
             unsigned count = edge_info.count;
-            VERIFY(edge_info.valid());
             VERIFY(e.get() != NULL);
             if (offset < averaging_range_) {
                 IncRawCoverage(e, count);
diff --git a/src/common/assembly_graph/graph_support/genomic_quality.hpp b/src/common/assembly_graph/graph_support/genomic_quality.hpp
index 608d120..d342665 100644
--- a/src/common/assembly_graph/graph_support/genomic_quality.hpp
+++ b/src/common/assembly_graph/graph_support/genomic_quality.hpp
@@ -109,7 +109,7 @@ public:
 
     virtual std::string label(EdgeId edge) const {
         double q = quality(edge);
-        return (q == 0) ? "" : "quality: " + ToString(q);
+        return (q == 0) ? "" : "quality: " + std::to_string(q);
     }
 
     void clear() {
@@ -187,7 +187,7 @@ public:
     {}
 
     virtual void HandlePositiveQuality(EdgeId e) {
-        printing_rh_.HandleDelete(e, "_" + ToString(this->quality_handler().quality(e)));
+        printing_rh_.HandleDelete(e, "_" + std::to_string(this->quality_handler().quality(e)));
     }
 
 private:
@@ -265,7 +265,7 @@ private:
 //            //todo magic constant
 //            map<EdgeId, string> empty_coloring;
 //            visualization::visualization_utils::WriteComponent(g_, EdgeNeighborhood<Graph>(g_, edge, 50, 250),
-//                  folder + "edge_" +  ToString(g_.int_id(edge)) + ".dot", empty_coloring, labeler_);
+//                  folder + "edge_" +  std::to_string(g_.int_id(edge)) + ".dot", empty_coloring, labeler_);
 //  }
 //
 //private:
@@ -356,7 +356,7 @@ private:
 //
 //    virtual std::string label(EdgeId edge) const {
 //        double q = quality(edge);
-//        return (q == 0) ? "" : "quality: " + ToString(q);
+//        return (q == 0) ? "" : "quality: " + std::to_string(q);
 //    }
 //
 //};
@@ -451,8 +451,8 @@ private:
 //            //todo magic constant
 ////          map<EdgeId, string> empty_coloring;
 //            shared_ptr<GraphSplitter<Graph>> splitter = EdgeNeighborhoodFinder<Graph>(g_, edge, 50, 250);
-//            visualization::visualization_utils::WriteComponents(g_, *splitter/*, "locality_of_edge_" + ToString(g_.int_id(edge))*/
-//                    , folder + "edge_" +  ToString(g_.int_id(edge)) + "_" + ToString(quality_handler_.quality(edge)) + ".dot"
+//            visualization::visualization_utils::WriteComponents(g_, *splitter/*, "locality_of_edge_" + std::to_string(g_.int_id(edge))*/
+//                    , folder + "edge_" +  std::to_string(g_.int_id(edge)) + "_" + std::to_string(quality_handler_.quality(edge)) + ".dot"
 //                    , colorer_, labeler_);
 //        } else {
 //            TRACE("Deleting edge " << g_.str(edge) << " with zero quality");
@@ -503,8 +503,8 @@ private:
 //            shared_ptr<GraphSplitter<Graph>> splitter = EdgeNeighborhoodFinder<Graph>(g_, edge, 50,
 //                    250);
 //
-//            visualization::visualization_utils::WriteComponents(g_, *splitter, TrueFilter<vector<VertexId>>(), "locality_of_edge_" + ToString(g_.int_id(edge))
-//                    , folder + "edge_" +  ToString(g_.int_id(edge)) + "_" + ToString(quality_handler_.quality(edge)) + ".dot"
+//            visualization::visualization_utils::WriteComponents(g_, *splitter, TrueFilter<vector<VertexId>>(), "locality_of_edge_" + std::to_string(g_.int_id(edge))
+//                    , folder + "edge_" +  std::to_string(g_.int_id(edge)) + "_" + std::to_string(quality_handler_.quality(edge)) + ".dot"
 //                    , empty_coloring, labeler_);
 //        }
 //    }
@@ -543,8 +543,8 @@ private:
 //            //todo magic constant
 //            map<EdgeId, string> empty_coloring;
 //            shared_ptr<GraphSplitter<Graph>> splitter = EdgeNeighborhoodFinder<Graph>(g_, edge, 50, 250);
-//            visualization::visualization_utils::WriteComponents(g_, *splitter, TrueFilter<vector<VertexId>>(), "locality_of_edge_" + ToString(g_.int_id(edge))
-//                    , folder + "edge_" +  ToString(g_.int_id(edge)) + ".dot", empty_coloring, labeler_);
+//            visualization::visualization_utils::WriteComponents(g_, *splitter, TrueFilter<vector<VertexId>>(), "locality_of_edge_" + std::to_string(g_.int_id(edge))
+//                    , folder + "edge_" +  std::to_string(g_.int_id(edge)) + ".dot", empty_coloring, labeler_);
 //    }
 //
 //private:
diff --git a/src/common/assembly_graph/graph_support/parallel_processing.hpp b/src/common/assembly_graph/graph_support/parallel_processing.hpp
index abd3149..f53b50f 100644
--- a/src/common/assembly_graph/graph_support/parallel_processing.hpp
+++ b/src/common/assembly_graph/graph_support/parallel_processing.hpp
@@ -10,39 +10,10 @@
 #include "utils/logger/logger.hpp"
 #include "assembly_graph/core/graph_iterators.hpp"
 #include "assembly_graph/graph_support/graph_processing_algorithm.hpp"
-#include "utils/openmp_wrapper.h"
+#include "utils/parallel/openmp_wrapper.h"
 
 namespace omnigraph {
 
-template<class ItVec, class Condition, class Handler>
-void FindInterestingFromChunkIterators(const ItVec& chunk_iterators,
-                                       const Condition& predicate,
-                                       const Handler& handler) {
-    VERIFY(chunk_iterators.size() > 1);
-    typedef typename Condition::checked_type ElementType;
-    std::vector<std::vector<ElementType>> of_interest(omp_get_max_threads());
-
-    #pragma omp parallel for schedule(guided)
-    for (size_t i = 0; i < chunk_iterators.size() - 1; ++i) {
-        size_t cnt = 0;
-        for (auto it = chunk_iterators[i], end = chunk_iterators[i + 1]; it != end; ++it) {
-             ElementType t = *it;
-             if (predicate(t)) {
-                 of_interest[omp_get_thread_num()].push_back(t);
-             }
-             cnt++;
-         }
-         DEBUG("Processed " << cnt << " elements as potential candidates by thread " << omp_get_thread_num());
-    }
-
-    for (auto& chunk : of_interest) {
-        for (const auto& el : chunk) {
-            handler(el);
-        }
-        chunk.clear();
-    }
-}
-
 template<class Graph, class ElementId>
 class InterestingElementFinder {
 protected:
@@ -59,6 +30,9 @@ public:
     virtual bool Run(const Graph& /*g*/, HandlerF /*handler*/) const = 0;
 };
 
+template<class Graph, class ElementId>
+using InterestingFinderPtr = std::shared_ptr<InterestingElementFinder<Graph, ElementId>>;
+
 template<class Graph, class ElementId = typename Graph::EdgeId>
 class TrivialInterestingElementFinder :
         public InterestingElementFinder<Graph, ElementId> {
@@ -101,6 +75,39 @@ class ParallelInterestingElementFinder : public InterestingElementFinder<Graph,
     const size_t chunk_cnt_;
 public:
 
+    template<class ItVec, class Condition, class Handler>
+    static void FindInterestingFromChunkIterators(const ItVec& chunk_iterators,
+                                           const Condition& predicate,
+                                           const Handler& handler) {
+        VERIFY(chunk_iterators.size() > 1);
+        DEBUG("Parallel search for elements of interest");
+        typedef typename Condition::checked_type ElementType;
+        std::vector<std::vector<ElementType>> of_interest(chunk_iterators.size() - 1);
+
+        #pragma omp parallel for schedule(guided)
+        for (size_t i = 0; i < chunk_iterators.size() - 1; ++i) {
+            DEBUG("Processing chunk " << i << " by thread " << omp_get_thread_num());
+            size_t cnt = 0;
+            for (auto it = chunk_iterators[i], end = chunk_iterators[i + 1]; it != end; ++it) {
+                ElementType t = *it;
+                if (predicate(t)) {
+                    of_interest[i].push_back(t);
+                }
+                cnt++;
+            }
+            DEBUG("Processed chunk " << i << ". " << cnt << " elements identified as potential candidates");
+        }
+
+        DEBUG("Merging chunks");
+        for (auto& chunk : of_interest) {
+            for (const auto& el : chunk) {
+                handler(el);
+            }
+            chunk.clear();
+        }
+        DEBUG("Chunks merged");
+    }
+
     ParallelInterestingElementFinder(func::TypedPredicate<ElementId> condition,
                                      size_t chunk_cnt)
             : base(condition), chunk_cnt_(chunk_cnt) {}
@@ -143,7 +150,7 @@ inline size_t LoopedRun(Algo& algo) {
     return total_triggered;
 }
 
-//todo only potentially relevant edges should be stored at any point
+//FIXME only potentially relevant edges should be stored at any point
 template<class Graph, class ElementId,
          class Comparator = std::less<ElementId>>
 class PersistentProcessingAlgorithm : public PersistentAlgorithmBase<Graph> {
@@ -153,8 +160,8 @@ protected:
 
 private:
     SmartSetIterator<Graph, ElementId, Comparator> it_;
-    bool tracking_;
-    size_t total_iteration_estimate_;
+    const bool tracking_;
+    const size_t total_iteration_estimate_;
     size_t curr_iteration_;
 
 protected:
@@ -222,6 +229,7 @@ public:
         curr_iteration_++;
         return triggered;
     }
+
 private:
     DECL_LOGGER("PersistentProcessingAlgorithm"); 
 };
@@ -269,6 +277,7 @@ private:
     DECL_LOGGER("ParallelEdgeRemovingAlgorithm");
 };
 
+//TODO use coverage order?
 template<class Graph, class Comparator = std::less<typename Graph::EdgeId>>
 class DisconnectionAlgorithm : public PersistentProcessingAlgorithm<Graph,
         typename Graph::EdgeId,
@@ -288,6 +297,7 @@ public:
             : base(g,
                    std::make_shared<omnigraph::ParallelInterestingElementFinder<Graph>>(condition, chunk_cnt),
             /*canonical_only*/false, comp, track_changes),
+              //condition_(second_check ? condition : func::AlwaysTrue<EdgeId>()),
               condition_(condition),
               disconnector_(g, removal_handler) {
     }
diff --git a/src/common/assembly_graph/graph_support/scaff_supplementary.cpp b/src/common/assembly_graph/graph_support/scaff_supplementary.cpp
index 5dd3907..f08e133 100644
--- a/src/common/assembly_graph/graph_support/scaff_supplementary.cpp
+++ b/src/common/assembly_graph/graph_support/scaff_supplementary.cpp
@@ -33,23 +33,23 @@ void ScaffoldingUniqueEdgeAnalyzer::SetCoverageBasedCutoff() {
 }
 
 
-void ScaffoldingUniqueEdgeAnalyzer::FillUniqueEdgeStorage(ScaffoldingUniqueEdgeStorage &storage_) {
-    storage_.unique_edges_.clear();
+void ScaffoldingUniqueEdgeAnalyzer::FillUniqueEdgeStorage(ScaffoldingUniqueEdgeStorage &storage) {
+    storage.unique_edges_.clear();
     size_t total_len = 0;
     size_t unique_len = 0;
     size_t unique_num = 0;
-    storage_.SetMinLength(length_cutoff_);
+    storage.set_min_length(length_cutoff_);
     for (auto iter = gp_.g.ConstEdgeBegin(); !iter.IsEnd(); ++iter) {
         size_t tlen = gp_.g.length(*iter);
         total_len += tlen;
         if (gp_.g.length(*iter) >= length_cutoff_ && gp_.g.coverage(*iter) > median_coverage_ * (1 - relative_coverage_variation_)
                 && gp_.g.coverage(*iter) < median_coverage_ * (1 + relative_coverage_variation_) ) {
-            storage_.unique_edges_.insert(*iter);
+            storage.unique_edges_.insert(*iter);
             unique_len += tlen;
             unique_num ++;
         }
     }
-    for (auto iter = storage_.begin(); iter != storage_.end(); ++iter) {
+    for (auto iter = storage.begin(); iter != storage.end(); ++iter) {
         DEBUG (gp_.g.int_id(*iter) << " " << gp_.g.coverage(*iter) << " " << gp_.g.length(*iter) );
     }
     INFO ("With length cutoff: " << length_cutoff_ <<", median long edge coverage: " << median_coverage_ << ", and maximal unique coverage: " <<
@@ -84,8 +84,9 @@ map<EdgeId, size_t> ScaffoldingUniqueEdgeAnalyzer::FillNextEdgeVoting(Bidirectio
     return voting;
 }
 
-bool ScaffoldingUniqueEdgeAnalyzer::ConservativeByPaths(EdgeId e, shared_ptr<GraphCoverageMap> long_reads_cov_map, const pe_config::LongReads lr_config, int direction) const {
-    BidirectionalPathSet all_set = long_reads_cov_map->GetCoveringPaths(e);
+bool ScaffoldingUniqueEdgeAnalyzer::ConservativeByPaths(EdgeId e, const GraphCoverageMap &long_reads_cov_map,
+                                                        const pe_config::LongReads &lr_config, int direction) const {
+    BidirectionalPathSet all_set = long_reads_cov_map.GetCoveringPaths(e);
     BidirectionalPathMap<size_t> active_paths;
     size_t loop_weight = 0;
     size_t nonloop_weight = 0;
@@ -101,7 +102,9 @@ bool ScaffoldingUniqueEdgeAnalyzer::ConservativeByPaths(EdgeId e, shared_ptr<Gra
         }
     }
 //TODO: small plasmid, paths a-b-a, b-a-b ?
-    if (loop_weight > 1) 
+//2 - hybrid paths weight doubles (conjugate paths)
+//TODO: remove weight dublication
+    if (loop_weight > 2 && loop_weight * overwhelming_majority_ > nonloop_weight)
             return false;
         else
             DEBUG (gp_.g.int_id(e) << " loop/nonloop weight " << loop_weight << " " << nonloop_weight);
@@ -121,8 +124,8 @@ bool ScaffoldingUniqueEdgeAnalyzer::ConservativeByPaths(EdgeId e, shared_ptr<Gra
                 maxx = pair.second;
             }
         for (const auto &pair: voting)
-            //TODO:: 1 from config?
-            if (pair.first != next_unique && pair.second > 1)
+//2 - hybrid paths weight doubles (conjugate paths)
+            if (pair.first != next_unique && pair.second > 2)
                 alt += pair.second;
         if (maxx < lr_config.unique_edge_priority * double(alt)) {
             DEBUG("edge " << gp_.g.int_id(e) <<" dir "<< direction << " was not unique" );
@@ -146,7 +149,9 @@ bool ScaffoldingUniqueEdgeAnalyzer::ConservativeByPaths(EdgeId e, shared_ptr<Gra
     return true;
 }
 
-bool ScaffoldingUniqueEdgeAnalyzer::ConservativeByPaths(EdgeId e, shared_ptr<GraphCoverageMap> long_reads_cov_map, const pe_config::LongReads lr_config) const{
+bool ScaffoldingUniqueEdgeAnalyzer::ConservativeByPaths(EdgeId e,
+                                                        const GraphCoverageMap &long_reads_cov_map,
+                                                        const pe_config::LongReads &lr_config) const{
     return (ConservativeByPaths(e, long_reads_cov_map, lr_config, 1) && ConservativeByPaths(e, long_reads_cov_map, lr_config, -1));
 }
 
@@ -167,7 +172,7 @@ void ScaffoldingUniqueEdgeAnalyzer::CheckCorrectness(ScaffoldingUniqueEdgeStorag
     }
 }
 
-set<VertexId> ScaffoldingUniqueEdgeAnalyzer::GetChildren(VertexId v, map <VertexId, set<VertexId>> &dijkstra_cash_) const {
+set<VertexId> ScaffoldingUniqueEdgeAnalyzer::GetChildren(VertexId v, map<VertexId, set<VertexId>> &dijkstra_cash_) const {
     DijkstraHelper<debruijn_graph::Graph>::BoundedDijkstra dijkstra(
             DijkstraHelper<debruijn_graph::Graph>::CreateBoundedDijkstra(gp_.g, max_dijkstra_depth_, max_dijkstra_vertices_));
     dijkstra.Run(v);
@@ -180,7 +185,7 @@ set<VertexId> ScaffoldingUniqueEdgeAnalyzer::GetChildren(VertexId v, map <Vertex
     return dijkstra_cash_[v];
 }
 
-bool ScaffoldingUniqueEdgeAnalyzer::FindCommonChildren(EdgeId e1, EdgeId e2, map <VertexId, set<VertexId>> &dijkstra_cash_) const {
+bool ScaffoldingUniqueEdgeAnalyzer::FindCommonChildren(EdgeId e1, EdgeId e2, map<VertexId, set<VertexId>> &dijkstra_cash_) const {
     auto s1 = GetChildren(gp_.g.EdgeEnd(e1), dijkstra_cash_);
     auto s2 = GetChildren(gp_.g.EdgeEnd(e2), dijkstra_cash_);
     if (s1.find(gp_.g.EdgeStart(e2)) != s1.end()) {
@@ -198,7 +203,7 @@ bool ScaffoldingUniqueEdgeAnalyzer::FindCommonChildren(EdgeId e1, EdgeId e2, map
     return false;
 }
 
-bool ScaffoldingUniqueEdgeAnalyzer::FindCommonChildren(vector<pair<EdgeId, double>> &next_weights) const {
+bool ScaffoldingUniqueEdgeAnalyzer::FindCommonChildren(const vector<pair<EdgeId, double>> &next_weights) const {
     map <VertexId, set<VertexId>> dijkstra_cash_;
     for (size_t i = 0; i < next_weights.size(); i ++) {
         for (size_t j = i + 1; j < next_weights.size(); j++) {
@@ -238,17 +243,18 @@ bool ScaffoldingUniqueEdgeAnalyzer::FindCommonChildren(EdgeId from, size_t lib_i
 }
 
 
-void ScaffoldingUniqueEdgeAnalyzer::ClearLongEdgesWithPairedLib(size_t lib_index, ScaffoldingUniqueEdgeStorage &storage_) const {
+void ScaffoldingUniqueEdgeAnalyzer::ClearLongEdgesWithPairedLib(size_t lib_index,
+                                                                ScaffoldingUniqueEdgeStorage &storage) const {
     set<EdgeId> to_erase;
-    for (EdgeId edge: storage_ ) {
+    for (EdgeId edge: storage) {
         if (!FindCommonChildren(edge, lib_index)) {
             to_erase.insert(edge);
             to_erase.insert(gp_.g.conjugate(edge));
         }
     }
-    for (auto iter = storage_.begin(); iter !=  storage_.end(); ){
+    for (auto iter = storage.begin(); iter != storage.end(); ){
         if (to_erase.find(*iter) != to_erase.end()){
-            iter = storage_.erase(iter);
+            iter = storage.erase(iter);
         } else {
             iter++;
         }
@@ -256,7 +262,9 @@ void ScaffoldingUniqueEdgeAnalyzer::ClearLongEdgesWithPairedLib(size_t lib_index
 }
 
 
-void ScaffoldingUniqueEdgeAnalyzer::FillUniqueEdgesWithLongReads(shared_ptr<GraphCoverageMap> long_reads_cov_map, ScaffoldingUniqueEdgeStorage& unique_storage_pb, const pe_config::LongReads lr_config) {
+void ScaffoldingUniqueEdgeAnalyzer::FillUniqueEdgesWithLongReads(GraphCoverageMap &long_reads_cov_map,
+                                                                 ScaffoldingUniqueEdgeStorage &unique_storage_pb,
+                                                                 const pe_config::LongReads &lr_config) {
     for (auto iter = gp_.g.ConstEdgeBegin(); !iter.IsEnd(); ++iter) {
         EdgeId e = *iter;
         if (ConservativeByLength(e) && ConservativeByPaths(e, long_reads_cov_map, lr_config)) {
diff --git a/src/common/assembly_graph/graph_support/scaff_supplementary.hpp b/src/common/assembly_graph/graph_support/scaff_supplementary.hpp
index 8ace4a6..ff47519 100644
--- a/src/common/assembly_graph/graph_support/scaff_supplementary.hpp
+++ b/src/common/assembly_graph/graph_support/scaff_supplementary.hpp
@@ -16,14 +16,20 @@ typedef debruijn_graph::EdgeId EdgeId;
  */
 class ScaffoldingUniqueEdgeStorage {
     friend class ScaffoldingUniqueEdgeAnalyzer;
-private:
-    set <EdgeId> unique_edges_;
+    set<EdgeId> unique_edges_;
     size_t min_unique_length_;
+
 public:
-    ScaffoldingUniqueEdgeStorage(): unique_edges_(){
+    ScaffoldingUniqueEdgeStorage(): unique_edges_(), min_unique_length_(0) {
         DEBUG("storage created, empty");
     }
 
+    ScaffoldingUniqueEdgeStorage(const ScaffoldingUniqueEdgeStorage&) = delete;
+    ScaffoldingUniqueEdgeStorage& operator=(const ScaffoldingUniqueEdgeStorage&) = delete;
+
+    ScaffoldingUniqueEdgeStorage(ScaffoldingUniqueEdgeStorage&&) = default;
+    ScaffoldingUniqueEdgeStorage& operator=(ScaffoldingUniqueEdgeStorage&&) = default;
+
     bool IsUnique(EdgeId e) const {
         return (unique_edges_.find(e) != unique_edges_.end());
     }
@@ -36,34 +42,32 @@ public:
         return unique_edges_.end();
     }
 
-    decltype(unique_edges_.begin()) erase(decltype(unique_edges_.begin()) iter){
+    decltype(unique_edges_.begin()) erase(decltype(unique_edges_.begin()) iter) {
         return unique_edges_.erase(iter);
     }
 
     size_t size() const {
         return unique_edges_.size();
     }
-    size_t GetMinLength() const {
+    size_t min_length() const {
         return min_unique_length_;
     }
-    void SetMinLength(size_t min_length)  {
+    void set_min_length(size_t min_length) {
         min_unique_length_ = min_length;
     }
 
-    const set<EdgeId>& GetSet() const {
+    const set<EdgeId>& unique_edges() const {
         return unique_edges_;
     }
 
 protected:
-    DECL_LOGGER("ScaffoldingUniqueEdgeStorage")
-
+    DECL_LOGGER("ScaffoldingUniqueEdgeStorage");
 };
 
 //Auxillary class required to fillin the unique edge storage.
 
 
 class ScaffoldingUniqueEdgeAnalyzer {
-
     const debruijn_graph::conj_graph_pack &gp_;
     size_t length_cutoff_;
     double median_coverage_;
@@ -73,13 +77,15 @@ class ScaffoldingUniqueEdgeAnalyzer {
     static const size_t max_dijkstra_depth_ = 1000;
     static const size_t max_dijkstra_vertices_ = 1000;
     static const size_t overwhelming_majority_ = 10;
-    set<VertexId> GetChildren(VertexId v, map <VertexId, set<VertexId>> &dijkstra_cash_) const;
-    bool FindCommonChildren(EdgeId e1, EdgeId e2, map <VertexId, set<VertexId>> &dijkstra_cash_) const;
-    bool FindCommonChildren(vector<pair<EdgeId, double>> &next_weights) const;
+    set<VertexId> GetChildren(VertexId v, map<VertexId, set<VertexId>> &dijkstra_cash) const;
+    bool FindCommonChildren(EdgeId e1, EdgeId e2, map<VertexId, set<VertexId>> &dijkstra_cash) const;
+    bool FindCommonChildren(const vector<pair<EdgeId, double>> &next_weights) const;
     bool FindCommonChildren(EdgeId from, size_t lib_index) const;
     map<EdgeId, size_t> FillNextEdgeVoting(BidirectionalPathMap<size_t>& active_paths, int direction) const;
-    bool ConservativeByPaths(EdgeId e, shared_ptr<GraphCoverageMap> long_reads_cov_map, const pe_config::LongReads lr_config) const;
-    bool ConservativeByPaths(EdgeId e, shared_ptr<GraphCoverageMap> long_reads_cov_map, const pe_config::LongReads lr_config, int direction) const;
+    bool ConservativeByPaths(EdgeId e, const GraphCoverageMap &long_reads_cov_map,
+                             const pe_config::LongReads &lr_config) const;
+    bool ConservativeByPaths(EdgeId e, const GraphCoverageMap &long_reads_cov_map,
+                             const pe_config::LongReads &lr_config, int direction) const;
     bool ConservativeByLength(EdgeId e);
     void CheckCorrectness(ScaffoldingUniqueEdgeStorage& unique_storage_pb);
 protected:
@@ -88,12 +94,78 @@ protected:
 
     void SetCoverageBasedCutoff();
 public:
-    ScaffoldingUniqueEdgeAnalyzer(const debruijn_graph::conj_graph_pack &gp, size_t apriori_length_cutoff, double max_relative_coverage):gp_(gp), length_cutoff_(apriori_length_cutoff), relative_coverage_variation_(max_relative_coverage){
+    ScaffoldingUniqueEdgeAnalyzer(const debruijn_graph::conj_graph_pack &gp, size_t apriori_length_cutoff,
+                                  double max_relative_coverage):
+            gp_(gp),
+            length_cutoff_(apriori_length_cutoff),
+            relative_coverage_variation_(max_relative_coverage) {
         SetCoverageBasedCutoff();
     }
-    void FillUniqueEdgeStorage(ScaffoldingUniqueEdgeStorage &storage_);
-    void ClearLongEdgesWithPairedLib(size_t lib_index, ScaffoldingUniqueEdgeStorage &storage_) const;
-    void FillUniqueEdgesWithLongReads(shared_ptr<GraphCoverageMap> long_reads_cov_map, ScaffoldingUniqueEdgeStorage& unique_storage_pb, const pe_config::LongReads lr_config);
+    void FillUniqueEdgeStorage(ScaffoldingUniqueEdgeStorage &storage);
+    void ClearLongEdgesWithPairedLib(size_t lib_index, ScaffoldingUniqueEdgeStorage &storage) const;
+    void FillUniqueEdgesWithLongReads(GraphCoverageMap &long_reads_cov_map,
+                                      ScaffoldingUniqueEdgeStorage &unique_storage_pb,
+                                      const pe_config::LongReads &lr_config);
+};
+
+class UsedUniqueStorage {
+    set<EdgeId> used_;
+    const ScaffoldingUniqueEdgeStorage& unique_;
+
+public:
+    UsedUniqueStorage(const UsedUniqueStorage&) = delete;
+    UsedUniqueStorage& operator=(const UsedUniqueStorage&) = delete;
+
+    UsedUniqueStorage(UsedUniqueStorage&&) = default;
+    UsedUniqueStorage& operator=(UsedUniqueStorage&&) = default;
+
+    explicit UsedUniqueStorage(const ScaffoldingUniqueEdgeStorage& unique):
+            unique_(unique) {}
+
+    void insert(EdgeId e) {
+        if (unique_.IsUnique(e)) {
+            used_.insert(e);
+            used_.insert(e->conjugate());
+        }
+    }
+
+//    const ScaffoldingUniqueEdgeStorage& unique_edge_storage() const {
+//        return unique_;
+//    }
+
+    bool IsUsedAndUnique(EdgeId e) const {
+        return (unique_.IsUnique(e) && used_.find(e) != used_.end());
+    }
+
+    bool UniqueCheckEnabled() const {
+        return unique_.size() > 0;
+    }
+
+    bool TryUseEdge(BidirectionalPath &path, EdgeId e, const Gap &gap) {
+        if (UniqueCheckEnabled()) {
+            if (IsUsedAndUnique(e)) {
+                return false;
+            } else {
+                insert(e);
+            }
+        }
+        path.PushBack(e, gap);
+        return true;
+    }
+
+};
+
+//FIXME rename
+struct UniqueData {
+    size_t min_unique_length_;
+    double unique_variation_;
+
+    ScaffoldingUniqueEdgeStorage main_unique_storage_;
+    vector<ScaffoldingUniqueEdgeStorage> unique_storages_;
+
+    ScaffoldingUniqueEdgeStorage unique_pb_storage_;
+    vector<PathContainer> long_reads_paths_;
+    vector<GraphCoverageMap> long_reads_cov_map_;
 };
 }
 
diff --git a/src/common/assembly_graph/handlers/edge_labels_handler.hpp b/src/common/assembly_graph/handlers/edge_labels_handler.hpp
index 551939f..a6e6407 100644
--- a/src/common/assembly_graph/handlers/edge_labels_handler.hpp
+++ b/src/common/assembly_graph/handlers/edge_labels_handler.hpp
@@ -19,7 +19,7 @@
 
 //#include "utils.hpp"
 #include "visualization/graph_labeler.hpp"
-#include "utils/simple_tools.hpp"
+#include "utils/stl_utils.hpp"
 #include <unordered_map>
 #include <map>
 
diff --git a/src/common/assembly_graph/handlers/edges_position_handler.hpp b/src/common/assembly_graph/handlers/edges_position_handler.hpp
index c3b4c4a..7cd25f8 100644
--- a/src/common/assembly_graph/handlers/edges_position_handler.hpp
+++ b/src/common/assembly_graph/handlers/edges_position_handler.hpp
@@ -16,7 +16,7 @@
 #define EDGES_POSITION_HANDLER_HPP_
 
 //#include "utils.hpp"
-#include "utils/simple_tools.hpp"
+#include "utils/stl_utils.hpp"
 #include "assembly_graph/paths/mapping_path.hpp"
 #include "assembly_graph/core/action_handlers.hpp"
 
@@ -46,8 +46,8 @@ class EdgesPositionHandler: public GraphActionHandler<Graph> {
     map<EdgeId, map<string, std::set<MappingRange>>> edges_positions_;
     //TODO extract set<MappingRange> as a storage class
 
-    MappingRange EraseAndExtract(set<MappingRange> &ranges, set<MappingRange>::iterator &position, const MappingRange &new_pos) {
-        auto &old_pos = *position;
+    MappingRange EraseAndExtract(set<MappingRange> &ranges, set<MappingRange>::iterator &position, const MappingRange &new_pos) const {
+        auto old_pos = *position;
         if(old_pos.IntersectLeftOf(new_pos) || old_pos.StrictlyContinuesWith(new_pos, max_mapping_gap_, max_gap_diff_)) {
             ranges.erase(position);
             return old_pos.Merge(new_pos);
@@ -59,8 +59,14 @@ class EdgesPositionHandler: public GraphActionHandler<Graph> {
         }
     }
 
+    std::string RangeStr(const Range &range) const {
+        std::stringstream ss;
+        ss << "[" << (range.start_pos + 1) << " - " << range.end_pos << "]";
+        return ss.str();
+    }
+
 public:
-    MappingRange EraseAndExtract(set<MappingRange> &ranges, MappingRange new_pos) {
+    MappingRange EraseAndExtract(set<MappingRange> &ranges, MappingRange new_pos) const {
         auto it = ranges.lower_bound(new_pos);
         if(it != ranges.end()) {
             new_pos = EraseAndExtract(ranges, it, new_pos);
@@ -72,7 +78,7 @@ public:
         return new_pos;
     }
 
-    set<MappingRange> GetEdgePositions(EdgeId edge, string contig_id) const {
+    set<MappingRange> GetEdgePositions(EdgeId edge, const string &contig_id) const {
         VERIFY(this->IsAttached());
         auto edge_it = edges_positions_.find(edge);
         if(edge_it == edges_positions_.end())
@@ -85,6 +91,12 @@ public:
             return it->second;
     }
 
+    MappingRange GetUniqueEdgePosition(EdgeId edge, const string &contig_id) const {
+        auto poss = GetEdgePositions(edge, contig_id);
+        VERIFY(poss.size() == 1);
+        return *poss.begin();
+    }
+
     vector<EdgePosition> GetEdgePositions(EdgeId edge) const {
         VERIFY(this->IsAttached());
         auto edge_it = edges_positions_.find(edge);
@@ -106,7 +118,7 @@ public:
 
     void AddEdgePosition(EdgeId edge, string contig_id, MappingRange new_pos) {
         VERIFY(this->IsAttached());
-        if(new_pos.empty())
+        if (new_pos.empty())
             return;
         set<MappingRange> &new_set = edges_positions_[edge][contig_id];
         new_pos = EraseAndExtract(new_set, new_pos);
@@ -136,9 +148,11 @@ public:
         vector<EdgePosition> positions = GetEdgePositions(edge);
         size_t counter = 0;
         for (auto pos_it = positions.begin(), end = positions.end(); pos_it != end; ++pos_it) {
-            ss << "(" << pos_it->contigId << ": " << pos_it->mr << ")\\n";
+            ss << "(" << pos_it->contigId << ": "
+               << RangeStr(pos_it->mr.initial_range) << " --> "
+               << RangeStr(pos_it->mr.mapped_range) << ")\\n";
             counter++;
-            if(counter > 30) {
+            if (counter > 30) {
                 ss << "and many more. Totally " << positions.size() << " positions.";
                 break;
             }
diff --git a/src/common/assembly_graph/handlers/id_track_handler.hpp b/src/common/assembly_graph/handlers/id_track_handler.hpp
index 12ab12b..456cd96 100644
--- a/src/common/assembly_graph/handlers/id_track_handler.hpp
+++ b/src/common/assembly_graph/handlers/id_track_handler.hpp
@@ -10,7 +10,7 @@
 #include <unordered_map>
 //#include "utils.hpp"
 #include "visualization/graph_labeler.hpp"
-#include "utils/simple_tools.hpp"
+#include "utils/stl_utils.hpp"
 #include "assembly_graph/core/action_handlers.hpp"
 using namespace omnigraph;
 
@@ -78,33 +78,4 @@ public:
     }
 };
 
-template<class VertexId, class EdgeId>
-class BaseIdTrackHandler {
-public:
-    BaseIdTrackHandler() {
-    }
-
-    size_t ReturnIntId(EdgeId e) const {
-        return e.int_id();
-    }
-
-    size_t ReturnIntId(VertexId v) const {
-        return v.int_id();
-    }
-};
-
-template<class Graph>
-class IdTrackHandler : public BaseIdTrackHandler<typename Graph::VertexId, typename Graph::EdgeId> {
-private:
-    typedef typename Graph::EdgeId EdgeId;
-    typedef typename Graph::VertexId VertexId;
-    const Graph &graph_;
-public:
-    IdTrackHandler(const Graph& g) : graph_(g) {
-    }
-
-    ~IdTrackHandler() {
-    }
-};
-
 }
diff --git a/src/common/utils/indices/edge_index_builders.hpp b/src/common/assembly_graph/index/edge_index_builders.hpp
similarity index 65%
rename from src/common/utils/indices/edge_index_builders.hpp
rename to src/common/assembly_graph/index/edge_index_builders.hpp
index 95d5831..870e929 100644
--- a/src/common/utils/indices/edge_index_builders.hpp
+++ b/src/common/assembly_graph/index/edge_index_builders.hpp
@@ -8,10 +8,82 @@
 #pragma once
 
 #include "edge_info_updater.hpp"
-#include "perfect_hash_map_builder.hpp"
+#include "utils/ph_map/perfect_hash_map_builder.hpp"
 
 namespace debruijn_graph {
 
+template<class Graph, class KmerFilter>
+class DeBruijnGraphKMerSplitter : public utils::DeBruijnKMerSplitter<KmerFilter> {
+    typedef typename omnigraph::GraphEdgeIterator<Graph> EdgeIt;
+    typedef typename Graph::EdgeId EdgeId;
+    typedef typename adt::iterator_range<EdgeIt> EdgeRange;
+
+    const Graph &g_;
+
+    size_t FillBufferFromEdges(EdgeRange &r, unsigned thread_id);
+
+public:
+    DeBruijnGraphKMerSplitter(const std::string &work_dir,
+                              unsigned K, const Graph &g,
+                              size_t read_buffer_size = 0)
+            : utils::DeBruijnKMerSplitter<KmerFilter>(work_dir, K, KmerFilter(), read_buffer_size),
+              g_(g) {}
+
+    fs::files_t Split(size_t num_files, unsigned nthreads) override;
+};
+
+template<class Graph, class KmerFilter>
+size_t
+DeBruijnGraphKMerSplitter<Graph, KmerFilter>::FillBufferFromEdges(EdgeRange &r,
+                                                                  unsigned thread_id) {
+    size_t seqs = 0;
+    for (auto &it = r.begin(); it != r.end(); ++it) {
+        const Sequence &nucls = g_.EdgeNucls(*it);
+
+        seqs += 1;
+        if (this->FillBufferFromSequence(nucls, thread_id))
+            break;
+    }
+
+    return seqs;
+}
+
+template<class Graph, class KmerFilter>
+fs::files_t DeBruijnGraphKMerSplitter<Graph, KmerFilter>::Split(size_t num_files, unsigned nthreads) {
+    fs::files_t out = this->PrepareBuffers(num_files, nthreads, this->read_buffer_size_);
+
+    omnigraph::IterationHelper<Graph, EdgeId> edges(g_);
+    auto its = edges.Chunks(nthreads);
+
+    // Turn chunks into iterator ranges
+    std::vector<EdgeRange> ranges;
+    for (size_t i = 0; i < its.size() - 1; ++i)
+        ranges.emplace_back(its[i], its[i+1]);
+
+    VERIFY(ranges.size() <= nthreads);
+
+    size_t counter = 0, n = 10;
+    while (!std::all_of(ranges.begin(), ranges.end(),
+                        [](const EdgeRange &r) { return r.begin() == r.end(); })) {
+#       pragma omp parallel for num_threads(nthreads) reduction(+ : counter)
+        for (size_t i = 0; i < ranges.size(); ++i)
+            counter += FillBufferFromEdges(ranges[i], omp_get_thread_num());
+
+        this->DumpBuffers(out);
+
+        if (counter >> n) {
+            INFO("Processed " << counter << " edges");
+            n += 1;
+        }
+    }
+
+    INFO("Used " << counter << " sequences.");
+
+    this->ClearBuffers();
+
+    return out;
+}
+
 template<class Index>
 class GraphPositionFillingIndexBuilder {
 public:
@@ -21,7 +93,13 @@ public:
     template<class Graph>
     void BuildIndexFromGraph(Index &index,
                              const Graph/*T*/ &g, size_t read_buffer_size = 0) const {
-        debruijn_graph::BuildIndexFromGraph(index, g, read_buffer_size);
+        unsigned nthreads = omp_get_max_threads();
+
+        DeBruijnGraphKMerSplitter<Graph,
+                                  utils::StoringTypeFilter<typename Index::storing_type>>
+                splitter(index.workdir(), index.k(), g, read_buffer_size);
+        utils::KMerDiskCounter<RtSeq> counter(index.workdir(), splitter);
+        BuildIndex(index, counter, 16, nthreads);
 
         // Now use the index to fill the coverage and EdgeId's
         INFO("Collecting k-mer coverage information from graph, this takes a while.");
@@ -145,7 +223,7 @@ class CoverageFillingEdgeIndexBuilder : public Builder {
     size_t BuildIndexFromStream(IndexT &index,
                                 Streams &streams,
                                 io::SingleStream* contigs_stream = 0) const {
-        debruijn_graph::BuildIndexFromStream(index, streams, contigs_stream);
+        utils::BuildIndexFromStream(index, streams, contigs_stream);
 
         return ParallelFillCoverage(index, streams, false);
     }
diff --git a/src/common/utils/indices/edge_info_updater.hpp b/src/common/assembly_graph/index/edge_info_updater.hpp
similarity index 97%
rename from src/common/utils/indices/edge_info_updater.hpp
rename to src/common/assembly_graph/index/edge_info_updater.hpp
index 3760f00..93de09b 100644
--- a/src/common/utils/indices/edge_info_updater.hpp
+++ b/src/common/assembly_graph/index/edge_info_updater.hpp
@@ -8,10 +8,10 @@
 #pragma once
 
 #include "utils/standard_base.hpp"
-#include "utils/openmp_wrapper.h"
+#include "utils/parallel/openmp_wrapper.h"
 #include "sequence/sequence.hpp"
 #include "assembly_graph/core/graph_iterators.hpp"
-#include "utils/indices/edge_position_index.hpp"
+#include "edge_position_index.hpp"
 
 namespace debruijn_graph {
 
diff --git a/src/common/utils/indices/edge_multi_index.hpp b/src/common/assembly_graph/index/edge_multi_index.hpp
similarity index 91%
rename from src/common/utils/indices/edge_multi_index.hpp
rename to src/common/assembly_graph/index/edge_multi_index.hpp
index 763e9a5..fd94434 100644
--- a/src/common/utils/indices/edge_multi_index.hpp
+++ b/src/common/assembly_graph/index/edge_multi_index.hpp
@@ -7,7 +7,7 @@
 
 #pragma once
 
-#include "perfect_hash_map.hpp"
+#include "utils/ph_map/perfect_hash_map.hpp"
 #include "edge_info_updater.hpp"
 #include "edge_position_index.hpp"
 
@@ -91,9 +91,9 @@ public:
 
 //todo it is not handling graph events!!!
 template<class IdType, class Seq = RtSeq,
-    class traits = kmer_index_traits<Seq>,  class StoringType = SimpleStoring >
-class DeBruijnEdgeMultiIndex : public KeyStoringMap<Seq, EdgeInfoStorage<IdType>, traits, StoringType > {
-  typedef KeyStoringMap<Seq, EdgeInfoStorage<IdType>, traits, StoringType > base;
+    class traits = utils::kmer_index_traits<Seq>,  class StoringType = utils::SimpleStoring >
+class DeBruijnEdgeMultiIndex : public utils::KeyStoringMap<Seq, EdgeInfoStorage<IdType>, traits, StoringType > {
+  typedef utils::KeyStoringMap<Seq, EdgeInfoStorage<IdType>, traits, StoringType > base;
  public:
   typedef StoringType storing_type;
   typedef typename base::traits_t traits_t;
diff --git a/src/common/utils/indices/edge_position_index.hpp b/src/common/assembly_graph/index/edge_position_index.hpp
similarity index 88%
rename from src/common/utils/indices/edge_position_index.hpp
rename to src/common/assembly_graph/index/edge_position_index.hpp
index 446fad4..7bef7b9 100644
--- a/src/common/utils/indices/edge_position_index.hpp
+++ b/src/common/assembly_graph/index/edge_position_index.hpp
@@ -7,7 +7,7 @@
 
 #pragma once
 
-#include "perfect_hash_map.hpp"
+#include "utils/ph_map/perfect_hash_map.hpp"
 #include "io/reads/single_read.hpp"
 
 namespace debruijn_graph {
@@ -62,11 +62,11 @@ stream &operator<<(stream &s, const EdgeInfo<IdType> &info) {
     return s << "EdgeInfo[" << info.edge_id.int_id() << ", " << info.offset << ", " << info.count << "]";
 }
 
-template<class Graph, class StoringType = DefaultStoring>
-class KmerFreeEdgeIndex : public KeyIteratingMap<RtSeq, EdgeInfo<typename Graph::EdgeId>,
-        kmer_index_traits<RtSeq>, StoringType> {
-    typedef KeyIteratingMap<RtSeq, EdgeInfo<typename Graph::EdgeId>,
-            kmer_index_traits<RtSeq>, StoringType> base;
+template<class Graph, class StoringType = utils::DefaultStoring>
+class KmerFreeEdgeIndex : public utils::KeyIteratingMap<RtSeq, EdgeInfo<typename Graph::EdgeId>,
+        utils::kmer_index_traits<RtSeq>, StoringType> {
+    typedef utils::KeyIteratingMap<RtSeq, EdgeInfo<typename Graph::EdgeId>,
+            utils::kmer_index_traits<RtSeq>, StoringType> base;
     const Graph &graph_;
 
 public:
@@ -143,11 +143,11 @@ public:
     }
 };
 
-template<class Graph, class StoringType = DefaultStoring>
-class KmerStoringEdgeIndex : public KeyStoringMap<RtSeq, EdgeInfo<typename Graph::EdgeId>,
-        kmer_index_traits<RtSeq>, StoringType> {
-  typedef KeyStoringMap<RtSeq, EdgeInfo<typename Graph::EdgeId>,
-          kmer_index_traits<RtSeq>, StoringType> base;
+template<class Graph, class StoringType = utils::DefaultStoring>
+class KmerStoringEdgeIndex : public utils::KeyStoringMap<RtSeq, EdgeInfo<typename Graph::EdgeId>,
+        utils::kmer_index_traits<RtSeq>, StoringType> {
+  typedef utils::KeyStoringMap<RtSeq, EdgeInfo<typename Graph::EdgeId>,
+          utils::kmer_index_traits<RtSeq>, StoringType> base;
 
 public:
   typedef typename base::traits_t traits_t;
diff --git a/src/common/assembly_graph/paths/bidirectional_path.hpp b/src/common/assembly_graph/paths/bidirectional_path.hpp
index 26b5388..dbf805f 100644
--- a/src/common/assembly_graph/paths/bidirectional_path.hpp
+++ b/src/common/assembly_graph/paths/bidirectional_path.hpp
@@ -14,6 +14,7 @@
 #pragma once
 
 #include <atomic>
+#include <boost/algorithm/string.hpp>
 #include "assembly_graph/core/graph.hpp"
 #include "assembly_graph/components/connected_component.hpp"
 
@@ -26,90 +27,121 @@ namespace path_extend {
 class BidirectionalPath;
 
 struct Gap {
-    int gap_;
-    uint32_t trash_previous_;
-    uint32_t trash_current_;
-    Gap(int gap)
-    : gap_(gap), trash_previous_(0), trash_current_(0)
-    { }
-
-    Gap(int gap, uint32_t trash_previous, uint32_t trash_current)
-     : gap_(gap), trash_previous_(trash_previous), trash_current_(trash_current)
+    int gap;
+    uint32_t trash_previous;
+    uint32_t trash_current;
+
+    static const int INVALID_GAP = std::numeric_limits<int>::min();
+
+    static const Gap& INVALID() {
+        static Gap gap = Gap(INVALID_GAP);
+        return gap;
+    }
+
+    //gap is in k+1-mers and does not know about "trash" regions
+    explicit Gap(int gap_ = 0, uint32_t trash_previous_ = 0, uint32_t trash_current_ = 0)
+     : gap(gap_), trash_previous(trash_previous_), trash_current(trash_current_)
      { }
+
+    Gap conjugate() const {
+        return Gap(gap, trash_current, trash_previous);
+    }
+
+    bool operator==(const Gap &that) const {
+        return gap == that.gap && trash_previous == that.trash_previous && trash_current == that.trash_current;
+    }
+
+    bool operator!=(const Gap &that) const {
+        return !(*this == that);
+    }
+
+    int overlap(size_t k) const {
+        return int(k) - gap;
+    }
+
+    int overlap_after_trim(size_t k) const {
+        return overlap(k) - trash_current - trash_previous;
+    }
+
+    bool NoTrash() const {
+        return trash_current == 0 && trash_previous == 0;
+    }
 };
 
+inline std::ostream& operator<<(std::ostream& os, Gap gap) {
+    return os << "[" << gap.gap << ", " << gap.trash_previous << ", " << gap.trash_current << "]";
+}
 
 class PathListener {
 public:
-    virtual void FrontEdgeAdded(EdgeId e, BidirectionalPath * path, Gap gap) = 0;
-    virtual void BackEdgeAdded(EdgeId e, BidirectionalPath * path, Gap gap) = 0;
-    virtual void FrontEdgeRemoved(EdgeId e, BidirectionalPath * path) = 0;
-    virtual void BackEdgeRemoved(EdgeId e, BidirectionalPath * path) = 0;
-    virtual ~PathListener() {
-    }
+    virtual void FrontEdgeAdded(EdgeId e, BidirectionalPath *path, const Gap &gap) = 0;
+    virtual void BackEdgeAdded(EdgeId e, BidirectionalPath *path, const Gap &gap) = 0;
+    virtual void FrontEdgeRemoved(EdgeId e, BidirectionalPath *path) = 0;
+    virtual void BackEdgeRemoved(EdgeId e, BidirectionalPath *path) = 0;
+    virtual ~PathListener() {}
 };
 
-
 class BidirectionalPath : public PathListener {
-private:
     static std::atomic<uint64_t> path_id_;
 
+    const Graph& g_;
+    std::deque<EdgeId> data_;
+    BidirectionalPath* conj_path_;
+    // Length from beginning of i-th edge to path end: L(e_i + gap_(i+1) + e_(i+1) + ... + gap_N + e_N)
+    std::deque<size_t> cumulative_len_;
+    std::deque<Gap> gap_len_;  // e0 -> gap1 -> e1 -> ... -> gapN -> eN; gap0 = 0
+    std::vector<PathListener *> listeners_;
+    const uint64_t id_;  //Unique ID
+    float weight_;
 
 public:
     BidirectionalPath(const Graph& g)
             : g_(g),
-              data_(),
-              conj_path_(NULL),
-              cumulative_len_(),
-              gap_len_(),
-              listeners_(),
+              conj_path_(nullptr),
               id_(path_id_++),
-              weight_(1.0),
-              has_overlaped_begin_(false),
-              has_overlaped_end_(false),
-              overlap_(false) {
+              weight_(1.0) {
     }
 
     BidirectionalPath(const Graph& g, const std::vector<EdgeId>& path)
             : BidirectionalPath(g) {
+        //TODO cumulative_len takes O(N^2) to fill
         for (size_t i = 0; i < path.size(); ++i) {
             PushBack(path[i]);
         }
-        RecountLengths();
     }
 
-    BidirectionalPath(const Graph& g, EdgeId startingEdge)
+    BidirectionalPath(const Graph& g, EdgeId e)
             : BidirectionalPath(g) {
-        PushBack(startingEdge);
+        PushBack(e);
     }
 
     BidirectionalPath(const BidirectionalPath& path)
             : g_(path.g_),
               data_(path.data_),
-              conj_path_(NULL),
+              conj_path_(nullptr),
               cumulative_len_(path.cumulative_len_),
               gap_len_(path.gap_len_),
               listeners_(),
               id_(path_id_++),
-              weight_(path.weight_),
-              has_overlaped_begin_(path.has_overlaped_begin_),
-              has_overlaped_end_(path.has_overlaped_end_),
-              overlap_(path.overlap_) {
+              weight_(path.weight_) {
+    }
+
+    const Graph &g() const{
+        return g_;
     }
 
-public:
     void Subscribe(PathListener * listener) {
         listeners_.push_back(listener);
     }
 
-    void Unsubscribe(PathListener * listener) {
-        for (auto it = listeners_.begin(); it != listeners_.end(); ++it) {
-            if (*it == listener) {
-                listeners_.erase(it);
-                break;
-            }
-        }
-    }
+//    void Unsubscribe(PathListener * listener) {
+//        for (auto it = listeners_.begin(); it != listeners_.end(); ++it) {
+//            if (*it == listener) {
+//                listeners_.erase(it);
+//                break;
+//            }
+//        }
+//    }
 
     void SetConjPath(BidirectionalPath* path) {
         conj_path_ = path;
@@ -144,10 +176,11 @@ public:
     }
 
     size_t Length() const {
-        if (gap_len_.size() == 0 || cumulative_len_.size() == 0) {
+        if (Empty()) {
             return 0;
         }
-        return cumulative_len_[0] + gap_len_[0].gap_;
+        VERIFY(gap_len_[0].gap == 0);
+        return cumulative_len_[0];
     }
 
     //TODO iterators forward/reverse
@@ -159,30 +192,21 @@ public:
         return data_[index];
     }
 
-    EdgeId ReverseAt(size_t index) const {
-        return data_[data_.size() - index - 1];
+    int ShiftLength(size_t index) const {
+        return gap_len_[index].gap + (int) g_.length(At(index));
     }
 
-
     // Length from beginning of i-th edge to path end for forward directed path: L(e1 + e2 + ... + eN)
     size_t LengthAt(size_t index) const {
         return cumulative_len_[index];
     }
 
-    int GapAt(size_t index) const {
-        return gap_len_[index].gap_;
-    }
-
-    const Gap& GapInfoAt(size_t index) const {
+    Gap GapAt(size_t index) const {
         return gap_len_[index];
     }
 
-    uint32_t TrashCurrentAt(size_t index) const {
-        return gap_len_[index].trash_current_;
-    }
-
-    uint32_t TrashPreviousAt(size_t index) const {
-        return gap_len_[index].trash_previous_;
+    void SetGapAt(size_t index, const Gap &gap) {
+        gap_len_[index] = gap;
     }
 
     size_t GetId() const {
@@ -197,24 +221,21 @@ public:
         return data_.front();
     }
 
-    void PushBack(EdgeId e, int gap = 0, uint32_t trash_previous = 0, uint32_t trash_current = 0) {
-        data_.push_back(e);
-        Gap gap_struct(gap, trash_previous, trash_current);
-        gap_len_.push_back(gap_struct);
-        IncreaseLengths(g_.length(e), gap_struct);
-        NotifyBackEdgeAdded(e, gap_struct);
-    }
-
-    void PushBack(EdgeId e, Gap gap) {
+    void PushBack(EdgeId e, const Gap& gap = Gap()) {
+        VERIFY(!data_.empty() || gap == Gap());
         data_.push_back(e);
         gap_len_.push_back(gap);
-        IncreaseLengths(g_.length(e), gap);
+        IncreaseLengths(g_.length(e), gap.gap);
         NotifyBackEdgeAdded(e, gap);
     }
 
-    void PushBack(const BidirectionalPath& path) {
-        for (size_t i = 0; i < path.Size(); ++i) {
-            PushBack(path.At(i), path.GapAt(i), path.TrashPreviousAt(i), path.TrashCurrentAt(i));
+    void PushBack(const BidirectionalPath& path, const Gap& gap = Gap()) {
+        if (path.Size() > 0) {
+            VERIFY(path.GapAt(0) == Gap());
+            PushBack(path.At(0), gap);
+            for (size_t i = 1; i < path.Size(); ++i) {
+                PushBack(path.At(i), path.GapAt(i));
+            }
         }
     }
 
@@ -241,25 +262,18 @@ public:
         }
     }
 
-    virtual void FrontEdgeAdded(EdgeId, BidirectionalPath*, int) {
-    }
-
-    virtual void FrontEdgeAdded(EdgeId, BidirectionalPath*, Gap) {
+    void FrontEdgeAdded(EdgeId, BidirectionalPath*, const Gap&) override {
+        //FIXME is it ok to be empty?
     }
 
-
-    virtual void BackEdgeAdded(EdgeId e, BidirectionalPath*, int gap) {
-        PushFront(g_.conjugate(e), gap);
+    void BackEdgeAdded(EdgeId e, BidirectionalPath*, const Gap& gap) override {
+        PushFront(g_.conjugate(e), gap.conjugate());
     }
 
-    virtual void BackEdgeAdded(EdgeId e, BidirectionalPath*, Gap gap) {
-        PushFront(g_.conjugate(e), gap);
+    void FrontEdgeRemoved(EdgeId, BidirectionalPath*) override {
     }
 
-    virtual void FrontEdgeRemoved(EdgeId, BidirectionalPath*) {
-    }
-
-    virtual void BackEdgeRemoved(EdgeId, BidirectionalPath *) {
+    void BackEdgeRemoved(EdgeId, BidirectionalPath *) override {
         PopFront();
     }
 
@@ -304,6 +318,7 @@ public:
         return result;
     }
 
+    //TODO is it ok not to compare gaps here?
     bool CompareFrom(size_t from, const BidirectionalPath& sample) const {
         if (from + sample.Size() > Size()) {
             return false;
@@ -336,28 +351,6 @@ public:
         return 0;
     }
 
-    size_t OverlapEndSize(const BidirectionalPath* path2) const {
-        if (Size() == 0) {
-            return 0;
-        }
-        int last1 = (int) Size() - 1;
-        int max_over = 0;
-        vector<size_t> begins2 = path2->FindAll(At(last1));
-        for (size_t i = 0; i < begins2.size(); ++i) {
-            int begin2 = (int) begins2[i];
-            int cur1 = last1;
-            while (begin2 > 0 && cur1 > 0 && path2->At(begin2 - 1) == At(cur1 - 1)) {
-                cur1--;
-                begin2--;
-            }
-            int over = last1 - cur1 + 1;
-            if (begin2 == 0 && cur1 > 0 && over > max_over) {
-                max_over = over;
-            }
-        }
-        return (size_t) max_over;
-    }
-
     int FindFirst(const BidirectionalPath& path, size_t from = 0) const {
         if (path.Size() > Size()) {
             return -1;
@@ -382,10 +375,6 @@ public:
         return -1;
     }
 
-    bool Contains(const BidirectionalPath& path) const {
-        return FindFirst(path) != -1;
-    }
-
     bool Equal(const BidirectionalPath& path) const {
         return operator==(path);
     }
@@ -398,91 +387,11 @@ public:
         return !operator==(path);
     }
 
-    void CheckConjugateEnd(size_t max_repeat_length) {
-        size_t prev_size = 0;
-        while (prev_size != Size()) {
-            prev_size = Size();
-            FindConjEdges(max_repeat_length);
-        }
-    }
-
-    size_t GetComponent(const debruijn_graph::ConnectedComponentCounter &component_counter) const {
-        std::unordered_map <size_t, size_t> component_sizes;
-        for (size_t i = 0; i < this->Size(); i++) {
-            auto e = this->At(i);
-            size_t comp_id = component_counter.GetComponent(e);
-            if (component_sizes.find(comp_id) == component_sizes.end())
-                component_sizes[comp_id] = 0;
-            component_sizes[comp_id] += g_.length(e);
-        }
-        size_t ans = 0;
-        size_t maxans = 0;
-        for (auto pp: component_sizes) {
-            if (pp.second > maxans) {
-                ans = pp.first;
-                maxans = pp.second;
-            }
-        }
-        return ans;
-    }
-
-    void FindConjEdges(size_t max_repeat_length) {
-        for (size_t begin_pos = 0; begin_pos < Size(); ++begin_pos) {
-            size_t begin = begin_pos;
-            vector<size_t> conj_pos = FindAll(g_.conjugate(At(begin_pos)), begin + 1);
-            for (auto end_pos = conj_pos.rbegin(); end_pos != conj_pos.rend(); ++end_pos) {
-                VERIFY(*end_pos < Size());
-                size_t end = *end_pos;
-                if (end <= begin) {
-                    continue;
-                }
-                while (begin < end && At(begin) == g_.conjugate(At(end))) {
-                    begin++;
-                    end--;
-                }
-                DEBUG("Found palindromic fragment from " << begin_pos << " to " << *end_pos);
-                Print();
-                VERIFY(*end_pos < Size());
-                size_t tail_size = Size() - *end_pos - 1;
-                size_t head_size = begin_pos;
-                size_t palindrom_half_size = begin - begin_pos;
-                size_t head_len = Length() - LengthAt(begin_pos);
-                size_t tail_len = *end_pos < Size() - 1 ? LengthAt(*end_pos + 1) : 0;
-//TODO : this is not true in case of gaps inside the palindrom_len;
-                size_t palindrom_len = (size_t) max((int) LengthAt(begin_pos) - (int) LengthAt(begin), 0);
-                size_t between = (size_t) max(0, (int) LengthAt(begin) - (int) (end < Size() - 1 ? LengthAt(end + 1) : 0));
-                DEBUG("tail len " << tail_len << " head len " << head_len << " palindrom_len "<< palindrom_len << " between " << between);
-                if (palindrom_len <= max_repeat_length) {
-                    if (palindrom_len < head_len && palindrom_len < tail_len) {
-                        DEBUG("too big head and end");
-                        continue;
-                    }
-                    if (between > palindrom_len) {
-                        DEBUG("too big part between");
-                        continue;
-                    }
-                }
-                bool delete_tail = tail_size < head_size;
-                if (tail_size == head_size) {
-                    delete_tail = tail_len < head_len;
-                }
-                if (delete_tail) {
-                    PopBack(tail_size + palindrom_half_size);
-                    DEBUG("Deleting tail  because of palindrom removal");
-                    return;
-                } else {
-                    GetConjPath()->PopBack(head_size + palindrom_half_size);
-                    DEBUG("Deleting head because of palindrom removal");
-                    return;
-                }
-            }
-        }
-    }
-
     BidirectionalPath SubPath(size_t from, size_t to) const {
+        VERIFY(from <= to && to <= Size());
         BidirectionalPath result(g_);
-        for (size_t i = from; i < min(to, Size()); ++i) {
-            result.PushBack(data_[i], gap_len_[i]);
+        for (size_t i = from; i < to; ++i) {
+            result.PushBack(data_[i], i == from ? Gap() : gap_len_[i]);
         }
         return result;
     }
@@ -505,165 +414,80 @@ public:
         if (Empty()) {
             return result;
         }
-        result.PushBack(g_.conjugate(Back()), 0);
+        result.PushBack(g_.conjugate(Back()));
         for (int i = ((int) Size()) - 2; i >= 0; --i) {
-            result.PushBack(g_.conjugate(data_[i]), gap_len_[i + 1].gap_ + gap_len_[i + 1].trash_current_ - gap_len_[i + 1].trash_previous_, gap_len_[i + 1].trash_current_, gap_len_[i + 1].trash_previous_);
+            result.PushBack(g_.conjugate(data_[i]), gap_len_[i + 1].conjugate());
         }
 
         return result;
     }
 
+    //FIXME remove
     vector<EdgeId> ToVector() const {
         return vector<EdgeId>(data_.begin(), data_.end());
     }
 
-    bool CameToInterstrandBulge() const {
-        if (Empty())
-            return false;
-
-        EdgeId lastEdge = Back();
-        VertexId lastVertex = g_.EdgeEnd(lastEdge);
-
-        if (g_.OutgoingEdgeCount(lastVertex) == 2) {
-            vector<EdgeId> bulgeEdges(g_.out_begin(lastVertex), g_.out_end(lastVertex));
-            VertexId nextVertex = g_.EdgeEnd(bulgeEdges[0]);
-
-            if (bulgeEdges[0] == g_.conjugate(bulgeEdges[1]) && nextVertex == g_.EdgeEnd(bulgeEdges[1]) && g_.CheckUniqueOutgoingEdge(nextVertex)
-                    && *(g_.out_begin(nextVertex)) == g_.conjugate(lastEdge)) {
-
-                DEBUG("Came to interstrand bulge " << g_.int_id(lastEdge));
-                return true;
-            }
-        }
-        return false;
-    }
-
-    bool IsInterstrandBulge() const {
-        if (Empty())
-            return false;
-
-        EdgeId lastEdge = Back();
-        VertexId lastVertex = g_.EdgeEnd(lastEdge);
-        VertexId prevVertex = g_.EdgeStart(lastEdge);
-
-        if (g_.OutgoingEdgeCount(prevVertex) == 2 && g_.IncomingEdgeCount(lastVertex) == 2 && g_.CheckUniqueOutgoingEdge(lastVertex)
-                && g_.CheckUniqueIncomingEdge(prevVertex) && *(g_.in_begin(prevVertex)) == g_.conjugate(*(g_.out_begin(lastVertex)))) {
-
-            vector<EdgeId> bulgeEdges(g_.out_begin(prevVertex), g_.out_end(prevVertex));
-            EdgeId bulgeEdge = bulgeEdges[0] == lastEdge ? bulgeEdges[1] : bulgeEdges[0];
-
-            if (bulgeEdge == g_.conjugate(lastEdge)) {
-                DEBUG("In interstrand bulge " << g_.int_id(lastEdge));
-                return true;
-            }
-        }
-        return false;
-    }
-
-    void Print() const {
-        DEBUG("Path " << id_);
-        DEBUG("Length " << Length());
-        DEBUG("Weight " << weight_);
-        DEBUG("#, edge, length, gap length, trash length, total length, total length from begin");
-        for (size_t i = 0; i < Size(); ++i) {
-            DEBUG(i << ", " << g_.int_id(At(i)) << ", " 
-                    << g_.length(At(i)) << ", " << GapAt(i) << ", " 
-                    << TrashPreviousAt(i) << "-" << TrashCurrentAt(i) 
-                    << ", " << LengthAt(i) << ", " 
-                    << ((Length() < LengthAt(i)) ? 0 : Length() - LengthAt(i)));
+    void PrintDEBUG() const {
+        for (const auto& s: PrintLines()) {
+            DEBUG(s);
         }
     }
 
-    void PrintInString() const {
-        stringstream str;
-        for (size_t i = 0; i < Size(); ++i) {
-            str << g_.int_id(At(i)) << " ";
-        }
-        DEBUG(str.str());
-    }
-    void PrintInfo() const {
-        INFO("Path " << id_);
-        INFO("Length " << Length());
-        INFO("Weight " << weight_);
-        INFO("#, edge, length, gap length, total length");
-        for (size_t i = 0; i < Size(); ++i) {
-            INFO(i << ", " << g_.int_id(At(i)) << ", " << g_.length(At(i)) << ", " << GapAt(i) << ", " << LengthAt(i));
+    void PrintINFO() const {
+        for (const auto& s: PrintLines()) {
+            INFO(s);
         }
     }
 
-    void Print(std::ostream& os) {
+    void Print(std::ostream &os) const {
         if (Empty()) {
             return;
         }
-        os << "Path " << GetId() << endl;
-        os << "Length " << Length() << endl;
-        os << "#, edge, length, gap, total length" << endl;
+        os << "Path " << GetId() << "\n";
+        os << "Length " << Length() << "\n";
+        os << "Weight " << weight_ << "\n";
+        os << "#, edge (length), gap info, total length, total length from start" << "\n";
         for (size_t i = 0; i < Size(); ++i) {
-            os << i << ", " << g_.int_id(At(i)) << ", " << g_.length(At(i))  << ", " << GapAt(i) << ", " << LengthAt(i) << endl;
-        }
-    }
-
-    void SetOverlapedBeginTo(BidirectionalPath* to) {
-        if (has_overlaped_begin_) {
-            to->SetOverlapBegin();
-        }
-        SetOverlapBegin();
-        to->SetOverlapEnd();
-    }
-
-    void SetOverlapedEndTo(BidirectionalPath* to) {
-        if (has_overlaped_end_) {
-            to->SetOverlapEnd();
+            os << i << ", " << g_.str(At(i))
+               << ", " << GapAt(i)
+               << ", " << LengthAt(i)
+               << ", " << ((Length() < LengthAt(i)) ? 0 : Length() - LengthAt(i)) << "\n";
         }
-        SetOverlapEnd();
-        to->SetOverlapBegin();
     }
 
-    void SetOverlap(bool overlap = true) {
-        overlap_ = overlap;
-        conj_path_->overlap_ = overlap;
+    std::string str() const {
+        stringstream ss;
+        Print(ss);
+        return ss.str();
     }
 
-    bool HasOverlapedBegin() const {
-        return has_overlaped_begin_;
+    auto begin() const -> decltype(data_.begin()) {
+        return data_.begin();
     }
 
-    bool HasOverlapedEnd() const {
-        return has_overlaped_end_;
+    auto end() const -> decltype(data_.end()) {
+        return data_.end();
     }
 
-    bool IsOverlap() const {
-        return overlap_;
-    }
-
-    void ResetOverlaps() {
-        overlap_ = false;
-        has_overlaped_begin_ = false;
-        has_overlaped_end_ = false;
-        conj_path_->overlap_ = false;
-        conj_path_->has_overlaped_begin_ = false;
-        conj_path_->has_overlaped_end_ = false;
-    }
 private:
 
-    void RecountLengths() {
-        cumulative_len_.clear();
-        size_t currentLength = 0;
-        for (auto iter = data_.rbegin(); iter != data_.rend(); ++iter) {
-            currentLength += g_.length((EdgeId) *iter);
-            cumulative_len_.push_front(currentLength);
-        }
+    vector<std::string> PrintLines() const {
+        auto as_str = str();
+        boost::trim(as_str);
+        std::vector<std::string> result;
+        boost::split(result, as_str, boost::is_any_of("\n"), boost::token_compress_on);
+        return result;
     }
 
-    void IncreaseLengths(size_t length, Gap gap_struct) {
+    void IncreaseLengths(size_t length, int gap) {
         for (auto iter = cumulative_len_.begin(); iter != cumulative_len_.end(); ++iter) {
-            *iter += length + gap_struct.gap_ - gap_struct.trash_previous_;
+            *iter += length + gap;
         }
         cumulative_len_.push_back(length);
     }
 
     void DecreaseLengths() {
-        size_t length = g_.length(data_.back()) + gap_len_.back().gap_ - gap_len_.back().trash_previous_;
+        size_t length = g_.length(data_.back()) + gap_len_.back().gap;
 
         for (auto iter = cumulative_len_.begin(); iter != cumulative_len_.end(); ++iter) {
             *iter -= length;
@@ -671,13 +495,13 @@ private:
         cumulative_len_.pop_back();
     }
 
-    void NotifyFrontEdgeAdded(EdgeId e, const Gap& gap) {
+    void NotifyFrontEdgeAdded(EdgeId e, Gap gap) {
         for (auto i = listeners_.begin(); i != listeners_.end(); ++i) {
             (*i)->FrontEdgeAdded(e, this, gap);
         }
     }
 
-    void NotifyBackEdgeAdded(EdgeId e, const Gap& gap) {
+    void NotifyBackEdgeAdded(EdgeId e, Gap gap) {
         for (auto i = listeners_.begin(); i != listeners_.end(); ++i) {
             (*i)->BackEdgeAdded(e, this, gap);
         }
@@ -695,16 +519,11 @@ private:
         }
     }
 
-    void PushFront(EdgeId e, const Gap& gap) {
-        PushFront(e, gap.gap_ + gap.trash_current_ - gap.trash_previous_, gap.trash_current_, gap.trash_previous_);
-    }
-
-    void PushFront(EdgeId e, int gap = 0, uint32_t trash_previous = 0, uint32_t trash_current = 0) {
+    void PushFront(EdgeId e, Gap gap) {
         data_.push_front(e);
         if (gap_len_.size() > 0) {
-            gap_len_[0].gap_ += gap;
-            gap_len_[0].trash_previous_ += trash_previous;
-            gap_len_[0].trash_current_ += trash_current;
+            VERIFY(gap_len_[0] == Gap());
+            gap_len_[0]= gap;
         }
         gap_len_.push_front(Gap(0, 0, 0));
 
@@ -712,49 +531,23 @@ private:
         if (cumulative_len_.empty()) {
             cumulative_len_.push_front(length);
         } else {
-            cumulative_len_.push_front(length + cumulative_len_.front() + gap - trash_previous );
+            cumulative_len_.push_front(cumulative_len_.front() + length + gap.gap);
         }
-        NotifyFrontEdgeAdded(e, Gap(gap, trash_previous, trash_current));
+        NotifyFrontEdgeAdded(e, gap);
     }
 
     void PopFront() {
         EdgeId e = data_.front();
-        if (gap_len_.size() > 1) {
-            gap_len_[1].gap_ = 0;
-            gap_len_[1].trash_previous_ = 0;
-            gap_len_[1].trash_current_ = 0;
-        }
         data_.pop_front();
         gap_len_.pop_front();
-
         cumulative_len_.pop_front();
-        NotifyFrontEdgeRemoved(e);
-    }
-
-    void SetOverlapBegin(bool overlap = true) {
-        if (has_overlaped_begin_ != overlap) {
-            has_overlaped_begin_ = overlap;
-        }
-        if (GetConjPath()->has_overlaped_end_ != overlap) {
-            GetConjPath()->has_overlaped_end_ = overlap;
+        if (!gap_len_.empty()) {
+            gap_len_.front() = Gap();
         }
-    }
 
-    void SetOverlapEnd(bool overlap = true) {
-        GetConjPath()->SetOverlapBegin(overlap);
+        NotifyFrontEdgeRemoved(e);
     }
 
-    const Graph& g_;
-    std::deque<EdgeId> data_;
-    BidirectionalPath* conj_path_;
-    std::deque<size_t> cumulative_len_;  // Length from beginning of i-th edge to path end for forward directed path: L(e1 + e2 + ... + eN) ... L(eN)
-    std::deque<Gap> gap_len_;  // e1 - gap2 - e2 - ... - gapN - eN
-    std::vector<PathListener *> listeners_;
-    const uint64_t id_;  //Unique ID
-    float weight_;
-    bool has_overlaped_begin_;
-    bool has_overlaped_end_;
-    bool overlap_;
     DECL_LOGGER("BidirectionalPath");
 };
 
@@ -794,8 +587,8 @@ inline void SkipGaps(const BidirectionalPath& path1, size_t& cur_pos1, int gap1,
 inline size_t FirstNotEqualPosition(const BidirectionalPath& path1, size_t pos1, const BidirectionalPath& path2, size_t pos2, bool use_gaps) {
     int cur_pos1 = (int) pos1;
     int cur_pos2 = (int) pos2;
-    int gap1 = path1.GapAt(cur_pos1);
-    int gap2 = path2.GapAt(cur_pos2);
+    int gap1 = path1.GapAt(cur_pos1).gap;
+    int gap2 = path2.GapAt(cur_pos2).gap;
     while (cur_pos1 >= 0 && cur_pos2 >= 0) {
         if (path1.At(cur_pos1) == path2.At(cur_pos2)) {
             cur_pos1--;
@@ -810,8 +603,8 @@ inline size_t FirstNotEqualPosition(const BidirectionalPath& path1, size_t pos1,
             SkipGaps(path1, p1, gap1, path2, p2, gap2, use_gaps, false);
             cur_pos1 = (int) p1;
             cur_pos2 = (int) p2;
-            gap1 = path1.GapAt(cur_pos1);
-            gap2 = path2.GapAt(cur_pos2);
+            gap1 = path1.GapAt(cur_pos1).gap;
+            gap2 = path2.GapAt(cur_pos2).gap;
         }
     }
     DEBUG("Equal!!");
@@ -832,8 +625,8 @@ inline size_t LastNotEqualPosition(const BidirectionalPath& path1, size_t pos1,
         } else {
             return cur_pos1;
         }
-        int gap1 = cur_pos1 < path1.Size() ? path1.GapAt(cur_pos1) : 0;
-        int gap2 = cur_pos2 < path2.Size() ? path2.GapAt(cur_pos2) : 0;
+        int gap1 = cur_pos1 < path1.Size() ? path1.GapAt(cur_pos1).gap : 0;
+        int gap2 = cur_pos2 < path2.Size() ? path2.GapAt(cur_pos2).gap : 0;
         SkipGaps(path1, cur_pos1, gap1, path2, cur_pos2, gap2, use_gaps, true);
     }
     return -1UL;
@@ -843,248 +636,19 @@ inline bool EqualEnds(const BidirectionalPath& path1, size_t pos1, const Bidirec
     return LastNotEqualPosition(path1, pos1, path2, pos2, use_gaps) == -1UL;
 }
 
-inline bool PathIdCompare(const BidirectionalPath* p1, const BidirectionalPath* p2) {
-    return p1->GetId() < p2->GetId();
-}
-
-
-
-typedef std::pair<BidirectionalPath*, BidirectionalPath*> PathPair;
-
-inline bool compare_path_pairs(const PathPair& p1, const PathPair& p2) {
-    if (p1.first->Length() != p2.first->Length() || p1.first->Size() == 0 || p2.first->Size() == 0) {
-        return p1.first->Length() > p2.first->Length();
-    }
-    const Graph& g = p1.first->graph();
-    return g.int_id(p1.first->Front()) < g.int_id(p2.first->Front());
-}
-
-class PathComparator {
-public:
-    bool operator()(const BidirectionalPath& p1, const BidirectionalPath& p2) const {
-        return p1.GetId() < p2.GetId();
-    }
-
-    bool operator()(const BidirectionalPath* p1, const BidirectionalPath* p2) const {
-        return p1->GetId() < p2->GetId();
-    }
-};
-
-typedef set<BidirectionalPath*, PathComparator> BidirectionalPathSet;
-
-template<class Value>
-using BidirectionalPathMap = map<BidirectionalPath*, Value, PathComparator>;
-
-typedef std::multiset <BidirectionalPath *, PathComparator> BidirectionalPathMultiset;
-
-class PathContainer {
-
-public:
-
-    typedef std::vector<PathPair> PathContainerT;
-
-    class Iterator : public PathContainerT::iterator {
-    public:
-        Iterator(const PathContainerT::iterator& iter)
-                : PathContainerT::iterator(iter) {
-        }
-        BidirectionalPath* get() const {
-            return this->operator *().first;
-        }
-        BidirectionalPath* getConjugate() const {
-            return this->operator *().second;
-        }
-    };
-
-    class ConstIterator : public PathContainerT::const_iterator {
-    public:
-        ConstIterator(const PathContainerT::const_iterator& iter)
-                : PathContainerT::const_iterator(iter) {
-        }
-        BidirectionalPath* get() const {
-            return this->operator *().first;
-        }
-        BidirectionalPath* getConjugate() const {
-            return this->operator *().second;
-        }
-    };
-
-    PathContainer() {
-    }
-
-    BidirectionalPath& operator[](size_t index) const {
-        return *(data_[index].first);
-    }
-
-    BidirectionalPath* Get(size_t index) const {
-        return data_[index].first;
-    }
-
-    BidirectionalPath* GetConjugate(size_t index) const {
-        return data_[index].second;
-    }
-
-    void DeleteAllPaths() {
-        for (size_t i = 0; i < data_.size(); ++i) {
-            delete data_[i].first;
-            delete data_[i].second;
-        }
-        clear();
-    }
-
-    ~PathContainer() {
-        DeleteAllPaths();
-    }
-
-    size_t size() const {
-        return data_.size();
-    }
-
-    void clear() {
-        data_.clear();
-    }
-
-    void reserve(size_t size) {
-        data_.reserve(size);
-    }
-
-    bool AddPair(BidirectionalPath* p, BidirectionalPath* cp) {
-        p->SetConjPath(cp);
-        cp->SetConjPath(p);
-        p->Subscribe(cp);
-        cp->Subscribe(p);
-        data_.push_back(std::make_pair(p, cp));
-        return true;
-    }
-
-    void SortByLength() {
-        std::stable_sort(data_.begin(), data_.end(), compare_path_pairs);
-    }
-
-    Iterator begin() {
-        return Iterator(data_.begin());
-    }
-
-    Iterator end() {
-        return Iterator(data_.end());
-    }
-
-
-    ConstIterator begin() const {
-        return ConstIterator(data_.begin());
-    }
-
-    ConstIterator end() const {
-        return ConstIterator(data_.end());
-    }
-
-    Iterator erase(Iterator iter) {
-        return Iterator(data_.erase(iter));
-    }
-
-    void print() const {
-        for (size_t i = 0; i < size(); ++i) {
-            Get(i)->Print();
-            GetConjugate(i)->Print();
-        }
-    }
-
-    void FilterEmptyPaths() {
-        DEBUG ("try to delete empty paths");
-        for (Iterator iter = begin(); iter != end();) {
-            if (iter.get()->Size() == 0) {
-                // FIXME: This is trash. PathContainer should own paths
-                delete iter.get();
-                delete iter.getConjugate();
-                iter = erase(iter);
-            } else {
-                ++iter;
-            }
-        }
-        DEBUG("empty paths are removed");
-    }
-
-    void FilterInterstandBulges() {
-        DEBUG ("Try to delete paths with interstand bulges");
-        for (Iterator iter = begin(); iter != end(); ++iter) {
-            if (iter.get()->IsInterstrandBulge()) {
-                iter.get()->PopBack();
-            }
-            if (iter.getConjugate()->IsInterstrandBulge()) {
-                iter.getConjugate()->PopBack();
-            }
-        }
-        DEBUG("deleted paths with interstand bulges");
-    }
-
-private:
-    std::vector<PathPair> data_;
-
-protected:
-    DECL_LOGGER("BidirectionalPath");
-
-};
-
-inline pair<size_t, size_t> ComparePaths(size_t start_pos1, size_t start_pos2, const BidirectionalPath& path1, const BidirectionalPath& path2,
-                                         size_t max_diff) {
-    path1.Print();
-    path2.Print();
-    if (start_pos1 >= path1.Size() || start_pos2 >= path2.Size()) {
-        return make_pair(start_pos1, start_pos2);
-    }
-    const Graph& g = path1.graph();
-    size_t cur_pos = start_pos1;
-    size_t last2 = start_pos2;
-    size_t last1 = cur_pos;
-    cur_pos++;
-    size_t diff_len = 0;
-    while (cur_pos < path1.Size()) {
-        if (diff_len > max_diff) {
-            return make_pair(last1, last2);
-        }
-        EdgeId e = path1[cur_pos];
-        vector<size_t> poses2 = path2.FindAll(e);
-        bool found = false;
-        for (size_t pos2 = 0; pos2 < poses2.size(); ++pos2) {
-            if (poses2[pos2] > last2) {
-                int diff = int(path2.LengthAt(last2)) - int(path2.LengthAt(poses2[pos2])) - int(g.length(path2.At(last2))) - path2.GapAt(poses2[pos2]);
-                if (std::abs(diff) > max_diff) {
-                    break;
-                }
-                last2 = poses2[pos2];
-                last1 = cur_pos;
-                DEBUG("found " << cur_pos);
-                found = true;
-                break;
-            }
-        }
-        if (!found) {
-            diff_len += g.length(e) + path1.GapAt(cur_pos);
-            DEBUG("not found " << cur_pos << " now diff len " << diff_len);
-        } else {
-            diff_len = 0;
-        }
-        cur_pos++;
-    }
-    return make_pair(last1, last2);
-}
-
-inline void DeletePaths(BidirectionalPathSet& paths) {
-    for (auto i = paths.begin(); i != paths.end(); ++i) {
-        delete (*i);
-    }
-}
+inline bool EndsWithInterstrandBulge(const BidirectionalPath &path) {
+    if (path.Empty())
+        return false;
 
-inline void DeletePaths(vector<BidirectionalPath*>& paths) {
-    for (auto i = paths.begin(); i != paths.end(); ++i) {
-        delete (*i);
-    }
-}
+    const Graph &g = path.g();
+    EdgeId e = path.Back();
+    VertexId v1 = g.EdgeStart(e);
+    VertexId v2 = g.EdgeEnd(e);
 
-inline void DeleteMapWithPaths(map<EdgeId, BidirectionalPath*> m) {
-    for (auto i = m.begin(); i != m.end(); ++i){
-        delete i->second;
-    }
+    return v2 == g.conjugate(v1) &&
+            e != g.conjugate(e) &&
+            g.OutgoingEdgeCount(v1) == 2 &&
+            g.CheckUniqueIncomingEdge(v1);
 }
 
 }  // path extend
diff --git a/src/common/assembly_graph/paths/bidirectional_path_container.hpp b/src/common/assembly_graph/paths/bidirectional_path_container.hpp
new file mode 100644
index 0000000..0205381
--- /dev/null
+++ b/src/common/assembly_graph/paths/bidirectional_path_container.hpp
@@ -0,0 +1,210 @@
+//
+// Created by andrey on 14.08.17.
+//
+
+#pragma once
+
+#include "bidirectional_path.hpp"
+#include "modules/path_extend/path_filter.hpp"
+#include <vector>
+#include <set>
+#include <map>
+
+namespace path_extend {
+
+using namespace std;
+
+typedef std::pair<BidirectionalPath*, BidirectionalPath*> PathPair;
+
+class PathComparator {
+public:
+    bool operator()(const BidirectionalPath& p1, const BidirectionalPath& p2) const {
+        return p1.GetId() < p2.GetId();
+    }
+
+    bool operator()(const BidirectionalPath* p1, const BidirectionalPath* p2) const {
+        return p1->GetId() < p2->GetId();
+    }
+};
+
+typedef set<BidirectionalPath*, PathComparator> BidirectionalPathSet;
+
+template<class Value>
+using BidirectionalPathMap = map<BidirectionalPath*, Value, PathComparator>;
+
+typedef multiset<BidirectionalPath *, PathComparator> BidirectionalPathMultiset;
+
+class PathContainer {
+public:
+
+    typedef vector<PathPair> PathContainerT;
+
+    class Iterator : public PathContainerT::iterator {
+    public:
+        Iterator(const PathContainerT::iterator& iter)
+            : PathContainerT::iterator(iter) {
+        }
+        BidirectionalPath* get() const {
+            return this->operator *().first;
+        }
+        BidirectionalPath* getConjugate() const {
+            return this->operator *().second;
+        }
+    };
+
+    class ConstIterator : public PathContainerT::const_iterator {
+    public:
+        ConstIterator(const PathContainerT::const_iterator& iter)
+            : PathContainerT::const_iterator(iter) {
+        }
+
+        ConstIterator(const PathContainer::Iterator& iter)
+            : PathContainerT::const_iterator(iter) {
+        }
+
+        BidirectionalPath* get() const {
+            return this->operator *().first;
+        }
+        BidirectionalPath* getConjugate() const {
+            return this->operator *().second;
+        }
+    };
+
+    PathContainer() {
+    }
+
+
+    PathContainer(const PathContainer&) = delete;
+    PathContainer& operator=(const PathContainer&) = delete;
+
+    PathContainer(PathContainer&&) = default;
+    PathContainer& operator=(PathContainer&&) = default;
+
+    PathContainer(ConstIterator begin, ConstIterator end) {
+        DeleteAllPaths();
+        for (ConstIterator it = begin; it != end; ++it) {
+            AddPair(new BidirectionalPath(*it.get()), new BidirectionalPath(*it.getConjugate()));
+        }
+    }
+
+    BidirectionalPath& operator[](size_t index) const {
+        return *(data_[index].first);
+    }
+
+    BidirectionalPath* Get(size_t index) const {
+        return data_[index].first;
+    }
+
+    BidirectionalPath* GetConjugate(size_t index) const {
+        return data_[index].second;
+    }
+
+    void Swap(size_t index) {
+        std::swap(data_[index].first, data_[index].second);
+    }
+
+    void DeleteAllPaths() {
+        for (size_t i = 0; i < data_.size(); ++i) {
+            DeletePathPair(data_[i]);
+        }
+        clear();
+    }
+
+    ~PathContainer() {
+        DeleteAllPaths();
+    }
+
+    size_t size() const {
+        return data_.size();
+    }
+
+    void clear() {
+        data_.clear();
+    }
+
+    void reserve(size_t size) {
+        data_.reserve(size);
+    }
+
+    bool AddPair(BidirectionalPath* p, BidirectionalPath* cp) {
+        p->SetConjPath(cp);
+        cp->SetConjPath(p);
+        p->Subscribe(cp);
+        cp->Subscribe(p);
+        data_.push_back(std::make_pair(p, cp));
+        return true;
+    }
+
+    void SortByLength(bool desc = true) {
+        std::stable_sort(data_.begin(), data_.end(), [=](const PathPair& p1, const PathPair& p2) {
+            if (p1.first->Empty() || p2.first->Empty() || p1.first->Length() != p2.first->Length()) {
+                return desc ? p1.first->Length() > p2.first->Length()
+                            : p1.first->Length() < p2.first->Length();
+            }
+            const Graph& g = p1.first->graph();
+            return g.int_id(p1.first->Front()) < g.int_id(p2.first->Front());
+        });
+    }
+
+    Iterator begin() {
+        return Iterator(data_.begin());
+    }
+
+    Iterator end() {
+        return Iterator(data_.end());
+    }
+
+    ConstIterator begin() const {
+        return ConstIterator(data_.begin());
+    }
+
+    ConstIterator end() const {
+        return ConstIterator(data_.end());
+    }
+
+    Iterator erase(Iterator iter) {
+        return Iterator(data_.erase(iter));
+    }
+
+    void print() const {
+        for (size_t i = 0; i < size(); ++i) {
+            Get(i)->PrintDEBUG();
+            GetConjugate(i)->PrintDEBUG();
+        }
+    }
+
+    void FilterPaths(const func::TypedPredicate<const BidirectionalPath&>& pred) {
+        DEBUG("Removing empty paths");
+        for (auto &pp : data_) {
+            if (pred(*pp.first)) {
+                VERIFY(pred(*pp.second)); //do we need it?
+                DeletePathPair(pp);
+            }
+        }
+
+        const PathPair empty_pp(nullptr, nullptr);
+        data_.erase(std::remove(data_.begin(), data_.end(), empty_pp), data_.end());
+        DEBUG("Empty paths removed");
+    }
+
+    void FilterEmptyPaths() {
+        FilterPaths(EmptyPathCondition());
+    }
+
+private:
+
+    void DeletePathPair(PathPair &pp) {
+        delete pp.first;
+        pp.first = nullptr;
+        delete pp.second;
+        pp.second = nullptr;
+    }
+
+    vector<PathPair> data_;
+
+protected:
+    DECL_LOGGER("BidirectionalPath");
+
+};
+
+}
\ No newline at end of file
diff --git a/src/common/assembly_graph/paths/bidirectional_path_io/bidirectional_path_output.cpp b/src/common/assembly_graph/paths/bidirectional_path_io/bidirectional_path_output.cpp
index a3a3004..26e1cf3 100644
--- a/src/common/assembly_graph/paths/bidirectional_path_io/bidirectional_path_output.cpp
+++ b/src/common/assembly_graph/paths/bidirectional_path_io/bidirectional_path_output.cpp
@@ -6,63 +6,36 @@
 
 namespace path_extend {
 
+void path_extend::ContigWriter::OutputPaths(const PathContainer &paths, const vector<PathsWriterT>& writers) const {
+    ScaffoldStorage storage;
 
-string path_extend::ContigWriter::ToFASTGPathFormat(const BidirectionalPath &path) const {
-    if (path.Empty())
-        return "";
-    string res = ids_.at(path.Front()).short_id_;
-    for (size_t i = 1; i < path.Size(); ++i) {
-        if (g_.EdgeEnd(path[i - 1]) != g_.EdgeStart(path[i])) {
-            res += ";\n" + ids_.at(path[i]).short_id_;
-        }
-        else {
-            res += "," + ids_.at(path[i]).short_id_;
+    ScaffoldSequenceMaker scaffold_maker(g_);
+    for (auto iter = paths.begin(); iter != paths.end(); ++iter) {
+        BidirectionalPath* path = iter.get();
+        if (path->Length() <= 0)
+            continue;
+        string path_string = scaffold_maker.MakeSequence(*path);
+        if (path_string.length() >= g_.k()) {
+            storage.emplace_back(path_string, path);
         }
     }
-    return res;
-}
-
-void path_extend::ContigWriter::OutputPaths(const PathContainer &paths,
-                                                  const string &filename_base,
-                                                  bool write_fastg) const {
-    name_generator_->Preprocess(paths);
-    IOContigStorage storage(g_, constructor_, paths);
-
-    INFO("Writing contigs to " << filename_base);
-    io::osequencestream_simple oss(filename_base + ".fasta");
-    std::ofstream os_fastg;
-    if (write_fastg)
-        os_fastg.open((filename_base + ".paths").c_str());
 
-    size_t i = 0;
-    for (const auto& precontig : storage.Storage()) {
-        ++i;
-        std::string contig_id = name_generator_->MakeContigName(i, precontig);
-        oss.set_header(contig_id);
-        oss << precontig.sequence_;
+    //sorting by length and coverage
+    std::sort(storage.begin(), storage.end(), [] (const ScaffoldInfo &a, const ScaffoldInfo &b) {
+        if (a.length() == b.length())
+            return math::gr(a.coverage(), b.coverage());
+        return a.length() > b.length();
+    });
 
-        if (write_fastg) {
-            os_fastg << contig_id << endl;
-            os_fastg << ToFASTGPathFormat(*precontig.path_) << endl;
-            os_fastg << contig_id << "'" << endl;
-            os_fastg << ToFASTGPathFormat(*precontig.path_->GetConjPath()) << endl;
-        }
+    name_generator_->Preprocess(paths);
+    for (size_t i = 0; i < storage.size(); ++i) {
+        storage[i].name = name_generator_->MakeContigName(i+1, storage[i]);
     }
 
-    if (write_fastg)
-        os_fastg.close();
-    DEBUG("Contigs written");
-}
-
-
-void path_extend::PathInfoWriter::WritePaths(const PathContainer &paths, const string &filename) const {
-    std::ofstream oss(filename.c_str());
-
-    for (auto iter = paths.begin(); iter != paths.end(); ++iter) {
-        iter.get()->Print(oss);
+    for (auto& writer : writers) {
+        writer(storage);
     }
-
-    oss.close();
+    DEBUG("Contigs written");
 }
 
 }
\ No newline at end of file
diff --git a/src/common/assembly_graph/paths/bidirectional_path_io/bidirectional_path_output.hpp b/src/common/assembly_graph/paths/bidirectional_path_io/bidirectional_path_output.hpp
index 7de980d..fd993c3 100644
--- a/src/common/assembly_graph/paths/bidirectional_path_io/bidirectional_path_output.hpp
+++ b/src/common/assembly_graph/paths/bidirectional_path_io/bidirectional_path_output.hpp
@@ -7,54 +7,274 @@
 
 #pragma once
 
-
 #include "io_support.hpp"
 
-
 namespace path_extend {
 
-using namespace debruijn_graph;
+template<class Graph>
+using EdgeNamingF = std::function<std::string (const Graph&, EdgeId)>;
 
+template<class Graph>
+EdgeNamingF<Graph> IdNamingF(const string &prefix = "") {
+    return [=](const Graph &g, EdgeId e) {
+        return io::MakeContigId(g.int_id(e), prefix);
+    };
+}
 
-class ContigWriter {
-protected:
-    DECL_LOGGER("PathExtendIO")
+template<class Graph>
+EdgeNamingF<Graph> BasicNamingF(const string &prefix = "EDGE") {
+    return [=](const Graph &g, EdgeId e) {
+        return io::MakeContigId(g.int_id(e), g.length(e) + g.k(), g.coverage(e), prefix);
+    };
+}
+
+template<class Graph>
+class CanonicalEdgeHelper {
+    const Graph &g_;
+    const EdgeNamingF<Graph> naming_f_;
+    const string pos_orient_;
+    const string neg_orient_;
+public:
+
+    CanonicalEdgeHelper(const Graph &g,
+                        EdgeNamingF<Graph> naming_f = IdNamingF<Graph>(),
+                        const string& pos_orient = "+",
+                        const string& neg_orient = "-") :
+            g_(g), naming_f_(naming_f),
+            pos_orient_(pos_orient), neg_orient_(neg_orient) {
+    }
+
+    bool IsCanonical(EdgeId e) const {
+        return e <= g_.conjugate(e);
+    }
+
+    EdgeId Canonical(EdgeId e) const {
+        return IsCanonical(e) ? e : g_.conjugate(e);
+    }
+
+    std::string GetOrientation(EdgeId e) const {
+        return IsCanonical(e) ? pos_orient_ : neg_orient_;
+    }
+
+    std::string EdgeOrientationString(EdgeId e,
+                                      const std::string &delim = "") const {
+        return naming_f_(g_, Canonical(e)) + delim + GetOrientation(e);
+    }
+
+    std::string EdgeString(EdgeId e) const {
+        VERIFY(IsCanonical(e));
+        return naming_f_(g_, e);
+    }
+};
+
+template<class Graph>
+class FastgWriter {
+    typedef typename Graph::EdgeId EdgeId;
+    const Graph &graph_;
+    CanonicalEdgeHelper<Graph> short_namer_;
+    CanonicalEdgeHelper<Graph> extended_namer_;
+
+    string ToPathString(const BidirectionalPath &path) const {
+        if (path.Empty())
+            return "";
+        string res = short_namer_.EdgeOrientationString(path.Front());
+        for (size_t i = 1; i < path.Size(); ++i) {
+            if (graph_.EdgeEnd(path[i - 1]) != graph_.EdgeStart(path[i]) || path.GapAt(i).gap > 0) {
+                res += ";\n" + short_namer_.EdgeOrientationString(path[i]);
+            } else {
+                res += "," + short_namer_.EdgeOrientationString(path[i]);
+            }
+        }
+        return res;
+    }
+
+    string FormHeader(const string &id,
+                      const set<string>& next_ids) {
+        std::stringstream ss;
+        ss << id;
+        if (next_ids.size() > 0) {
+            auto delim = ":";
+            for (const auto &s : next_ids) {
+                ss  << delim << s;
+                delim = ",";
+            }
+        }
+        ss << ";";
+        return ss.str();
+    }
+
+public:
+
+    FastgWriter(const Graph &graph,
+                EdgeNamingF<Graph> edge_naming_f = BasicNamingF<Graph>())
+            : graph_(graph),
+              short_namer_(graph_),
+              extended_namer_(graph_, edge_naming_f, "", "'") {
+    }
+
+    void WriteSegmentsAndLinks(const string &fn) {
+        io::OutputSequenceStream os(fn);
+        for (auto it = graph_.ConstEdgeBegin(); !it.IsEnd(); ++it) {
+            EdgeId e = *it;
+            set<string> next;
+            for (EdgeId next_e : graph_.OutgoingEdges(graph_.EdgeEnd(e))) {
+                next.insert(extended_namer_.EdgeOrientationString(next_e));
+            }
+            os << io::SingleRead(FormHeader(extended_namer_.EdgeOrientationString(e), next),
+                                 graph_.EdgeNucls(e).str());
+        }
+    }
+
+    void WritePaths(const ScaffoldStorage &scaffold_storage, const string &fn) const {
+        std::ofstream os(fn);
+        for (const auto& scaffold_info : scaffold_storage) {
+            os << scaffold_info.name << "\n";
+            os << ToPathString(*scaffold_info.path) << "\n";
+            os << scaffold_info.name << "'" << "\n";
+            os << ToPathString(*scaffold_info.path->GetConjPath()) << "\n";
+        }
+    }
+
+};
 
-protected:
+template<class Graph>
+class GFAWriter {
+    typedef typename Graph::EdgeId EdgeId;
+    const Graph &graph_;
+    CanonicalEdgeHelper<Graph> edge_namer_;
+    std::ostream &os_;
+
+    void WriteSegment(const std::string& edge_id, const Sequence &seq, double cov) {
+        os_ << "S\t" << edge_id << "\t"
+            << seq.str() << "\t"
+            << "KC:i:" << size_t(math::round(cov)) << "\n";
+    }
+
+    void WriteSegments() {
+        for (auto it = graph_.ConstEdgeBegin(true); !it.IsEnd(); ++it) {
+            EdgeId e = *it;
+            WriteSegment(edge_namer_.EdgeString(e), graph_.EdgeNucls(e),
+                         graph_.coverage(e) * double(graph_.length(e)));
+        }
+    }
+
+    void WriteLink(EdgeId e1, EdgeId e2,
+                   size_t overlap_size) {
+        os_ << "L\t" << edge_namer_.EdgeOrientationString(e1, "\t") << "\t"
+            << edge_namer_.EdgeOrientationString(e2, "\t") << "\t"
+            << overlap_size << "M\n";
+    }
+
+    void WriteLinks() {
+        //TODO switch to constant vertex iterator
+        for (auto it = graph_.SmartVertexBegin(/*canonical only*/true); !it.IsEnd(); ++it) {
+            VertexId v = *it;
+            for (auto inc_edge : graph_.IncomingEdges(v)) {
+                for (auto out_edge : graph_.OutgoingEdges(v)) {
+                    WriteLink(inc_edge, out_edge, graph_.k());
+                }
+            }
+        }
+    }
+
+    void WritePath(const std::string& name, size_t segment_id, const vector<std::string> &edge_strs) {
+        os_ << "P" << "\t" ;
+        os_ << name << "_" << segment_id << "\t";
+        std::string delimeter = "";
+        for (const auto& e : edge_strs) {
+            os_ << delimeter << e;
+            delimeter = ",";
+        }
+        os_ << "\t*\n";
+//        delimeter = "";
+//        for (size_t i = 0; i < edge_strs.size() - 1; ++i) {
+//            os_ << delimeter << "*";
+//            delimeter = ",";
+//        }
+//        os_ << "\n";
+    }
+
+public:
+    GFAWriter(const Graph &graph, std::ostream &os,
+              EdgeNamingF<Graph> naming_f = IdNamingF<Graph>())
+            : graph_(graph),
+              edge_namer_(graph_, naming_f),
+              os_(os) {
+    }
+
+    void WriteSegmentsAndLinks() {
+        WriteSegments();
+        WriteLinks();
+    }
+
+    void WritePaths(const ScaffoldStorage &scaffold_storage) {
+        for (const auto& scaffold_info : scaffold_storage) {
+            const path_extend::BidirectionalPath &p = *scaffold_info.path;
+            if (p.Size() == 0) {
+                continue;
+            }
+            std::vector<std::string> segmented_path;
+            //size_t id = p.GetId();
+            size_t segment_id = 1;
+            for (size_t i = 0; i < p.Size() - 1; ++i) {
+                EdgeId e = p[i];
+                segmented_path.push_back(edge_namer_.EdgeOrientationString(e));
+                if (graph_.EdgeEnd(e) != graph_.EdgeStart(p[i+1]) || p.GapAt(i+1).gap > 0) {
+                    WritePath(scaffold_info.name, segment_id, segmented_path);
+                    segment_id++;
+                    segmented_path.clear();
+                }
+            }
+
+            segmented_path.push_back(edge_namer_.EdgeOrientationString(p.Back()));
+            WritePath(scaffold_info.name, segment_id, segmented_path);
+        }
+    }
+
+};
+
+typedef std::function<void (const ScaffoldStorage&)> PathsWriterT;
+
+class ContigWriter {
     const Graph& g_;
-    ContigConstructor<Graph> &constructor_;
-    map<EdgeId, ExtendedContigIdT> ids_;
     shared_ptr<ContigNameGenerator> name_generator_;
 
-    string ToFASTGPathFormat(const BidirectionalPath &path) const;
+public:
 
+    static void WriteScaffolds(const ScaffoldStorage &scaffold_storage, const string &fn) {
+        io::OutputSequenceStream oss(fn);
+        std::ofstream os_fastg;
+
+        for (const auto& scaffold_info : scaffold_storage) {
+            TRACE("Scaffold " << scaffold_info.name << " originates from path " << scaffold_info.path->str());
+            oss << io::SingleRead(scaffold_info.name, scaffold_info.sequence);
+        }
+    }
+
+    static PathsWriterT BasicFastaWriter(const string &fn) {
+        return [=](const ScaffoldStorage& scaffold_storage) {
+            WriteScaffolds(scaffold_storage, fn);
+        };
+    }
 
-public:
     ContigWriter(const Graph& g,
-                 ContigConstructor<Graph> &constructor,
-                 const ConnectedComponentCounter &c_counter,
                  shared_ptr<ContigNameGenerator> name_generator) :
             g_(g),
-            constructor_(constructor),
-            ids_(),
             name_generator_(name_generator) {
-        MakeContigIdMap(g_, ids_, c_counter, "NODE");
     }
 
-    void OutputPaths(const PathContainer &paths,
-                               const string &filename_base,
-                               bool write_fastg = true) const;
-
-};
-
+    void OutputPaths(const PathContainer &paths, const vector<PathsWriterT>& writers) const;
 
-class PathInfoWriter {
-protected:
-    DECL_LOGGER("PathExtendIO")
+    void OutputPaths(const PathContainer &paths, PathsWriterT writer) const {
+        OutputPaths(paths, vector<PathsWriterT>{writer});
+    }
 
-public:
+    void OutputPaths(const PathContainer &paths, const string &fn) const {
+        OutputPaths(paths, BasicFastaWriter(fn));
+    }
 
-    void WritePaths(const PathContainer &paths, const string &filename) const;
+private:
+    DECL_LOGGER("ContigWriter")
 };
 
 }
diff --git a/src/common/assembly_graph/paths/bidirectional_path_io/io_support.cpp b/src/common/assembly_graph/paths/bidirectional_path_io/io_support.cpp
index f3f07e4..7df41d4 100644
--- a/src/common/assembly_graph/paths/bidirectional_path_io/io_support.cpp
+++ b/src/common/assembly_graph/paths/bidirectional_path_io/io_support.cpp
@@ -84,73 +84,43 @@ void path_extend::TranscriptToGeneJoiner::Construct(const PathContainer &paths)
     }
 }
 
-string path_extend::IOContigStorage::ToString(const BidirectionalPath &path) const {
-    stringstream ss;
-    if (path.IsInterstrandBulge() && path.Size() == 1) {
-        ss << constructor_.construct(path.Back()).first.substr(k_, g_.length(path.Back()) - k_);
-        return ss.str();
+std::string path_extend::ScaffoldSequenceMaker::MakeSequence(const BidirectionalPath &path) const {
+    TRACE("Forming sequence for path " << path.str());
+    //TODO what is it and why is it here?
+    if (path.Size() == 1 && EndsWithInterstrandBulge(path)) {
+        TRACE("Interstrand bulge edge");
+        return g_.EdgeNucls(path.Back()).Subseq(k_, g_.length(path.Back())).str();
     }
 
-    if (!path.Empty()) {
-        ss << constructor_.construct(path[0]).first.substr(0, k_);
-    }
+    if (path.Empty())
+        return "";
 
+    std::string answer = g_.EdgeNucls(path[0]).Subseq(0, k_).str();
+    VERIFY(path.GapAt(0) == Gap());
 
-    size_t i = 0;
-    while (i < path.Size()) {
-        int gap = i == 0 ? 0 : path.GapAt(i);
-        if (gap > (int) k_) {
-            for (size_t j = 0; j < gap - k_; ++j) {
-                ss << "N";
-            }
-            auto temp_str = constructor_.construct(path[i]).first;
-            if (i != path.Size() - 1) {
-                for (size_t j = 0; j < path.TrashPreviousAt(i + 1); ++j) {
-                    temp_str.pop_back();
-                    if (temp_str.size() == 0) {
-                        break;
-                    }
-                }
-            }
-            ss << temp_str;
-        }
-        else {
-            int overlapLen = (int) k_ - gap;
-            if (overlapLen >= (int) g_.length(path[i]) + (int) k_) {
-                overlapLen -= (int) g_.length(path[i]) + (int) k_;
-                ++i;
-                //skipping overlapping edges
-                while (i < path.Size() && overlapLen >= (int) g_.length(path[i]) + path.GapAt(i)) {
-                    overlapLen -= (int) g_.length(path[i]) + path.GapAt(i);
-                    ++i;
-                }
-                if (i == path.Size()) {
-                    break;
-                }
-
-                overlapLen = overlapLen + (int) k_ - path.GapAt(i);
-
-                if(overlapLen < 0) {
-                    for (int j = 0; j < abs(overlapLen); ++j) {
-                        ss << "N";
-                    }
-                    overlapLen = 0;
-                }
-            }
-            auto temp_str = g_.EdgeNucls(path[i]).Subseq(overlapLen).str();
-            if (i != path.Size() - 1) {
-                for (size_t j = 0; j < path.TrashPreviousAt(i + 1); ++j) {
-                    temp_str.pop_back();
-                    if (temp_str.size() == 0) {
-                        break;
-                    }
-                }
-            }
-            ss << temp_str;
+    for (size_t i = 0; i < path.Size(); ++i) {
+        Gap gap = path.GapAt(i);
+        TRACE("Adding edge " << g_.str(path[i]));
+        TRACE("Gap " << gap);
+
+        answer.erase((gap.trash_previous <= answer.length()) ?
+                            answer.length() - gap.trash_previous : 0);
+
+        int overlap_after_trim = gap.overlap_after_trim(k_);
+        TRACE("Overlap after trim " << overlap_after_trim);
+        if (overlap_after_trim < 0) {
+            answer += std::string(abs(overlap_after_trim), 'N');
+            overlap_after_trim = 0;
         }
-        ++i;
+        TRACE("Corrected overlap after trim " << overlap_after_trim);
+
+        VERIFY(overlap_after_trim >= 0);
+
+        answer += g_.EdgeNucls(path[i]).Subseq(gap.trash_current + overlap_after_trim).str();
     }
-    return ss.str();
+    TRACE("Sequence formed");
+
+    return answer;
 }
 
 void path_extend::ScaffoldBreaker::SplitPath(const BidirectionalPath &path, PathContainer &result) const {
@@ -160,14 +130,14 @@ void path_extend::ScaffoldBreaker::SplitPath(const BidirectionalPath &path, Path
         BidirectionalPath *p = new BidirectionalPath(path.graph(), path[i]);
         ++i;
 
-        while (i < path.Size() and path.GapAt(i) <= min_gap_) {
-            p->PushBack(path[i], path.GapAt(i), path.TrashPreviousAt(i), path.TrashCurrentAt(i));
+        while (i < path.Size() && path.GapAt(i).gap <= min_gap_) {
+            p->PushBack(path[i], path.GapAt(i));
             ++i;
         }
 
         if (i < path.Size()) {
-            DEBUG("split path " << i << " gap " << path.GapAt(i));
-            p->Print();
+            DEBUG("split path " << i << " gap " << path.GapAt(i).gap);
+            p->PrintDEBUG();
         }
 
         BidirectionalPath *cp = new BidirectionalPath(p->Conjugate());
diff --git a/src/common/assembly_graph/paths/bidirectional_path_io/io_support.hpp b/src/common/assembly_graph/paths/bidirectional_path_io/io_support.hpp
index e46bd42..fcfde4f 100644
--- a/src/common/assembly_graph/paths/bidirectional_path_io/io_support.hpp
+++ b/src/common/assembly_graph/paths/bidirectional_path_io/io_support.hpp
@@ -11,57 +11,35 @@
 namespace path_extend {
 using namespace debruijn_graph;
 
+struct ScaffoldInfo {
+    std::string sequence;
+    BidirectionalPath* path;
+    std::string name;
 
-struct IOContig {
-    std::string sequence_;
-    BidirectionalPath* path_;
+    ScaffoldInfo(const std::string& sequence, BidirectionalPath* path) :
+        sequence(sequence), path(path) { }
 
-    IOContig(const std::string& sequence, BidirectionalPath* path) :
-        sequence_(sequence), path_(path) { }
-};
+    size_t length() const {
+        return sequence.length();
+    }
 
-struct IOContigGreater
-{
-    bool operator()(const IOContig &a, const IOContig &b) const {
-        if (a.sequence_.length() ==  b.sequence_.length())
-            return math::gr(a.path_->Coverage(), b.path_->Coverage());
-        return a.sequence_.length() > b.sequence_.length();
+    double coverage() const {
+        return path->Coverage();
     }
 };
 
-class IOContigStorage {
-private:
-    const Graph &g_;
-    ContigConstructor<Graph> &constructor_;
-    size_t k_;
-    vector<IOContig> storage_;
+typedef vector<ScaffoldInfo> ScaffoldStorage;
 
-    string ToString(const BidirectionalPath& path) const;
+class ScaffoldSequenceMaker {
+    const Graph &g_;
+    const size_t k_;
 public:
-    IOContigStorage(const Graph &g, ContigConstructor<Graph> &constructor, const PathContainer &paths):
-        g_(g),
-        constructor_(constructor),
-        k_(g.k()),
-        storage_() {
-
-        for (auto iter = paths.begin(); iter != paths.end(); ++iter) {
-            BidirectionalPath* path = iter.get();
-            if (path->Length() <= 0)
-                continue;
-            string path_string = ToString(*path);
-            if (path_string.length() >= g.k()) {
-                storage_.emplace_back(path_string, path);
-            }
-        }
-        std::sort(storage_.begin(), storage_.end(), IOContigGreater());
+    ScaffoldSequenceMaker(const Graph& g) : g_(g), k_(g_.k()) {
     }
 
-    const vector<IOContig>& Storage() const {
-        return storage_;
-    }
+    string MakeSequence(const BidirectionalPath &scaffold) const;
 };
 
-
 //Finds common long edges in paths and joins them into
 //Based on disjoint set union
 class TranscriptToGeneJoiner {
@@ -89,13 +67,11 @@ public:
     void Construct(const PathContainer &paths);
 };
 
-
-
 class ContigNameGenerator {
 public:
     virtual void Preprocess(const PathContainer& paths) = 0;
 
-    virtual std::string MakeContigName(size_t index, const IOContig &precontig) = 0;
+    virtual std::string MakeContigName(size_t index, const ScaffoldInfo &scaffold_info) = 0;
 
     virtual ~ContigNameGenerator() {
     }
@@ -105,8 +81,8 @@ class DefaultContigNameGenerator: public ContigNameGenerator {
 public:
     void Preprocess(const PathContainer&) override {}
 
-    std::string MakeContigName(size_t index, const IOContig &precontig) override {
-        return io::MakeContigId(index, precontig.sequence_.length(), precontig.path_->Coverage());
+    std::string MakeContigName(size_t index, const ScaffoldInfo &scaffold_info) override {
+        return io::MakeContigId(index, scaffold_info.length(), scaffold_info.coverage());
     }
 };
 
@@ -118,10 +94,9 @@ public:
 
     void Preprocess(const PathContainer&) override {}
 
-    std::string MakeContigName(size_t index, const IOContig &precontig) override {
-        EdgeId e = precontig.path_->At(0);
-        size_t component = c_counter_.GetComponent(e);
-        return io::MakeContigComponentId(index, precontig.sequence_.length(), precontig.path_->Coverage(), component);
+    std::string MakeContigName(size_t index, const ScaffoldInfo &scaffold_info) override {
+        return io::AddComponentId(io::MakeContigId(index, scaffold_info.length(), scaffold_info.coverage()),
+                                  c_counter_.GetComponent(scaffold_info.path->Front()));
     }
 };
 
@@ -145,8 +120,8 @@ public:
         transcript_joiner_.Construct(paths);
     }
 
-    std::string MakeContigName(size_t index, const IOContig &precontig) override {
-        size_t id = transcript_joiner_.GetPathId(precontig.path_);
+    std::string MakeContigName(size_t index, const ScaffoldInfo &scaffold_info) override {
+        size_t id = transcript_joiner_.GetPathId(scaffold_info.path);
         size_t parent_id = transcript_joiner_.FindTree(id);
         DEBUG("Path " << id << " Parent " << parent_id);
         if (gene_ids_.find(parent_id) == gene_ids_.end()) {
@@ -154,7 +129,11 @@ public:
             isoform_num_[parent_id] = 0;
             gene_num_++;
         }
-        string contig_id = io::MakeRNAContigId(index, precontig.sequence_.length(), precontig.path_->Coverage(), gene_ids_[parent_id], isoform_num_[parent_id]);
+        string contig_id = io::MakeRNAContigId(index,
+                                               scaffold_info.length(),
+                                               scaffold_info.coverage(),
+                                               gene_ids_[parent_id],
+                                               isoform_num_[parent_id]);
         isoform_num_[parent_id]++;
         return contig_id;
     }
diff --git a/src/common/assembly_graph/paths/mapping_path.hpp b/src/common/assembly_graph/paths/mapping_path.hpp
index 3551e04..d42b9eb 100644
--- a/src/common/assembly_graph/paths/mapping_path.hpp
+++ b/src/common/assembly_graph/paths/mapping_path.hpp
@@ -8,7 +8,8 @@
 #pragma once
 
 #include "sequence/sequence.hpp"
-#include "utils/range.hpp"
+#include "sequence/range.hpp"
+#include <boost/iterator/iterator_facade.hpp>
 
 namespace omnigraph {
 
@@ -167,6 +168,62 @@ class MappingPath {
             : edges_(edges),
               range_mappings_(range_mappings) {}
 
+    MappingPath(const std::vector<pair<ElementId, MappingRange>>& edge_mappings) {
+        edges_.reserve(edge_mappings.size());
+        range_mappings_.reserve(edge_mappings.size());
+        for (const auto &em : edge_mappings) {
+            edges_.push_back(em.first);
+            range_mappings_.push_back(em.second);
+        }
+    }
+
+    class mapping_path_iter : public boost::iterator_facade<mapping_path_iter,
+            std::pair<const ElementId, const MappingRange>,
+            boost::random_access_traversal_tag,
+            std::pair<const ElementId, const MappingRange>> {
+        friend class boost::iterator_core_access;
+
+        const MappingPath &mapping_path_;
+        size_t pos_;
+
+        std::pair<const ElementId, const MappingRange> dereference() const {
+            return mapping_path_[pos_];
+        };
+
+        bool equal(const mapping_path_iter &that) const {
+            return &mapping_path_ == &that.mapping_path_ && pos_ == that.pos_;
+        }
+
+        ptrdiff_t distance_to(const mapping_path_iter &that) const {
+            return that.pos_ - pos_;
+        }
+
+        void advance(ptrdiff_t n) {
+            pos_ += n;
+        }
+
+        void increment() {
+            advance(1);
+        }
+
+        void decrement() {
+            advance(-1);
+        }
+
+    public:
+        mapping_path_iter(const MappingPath &mapping_path, size_t pos) :
+                mapping_path_(mapping_path),
+                pos_(pos) {}
+    };
+
+    mapping_path_iter begin() const {
+        return mapping_path_iter(*this, 0);
+    }
+
+    mapping_path_iter end() const {
+        return mapping_path_iter(*this, size());
+    };
+
     size_t size() const { return edges_.size(); }
 
     size_t empty() const { return edges_.empty(); }
@@ -180,6 +237,7 @@ class MappingPath {
     };
 
     std::pair<const ElementId, const MappingRange> operator[](size_t idx) const {
+        VERIFY(idx < size());
         return std::make_pair(edges_[idx], range_mappings_[idx]);
     }
 
@@ -240,62 +298,157 @@ inline std::ostream& operator<<(std::ostream& os, const MappingPath<ElementId>&
 }
 
 template<class Graph>
-struct GapDescription {
+class GapDescription {
     typedef typename Graph::EdgeId EdgeId;
-    EdgeId start, end;
-    Sequence gap_seq;
-    //FIXME discuss using size_t
-    size_t edge_gap_start_position, edge_gap_end_position;
 
-    GapDescription() :
-            start(0),
-            end(0),
-            edge_gap_start_position(0),
-            edge_gap_end_position(0) {
+    //Edges on the left and on the right of the gap
+    EdgeId left_;
+    EdgeId right_;
+
+    //Estimated nucleotide gap/overlap between the edges !after trimming! (see further).
+    // Negative values indicate the overlap between edges.
+    // Should be non-negative for proper final joinings.
+    int estimated_dist_;
+
+    //Number of nucleotides to trim from the (end of the left)/(beginning of the right) edge
+    size_t left_trim_;
+    size_t right_trim_;
+
+    //Optional "filling" sequence, giving "additional" nucleotides which
+    // should be added while closing the gap.
+    // Length guaranteed to be equal to estimated_gap (if present).
+    boost::optional<Sequence> filling_seq_;
+
+    GapDescription(EdgeId left, EdgeId right,
+                   int estimated_dist,
+                   size_t left_trim, size_t right_trim,
+                   boost::optional<Sequence> filling_seq) :
+            left_(left),
+            right_(right),
+            estimated_dist_(estimated_dist),
+            left_trim_(left_trim),
+            right_trim_(right_trim),
+            filling_seq_(filling_seq) {
+    }
+
+    auto AsTuple() const ->
+    decltype(std::make_tuple(left_, right_, left_trim_, right_trim_, estimated_dist_, filling_seq_)) {
+        return std::make_tuple(left_, right_, left_trim_, right_trim_, estimated_dist_, filling_seq_);
+    }
+
+public:
+    static const int INVALID_GAP = std::numeric_limits<int>::min();
+
+    GapDescription(EdgeId left, EdgeId right,
+                   int estimated_dist,
+                   size_t left_trim = 0, size_t right_trim = 0) :
+            GapDescription(left, right,
+                           estimated_dist,
+                           left_trim, right_trim,
+                           boost::none) {
+    }
+
+    GapDescription() : GapDescription(EdgeId(0), EdgeId(0), INVALID_GAP) {
     }
 
-    GapDescription(EdgeId start_e, EdgeId end_e,
-                   const Sequence &gap,
-                   size_t gap_start, size_t gap_end) :
-            start(start_e),
-            end(end_e),
-            gap_seq(gap.str()),
-            edge_gap_start_position(gap_start),
-            edge_gap_end_position(gap_end) {
+    GapDescription(EdgeId left, EdgeId right,
+                   const Sequence &filling_seq,
+                   size_t left_trim = 0, size_t right_trim = 0) :
+            left_(left),
+            right_(right),
+            estimated_dist_(int(filling_seq.size())),
+            left_trim_(left_trim),
+            right_trim_(right_trim),
+            filling_seq_(filling_seq) {
+    }
+
+    EdgeId left() const {
+        return left_;
+    }
+
+    EdgeId right() const {
+        return right_;
+    }
+
+    size_t left_trim() const {
+        return left_trim_;
+    }
+
+    size_t right_trim() const {
+        return right_trim_;
+    }
+
+    bool no_trim() const {
+        return left_trim_ == 0 && right_trim() == 0;
+    }
+
+    int estimated_dist() const {
+        return estimated_dist_;
+    }
+
+    bool has_filling() const {
+        return filling_seq_;
+    }
+
+    Sequence filling_seq() const {
+        return *filling_seq_;
+    }
+
+    void set_left(EdgeId e) {
+        left_ = e;
+    }
+
+    void set_right(EdgeId e) {
+        right_ = e;
+    }
+
+    void set_left_trim(size_t trim) {
+        left_trim_ = trim;
+    }
+
+    void set_estimated_dist(int dist) {
+        VERIFY_MSG(!filling_seq_, "Filling sequence specified");
+        estimated_dist_ = dist;
+    }
+
+    void set_filling_seq(Sequence fill_seq) {
+        estimated_dist_ = fill_seq.size();
+        filling_seq_ = boost::make_optional(fill_seq);
     }
 
     GapDescription<Graph> conjugate(const Graph &g) const {
-        GapDescription<Graph> res(
-                g.conjugate(end), g.conjugate(start), !gap_seq,
-                g.length(end) - edge_gap_end_position,
-                g.length(start) - edge_gap_start_position);
+        GapDescription<Graph> res(g.conjugate(right_),
+                                  g.conjugate(left_),
+                                  estimated_dist_,
+                                  right_trim_,
+                                  left_trim_,
+                                  filling_seq_ ? boost::make_optional(!*filling_seq_) : boost::none);
         return res;
     }
 
     string str(const Graph &g) const {
         stringstream s;
-        s << g.int_id(start) << " " << edge_gap_start_position << endl
-          << g.int_id(end) << " " << edge_gap_end_position << endl
-          << gap_seq.str() << endl;
+        s << "left: " << g.int_id(left_)
+          << "; right: " << g.int_id(right_)
+          << "; estimated distance : " << estimated_dist_
+          << "; left trim: " << left_trim_
+          << "; right trim: " << right_trim_
+          << "; sequence " << (filling_seq_ ? filling_seq_->str() : "no_sequence") << endl;
         return s.str();
     }
 
-    bool operator<(const GapDescription &b) const {
-        return start < b.start ||
-               (start == b.start && end < b.end) ||
-               (start == b.start && end == b.end &&
-                edge_gap_start_position < b.edge_gap_start_position);
+    bool operator<(const GapDescription &rhs) const {
+        return AsTuple() < rhs.AsTuple();
     }
 
     bool operator!=(const GapDescription rhs) const {
-        return start != rhs.start
-               || end != rhs.end
-               || gap_seq != rhs.gap_seq
-               || edge_gap_start_position != rhs.edge_gap_start_position
-               || edge_gap_end_position != rhs.edge_gap_end_position;
+        return AsTuple() != rhs.AsTuple();
     }
 
-};
+    bool operator==(const GapDescription rhs) const {
+        return !(*this != rhs);
+    }
 
+};
 
 }
diff --git a/src/common/assembly_graph/paths/path_processor.hpp b/src/common/assembly_graph/paths/path_processor.hpp
index 0408100..514185d 100644
--- a/src/common/assembly_graph/paths/path_processor.hpp
+++ b/src/common/assembly_graph/paths/path_processor.hpp
@@ -8,7 +8,7 @@
 #pragma once
 
 #include "utils/standard_base.hpp"
-#include "common/adt/bag.hpp"
+#include "adt/bag.hpp"
 #include "assembly_graph/dijkstra/dijkstra_helper.hpp"
 
 namespace omnigraph {
@@ -65,7 +65,7 @@ private:
         size_t curr_depth_;
         size_t call_cnt_;
         Path reversed_edge_path_;
-        bag<VertexId> vertex_cnts_;
+        adt::bag<VertexId> vertex_cnts_;
 
         const Graph& g_;
         const DijkstraT& dijkstra_;
@@ -173,10 +173,12 @@ private:
 
 public:
 
-    PathProcessor(const Graph& g, VertexId start, size_t length_bound) :
+    PathProcessor(const Graph& g, VertexId start, size_t length_bound,
+                  size_t dijkstra_vertex_limit = MAX_DIJKSTRA_VERTICES) :
               g_(g),
               start_(start),
-              dijkstra_(DijkstraHelper<Graph>::CreateBoundedDijkstra(g, length_bound, MAX_DIJKSTRA_VERTICES)) {
+              dijkstra_(DijkstraHelper<Graph>::CreateBoundedDijkstra(g, length_bound,
+                                                                     dijkstra_vertex_limit)) {
         TRACE("Dijkstra launched");
         dijkstra_.Run(start);
         TRACE("Dijkstra finished");
@@ -184,7 +186,9 @@ public:
 
     // dfs from the end vertices
     // 3 two mistakes, 2 bad dijkstra, 1 some bad dfs, 0 = okay
-    int Process(VertexId end, size_t min_len, size_t max_len, Callback& callback, size_t edge_depth_bound = -1ul) const {
+    int Process(VertexId end, size_t min_len, size_t max_len,
+                Callback& callback,
+                size_t edge_depth_bound = std::numeric_limits<size_t>::max()) const {
         TRACE("Process launched");
         int error_code = 0;
 
@@ -219,7 +223,8 @@ private:
 template<class Graph>
 int ProcessPaths(const Graph& g, size_t min_len, size_t max_len,
                  typename Graph::VertexId start, typename Graph::VertexId end,
-                 typename PathProcessor<Graph>::Callback& callback, size_t max_edge_cnt = -1ul) {
+                 typename PathProcessor<Graph>::Callback& callback,
+                 size_t max_edge_cnt = std::numeric_limits<size_t>::max()) {
     PathProcessor<Graph> processor(g, start, max_len);
     return processor.Process(end, min_len, max_len, callback, max_edge_cnt);
 }
diff --git a/src/common/assembly_graph/stats/picture_dump.hpp b/src/common/assembly_graph/stats/picture_dump.hpp
index bee431d..5c9d03f 100644
--- a/src/common/assembly_graph/stats/picture_dump.hpp
+++ b/src/common/assembly_graph/stats/picture_dump.hpp
@@ -26,7 +26,7 @@
 #include "io/reads/wrapper_collection.hpp"
 #include "io/reads/osequencestream.hpp"
 #include "io/dataset_support/dataset_readers.hpp"
-#include "utils/copy_file.hpp"
+#include "utils/filesystem/copy_file.hpp"
 
 #include <boost/algorithm/string.hpp>
 
@@ -63,11 +63,11 @@ shared_ptr<visualization::graph_colorer::GraphColorer<Graph>> DefaultColorer(con
 template <class graph_pack>
 void CollectContigPositions(graph_pack &gp) {
     if (!cfg::get().pos.contigs_for_threading.empty() &&
-        path::FileExists(cfg::get().pos.contigs_for_threading))
+        fs::FileExists(cfg::get().pos.contigs_for_threading))
       visualization::position_filler::FillPos(gp, cfg::get().pos.contigs_for_threading, "thr_", true);
 
     if (!cfg::get().pos.contigs_to_analyze.empty() &&
-        path::FileExists(cfg::get().pos.contigs_to_analyze))
+        fs::FileExists(cfg::get().pos.contigs_to_analyze))
       visualization::position_filler::FillPos(gp, cfg::get().pos.contigs_to_analyze, "anlz_", true);
 }
 
@@ -263,6 +263,17 @@ struct detail_info_printer {
 
   private:
 
+    template<typename T>
+    std::string ToString(const T& t, size_t length) {
+        std::ostringstream ss;
+        ss << t;
+        std::string result = ss.str();
+        while (result.size() < length)
+            result = "0" + result;
+        return result;
+    }
+
+
     void ProduceDetailedInfo(const string &pos_name,
                              config::info_printer_pos pos) {
         using namespace visualization;
@@ -281,9 +292,9 @@ struct detail_info_printer {
         }
 
         if (config.save_graph_pack) {
-            string saves_folder = path::append_path(path::append_path(folder_, "saves/"),
+            string saves_folder = fs::append_path(fs::append_path(folder_, "saves/"),
                                               ToString(call_cnt++, 2) + "_" + pos_name + "/");
-            path::make_dirs(saves_folder);
+            fs::make_dirs(saves_folder);
             graphio::ConjugateDataPrinter<conj_graph_pack::graph_t> printer(gp_.g);
             graphio::PrintGraphPack(saves_folder + "graph_pack", printer, gp_);
             //TODO: separate
@@ -291,9 +302,9 @@ struct detail_info_printer {
         }
 
         if (config.save_all) {
-            string saves_folder = path::append_path(path::append_path(folder_, "saves/"),
+            string saves_folder = fs::append_path(fs::append_path(folder_, "saves/"),
                                                           ToString(call_cnt++, 2) + "_" + pos_name);
-            path::make_dirs(saves_folder);
+            fs::make_dirs(saves_folder);
             string p = saves_folder + "/saves";
             INFO("Saving current state to " << p);
 
@@ -302,17 +313,17 @@ struct detail_info_printer {
         }
 
         if (config.save_full_graph) {
-            string saves_folder = path::append_path(path::append_path(folder_, "saves/"),
+            string saves_folder = fs::append_path(fs::append_path(folder_, "saves/"),
                                               ToString(call_cnt++, 2) + "_" + pos_name + "/");
-            path::make_dirs(saves_folder);
+            fs::make_dirs(saves_folder);
             graphio::ConjugateDataPrinter<conj_graph_pack::graph_t> printer(gp_.g);
             graphio::PrintBasicGraph(saves_folder + "graph", printer);
         }
 
         if (config.lib_info) {
-            string saves_folder = path::append_path(path::append_path(folder_, "saves/"),
+            string saves_folder = fs::append_path(fs::append_path(folder_, "saves/"),
                                                   ToString(call_cnt++, 2) + "_" + pos_name + "/");
-            path::make_dirs(saves_folder);
+            fs::make_dirs(saves_folder);
             config::write_lib_data(saves_folder + "lib_info");
         }
 
@@ -333,9 +344,9 @@ struct detail_info_printer {
         } 
 
         VERIFY(cfg::get().developer_mode);
-        string pics_folder = path::append_path(path::append_path(folder_, "pictures/"),
+        string pics_folder = fs::append_path(fs::append_path(folder_, "pictures/"),
                                           ToString(call_cnt++, 2) + "_" + pos_name + "/");
-        path::make_dirs(pics_folder);
+        fs::make_dirs(pics_folder);
         PrepareForDrawing(gp_);
     
         auto path1 = FindGenomeMappingPath(gp_.genome.GetSequence(), gp_.g, gp_.index,
@@ -365,10 +376,10 @@ struct detail_info_printer {
         }
     
         if (!config.components_for_kmer.empty()) {
-            string kmer_folder = path::append_path(pics_folder, "kmer_loc/");
+            string kmer_folder = fs::append_path(pics_folder, "kmer_loc/");
             make_dir(kmer_folder);
             auto kmer = RtSeq(gp_.k_value + 1, config.components_for_kmer.substr(0, gp_.k_value + 1).c_str());
-            string file_name = path::append_path(kmer_folder, pos_name + ".dot");
+            string file_name = fs::append_path(kmer_folder, pos_name + ".dot");
             WriteKmerComponent(gp_, kmer, file_name, colorer, labeler_);
         }
     
@@ -385,7 +396,7 @@ struct detail_info_printer {
         }
 
         if (!config.components_for_genome_pos.empty()) {
-            string pos_loc_folder = path::append_path(pics_folder, "pos_loc/");
+            string pos_loc_folder = fs::append_path(pics_folder, "pos_loc/");
             make_dir(pos_loc_folder);
             vector<string> positions;
             boost::split(positions, config.components_for_genome_pos,
@@ -394,9 +405,9 @@ struct detail_info_printer {
                 boost::optional<RtSeq> close_kp1mer = FindCloseKP1mer(gp_,
                                                                                  std::stoi(*it), gp_.k_value);
                 if (close_kp1mer) {
-                    string locality_folder = path::append_path(pos_loc_folder, *it + "/");
+                    string locality_folder = fs::append_path(pos_loc_folder, *it + "/");
                     make_dir(locality_folder);
-                    WriteKmerComponent(gp_, *close_kp1mer, path::append_path(locality_folder, pos_name + ".dot"), colorer, labeler_);
+                    WriteKmerComponent(gp_, *close_kp1mer, fs::append_path(locality_folder, pos_name + ".dot"), colorer, labeler_);
                 } else {
                     WARN(
                         "Failed to find genome kp1mer close to the one at position "
diff --git a/src/common/assembly_graph/stats/statistics.hpp b/src/common/assembly_graph/stats/statistics.hpp
index cb6e7b4..8a22a1f 100644
--- a/src/common/assembly_graph/stats/statistics.hpp
+++ b/src/common/assembly_graph/stats/statistics.hpp
@@ -7,7 +7,7 @@
 
 #pragma once
 
-#include "utils/simple_tools.hpp"
+#include "utils/stl_utils.hpp"
 #include "math/xmath.h"
 #include "pipeline/config_struct.hpp"
 #include "assembly_graph/paths/mapping_path.hpp"
@@ -148,10 +148,13 @@ public:
             }
         }
         if (edge_count > 0) {
+            size_t total_genome_size = 0;
+            for (const auto &chr: cfg::get().ds.reference_genome)
+                total_genome_size += 2*chr.size();
             INFO("Error edges count: " << black_count << " which is " <<
                  100.0 * (double) black_count / (double) edge_count << "% of all edges");
             INFO("Total length of all black edges: " << sum_length << ". While double genome length is " <<
-                 (2 * cfg::get().ds.reference_genome.size()));
+                 total_genome_size);
         } else {
             INFO("Error edges count: " << black_count << " which is 0% of all edges");
         }
diff --git a/src/common/io/dataset_support/dataset_readers.hpp b/src/common/io/dataset_support/dataset_readers.hpp
index 4b04751..e08211a 100644
--- a/src/common/io/dataset_support/dataset_readers.hpp
+++ b/src/common/io/dataset_support/dataset_readers.hpp
@@ -8,7 +8,7 @@
 #pragma once
 
 #include "utils/logger/logger.hpp"
-#include "utils/simple_tools.hpp"
+#include "utils/stl_utils.hpp"
 #include "io/reads/io_helper.hpp"
 #include "pipeline/library.hpp"
 #include "pipeline/config_struct.hpp"
diff --git a/src/common/io/dataset_support/read_converter.hpp b/src/common/io/dataset_support/read_converter.hpp
index 6939f1a..c75999d 100644
--- a/src/common/io/dataset_support/read_converter.hpp
+++ b/src/common/io/dataset_support/read_converter.hpp
@@ -17,7 +17,7 @@
 #include "io/reads/binary_converter.hpp"
 #include "io/reads/io_helper.hpp"
 #include "dataset_readers.hpp"
-#include "utils/simple_tools.hpp"
+#include "utils/stl_utils.hpp"
 
 #include <fstream>
 
@@ -33,7 +33,7 @@ private:
     const static size_t current_binary_format_version = 11;
 
     static bool CheckBinaryReadsExist(SequencingLibraryT& lib) {
-        return path::FileExists(lib.data().binary_reads_info.bin_reads_info_file);
+        return fs::FileExists(lib.data().binary_reads_info.bin_reads_info_file);
     }
 
     static bool LoadLibIfExists(SequencingLibraryT& lib) {
diff --git a/src/common/io/kmers/kmer_iterator.hpp b/src/common/io/kmers/kmer_iterator.hpp
index 07d04a6..123f2d4 100644
--- a/src/common/io/kmers/kmer_iterator.hpp
+++ b/src/common/io/kmers/kmer_iterator.hpp
@@ -35,7 +35,7 @@ std::vector<raw_kmer_iterator<Seq>> make_kmer_iterator(const std::string &FileNa
     size_t chunk = round_up(file_size / amount,
                             getpagesize() * Seq::GetDataSize(K) * sizeof(typename Seq::DataType));
     size_t offset = 0;
-    if (chunk > file_size)
+    if (chunk == 0 || chunk > file_size)
         chunk = file_size;
 
     while (offset < file_size) {
diff --git a/src/common/io/kmers/mmapped_reader.hpp b/src/common/io/kmers/mmapped_reader.hpp
index 998659f..fc4914b 100644
--- a/src/common/io/kmers/mmapped_reader.hpp
+++ b/src/common/io/kmers/mmapped_reader.hpp
@@ -14,6 +14,7 @@
 #include "utils/verify.hpp"
 
 #include <boost/iterator/iterator_facade.hpp>
+#include "common/adt/pointer_iterator.hpp"
 
 #include <fcntl.h>
 #include <unistd.h>
@@ -200,8 +201,8 @@ public:
 template<typename T>
 class MMappedRecordReader : public MMappedReader {
 public:
-    typedef pointer_iterator<T> iterator;
-    typedef const pointer_iterator<T> const_iterator;
+    typedef adt::pointer_iterator<T> iterator;
+    typedef const adt::pointer_iterator<T> const_iterator;
 
     MMappedRecordReader(const std::string &FileName, bool unlink = true,
                         size_t blocksize = 64 * 1024 * 1024 / (sizeof(T) * (unsigned) getpagesize()) *
@@ -289,8 +290,8 @@ class MMappedRecordArrayReader : public MMappedReader {
     size_t elcnt_;
 
 public:
-    typedef typename array_vector<T>::iterator iterator;
-    typedef typename array_vector<T>::const_iterator const_iterator;
+    typedef typename adt::array_vector<T>::iterator iterator;
+    typedef typename adt::array_vector<T>::const_iterator const_iterator;
 
     MMappedRecordArrayReader(const std::string &FileName,
                              size_t elcnt = 1,
diff --git a/src/common/io/kmers/mmapped_writer.hpp b/src/common/io/kmers/mmapped_writer.hpp
index 9b3b2ce..2b4aaba 100644
--- a/src/common/io/kmers/mmapped_writer.hpp
+++ b/src/common/io/kmers/mmapped_writer.hpp
@@ -8,8 +8,9 @@
 #ifndef HAMMER_MMAPPED_WRITER_HPP
 #define HAMMER_MMAPPED_WRITER_HPP
 
-#include "common/adt/pointer_iterator.hpp"
-#include "common/adt/array_vector.hpp"
+#include "adt/pointer_iterator.hpp"
+#include "adt/array_vector.hpp"
+#include "common/utils/verify.hpp"
 
 #include <string>
 
@@ -19,6 +20,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <strings.h>
+#include <stdio.h>
 
 class MMappedWriter {
     int StreamFile;
@@ -98,8 +100,8 @@ public:
 template<typename T>
 class MMappedRecordWriter : public MMappedWriter {
 public:
-    typedef pointer_iterator<T> iterator;
-    typedef const pointer_iterator<T> const_iterator;
+    typedef adt::pointer_iterator<T> iterator;
+    typedef const adt::pointer_iterator<T> const_iterator;
 
     MMappedRecordWriter() = default;
 
@@ -142,8 +144,8 @@ template<typename T>
 class MMappedRecordArrayWriter : public MMappedWriter {
     size_t elcnt_;
 public:
-    typedef typename array_vector<T>::iterator iterator;
-    typedef typename array_vector<T>::const_iterator const_iterator;
+    typedef typename adt::array_vector<T>::iterator iterator;
+    typedef typename adt::array_vector<T>::const_iterator const_iterator;
 
     MMappedRecordArrayWriter() = default;
 
diff --git a/src/common/io/reads/binary_converter.hpp b/src/common/io/reads/binary_converter.hpp
index ff427cb..1c4326d 100644
--- a/src/common/io/reads/binary_converter.hpp
+++ b/src/common/io/reads/binary_converter.hpp
@@ -206,7 +206,7 @@ public:
 
         std::string fname;
         for (size_t i = 0; i < file_num_; ++i) {
-            fname = file_name_prefix_ + "_" + ToString(i) + ".seq";
+            fname = file_name_prefix_ + "_" + std::to_string(i) + ".seq";
             file_ds_.push_back(new std::ofstream(fname, std::ios_base::binary));
         }
     }
diff --git a/src/common/io/reads/binary_streams.hpp b/src/common/io/reads/binary_streams.hpp
index 9769b15..f04a604 100644
--- a/src/common/io/reads/binary_streams.hpp
+++ b/src/common/io/reads/binary_streams.hpp
@@ -29,7 +29,7 @@ public:
 
     BinaryFileSingleStream(const std::string& file_name_prefix, size_t file_num) {
         std::string fname;
-        fname = file_name_prefix + "_" + ToString(file_num) + ".seq";
+        fname = file_name_prefix + "_" + std::to_string(file_num) + ".seq";
         stream_.open(fname.c_str(), std::ios_base::binary | std::ios_base::in);
 
         reset();
@@ -90,7 +90,7 @@ public:
 
     BinaryFilePairedStream(const std::string& file_name_prefix, size_t file_num, size_t insert_szie): stream_(), insert_size_ (insert_szie) {
         std::string fname;
-        fname = file_name_prefix + "_" + ToString(file_num) + ".seq";
+        fname = file_name_prefix + "_" + std::to_string(file_num) + ".seq";
         stream_.open(fname.c_str(), std::ios_base::binary | std::ios_base::in);
 
         reset();
diff --git a/src/common/io/reads/fasta_fastq_gz_parser.hpp b/src/common/io/reads/fasta_fastq_gz_parser.hpp
index d976577..292acf1 100644
--- a/src/common/io/reads/fasta_fastq_gz_parser.hpp
+++ b/src/common/io/reads/fasta_fastq_gz_parser.hpp
@@ -39,7 +39,12 @@ namespace io {
 
 namespace fastafastqgz {
 // STEP 1: declare the type of file handler and the read() function
+// Silence bogus gcc warnings
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+// STEP 1: declare the type of file handler and the read() function
 KSEQ_INIT(gzFile, gzread)
+#pragma GCC diagnostic pop
 }
 
 class FastaFastqGzParser: public Parser {
diff --git a/src/common/io/reads/file_reader.hpp b/src/common/io/reads/file_reader.hpp
index 49037d6..4f5678f 100644
--- a/src/common/io/reads/file_reader.hpp
+++ b/src/common/io/reads/file_reader.hpp
@@ -18,7 +18,7 @@
 #include "ireader.hpp"
 #include "single_read.hpp"
 #include "parser.hpp"
-#include "utils/path_helper.hpp"
+#include "utils/filesystem/path_helper.hpp"
 
 namespace io {
 
@@ -35,7 +35,7 @@ public:
     explicit FileReadStream(const std::string &filename,
                             OffsetType offset_type = PhredOffset)
             : filename_(filename), offset_type_(offset_type), parser_(NULL) {
-        path::CheckFileExistenceFATAL(filename_);
+        fs::CheckFileExistenceFATAL(filename_);
         parser_ = SelectParser(filename_, offset_type_);
     }
 
diff --git a/src/common/io/reads/ireadstream.hpp b/src/common/io/reads/ireadstream.hpp
index e9f4089..9047e54 100644
--- a/src/common/io/reads/ireadstream.hpp
+++ b/src/common/io/reads/ireadstream.hpp
@@ -21,8 +21,12 @@
 #include "read.hpp"
 #include "sequence/nucl.hpp"
 
+// Silence bogus gcc warnings
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
 // STEP 1: declare the type of file handler and the read() function
 KSEQ_INIT(gzFile, gzread)
+#pragma GCC diagnostic pop
 
 /*
 * Read name, seq and qual strings from FASTQ data (one by one)
diff --git a/src/common/io/reads/osequencestream.hpp b/src/common/io/reads/osequencestream.hpp
index 9545f8c..960821f 100644
--- a/src/common/io/reads/osequencestream.hpp
+++ b/src/common/io/reads/osequencestream.hpp
@@ -22,38 +22,45 @@
 
 namespace io {
 
+inline std::string MakeContigId(size_t number, const std::string& prefix = "NODE") {
+    return prefix.empty() ? std::to_string(number) : (prefix + "_" + std::to_string(number));
+}
+
 inline std::string MakeContigId(size_t number, size_t length, const std::string& prefix = "NODE") {
-    return prefix + "_" + ToString(number) + "_length_" + ToString(length);
+    return MakeContigId(number, prefix) + "_length_" + std::to_string(length);
 }
 
 inline std::string MakeContigId(size_t number, size_t length, double coverage, const std::string& prefix = "NODE") {
-    return MakeContigId(number, length, prefix) + "_cov_" + ToString(coverage);
+    return MakeContigId(number, length, prefix) + "_cov_" + std::to_string(coverage);
 }
 
 inline std::string MakeContigId(size_t number, size_t length, double coverage, size_t id, const std::string& prefix = "NODE") {
-    return MakeContigId(number, length, coverage, prefix) + "_ID_" +  ToString(id);
+    return MakeContigId(number, length, coverage, prefix) + "_ID_" +  std::to_string(id);
 }
 
 inline std::string MakeRNAContigId(size_t number, size_t length, double coverage, size_t gene_id, size_t isoform_id, const std::string& prefix = "NODE") {
-    return MakeContigId(number, length, coverage, prefix) + "_g" + ToString(gene_id)  + "_i" + ToString(isoform_id);
+    return MakeContigId(number, length, coverage, prefix) + "_g" + std::to_string(gene_id)  + "_i" + std::to_string(isoform_id);
 }
 
-inline std::string MakeContigComponentId(size_t number, size_t length, double coverage, size_t component_id, const std::string& prefix = "NODE") {
-    return MakeContigId(number, length, coverage, prefix)  + "_component_" + ToString(component_id);
+inline std::string AddComponentId(const string& s, size_t component_id) {
+    return s + "_component_" + std::to_string(component_id);
+}
+
+inline void WriteWrapped(const std::string &s, ostream &os, size_t max_width = 60) {
+    size_t cur = 0;
+    while (cur < s.size()) {
+        os << s.substr(cur, max_width) << "\n";
+        cur += max_width;
+    }
 }
 
 class osequencestream {
 protected:
     std::ofstream ofstream_;
-
     size_t id_;
 
     void write_str(const std::string& s) {
-        size_t cur = 0;
-        while (cur < s.size()) {
-            ofstream_ << s.substr(cur, 60) << std::endl;
-            cur += 60;
-        }
+        WriteWrapped(s, ofstream_);
     }
 
     virtual void write_header(const std::string& s) {
@@ -62,81 +69,25 @@ protected:
     }
 
 public:
-    osequencestream(const std::string& filename): id_(1) {
-            ofstream_.open(filename.c_str());
+    osequencestream(const std::string& filename):
+            ofstream_(filename), id_(1) {
     }
 
+    virtual ~osequencestream() {}
 
-    virtual ~osequencestream() {
-        ofstream_.close();
-    }
-
-    virtual osequencestream& operator<<(const std::string& s) {
+    osequencestream& operator<<(const std::string& s) {
         write_header(s);
         write_str(s);
         return *this;
     }
 
-    virtual osequencestream& operator<<(const Sequence& seq) {
+    osequencestream& operator<<(const Sequence& seq) {
         std::string s = seq.str();
         return operator <<(s);
     }
 
-    /**
-     * Has different way of making headers
-     * Doesn't increase counters, don't mix with other methods!
-     */
-    virtual osequencestream& operator<<(const SingleRead& read) {
-        ofstream_ << ">" << read.name() << std::endl;
-        size_t cur = 0;
-        std::string s = read.GetSequenceString();
-        while (cur < s.size()) {
-            ofstream_ << s.substr(cur, 60) << std::endl;
-            cur += 60;
-        }
-        return *this;
-    }
-};
-
-
-
-
-
-
-class PairedOutputSequenceStream {
-protected:
-    std::ofstream ofstreaml_;
-    std::ofstream ofstreamr_;
-
-  static void write(const SingleRead& read, std::ofstream& stream) {
-    stream << ">" << read.name() << std::endl;
-    size_t cur = 0;
-    std::string s = read.GetSequenceString();
-    while (cur < s.size()) {
-      stream << s.substr(cur, 60) << std::endl;
-      cur += 60;
-    }
-  }
-
-public:
-    PairedOutputSequenceStream(const std::string& filename1, const std::string &filename2) {
-      ofstreaml_.open(filename1);
-      ofstreamr_.open(filename2);
-    }
-
-    virtual ~PairedOutputSequenceStream() {
-        ofstreaml_.close();
-        ofstreamr_.close();
-    }
-
-    PairedOutputSequenceStream& operator<<(const PairedRead& read) {
-        write(read.first(), ofstreaml_);
-        write(read.second(), ofstreamr_);
-        return *this;
-    }
 };
 
-
 class osequencestream_cov: public osequencestream {
 protected:
     double coverage_;
@@ -151,231 +102,46 @@ public:
     osequencestream_cov(const std::string& filename)
         : osequencestream(filename), coverage_(0.) { }
 
-    virtual ~osequencestream_cov() {
-        ofstream_.close();
-    }
-
     osequencestream_cov& operator<<(double coverage) {
         coverage_ = coverage;
         return *this;
     }
 
-    osequencestream_cov& operator<<(const std::string& s) {
-        write_header(s);
-        write_str(s);
-        return *this;
-    }
-
-    osequencestream_cov& operator<<(const Sequence& seq) {
-        std::string s = seq.str();
-        return operator <<(s);
-    }
-
-};
-
-
-class osequencestream_simple: public osequencestream {
-protected:
-    std::string header_;
-
-    double cov_;
-
-    virtual void write_header(const std::string& /*s*/) {
-        ofstream_ << ">" << header_ << std::endl;
-    }
-
-public:
-    osequencestream_simple(const std::string& filename)
-            : osequencestream(filename), header_("") { }
-
-    virtual ~osequencestream_simple() {
-        ofstream_.close();
-    }
-
-    void set_header(const std::string &header) {
-        header_ = header;
-    }
-
-    osequencestream_simple& operator<<(const std::string& s) {
-        write_header(s);
-        write_str(s);
-        return *this;
-    }
-
-    osequencestream_simple& operator<<(const Sequence& seq) {
-        std::string s = seq.str();
-        return operator <<(s);
-    }
+    using osequencestream::operator<<;
 
 };
 
-class osequencestream_with_id: public osequencestream {
-protected:
-    size_t uid_;
-
-    double cov_;
-
-    virtual void write_header(const std::string& s) {
-        ofstream_ << ">" << GetId(s) << std::endl;
-        id_++;
-    }
-
-public:
-    osequencestream_with_id(const std::string& filename)
-        : osequencestream(filename), uid_(0), cov_(0.0) { }
-
-    virtual ~osequencestream_with_id() {
-        ofstream_.close();
-    }
-
-    std::string GetId(const std::string& s) const {
-        return MakeContigId(id_, s.size(), cov_, uid_);
-    }
-
-    void setCoverage(double c) {
-        cov_ = c;
-    }
-
-    void setID(size_t uid) {
-        uid_ = uid;
-    }
-
-    osequencestream_with_id& operator<<(const std::string& s) {
-        write_header(s);
-        write_str(s);
-        return *this;
-    }
-
-    osequencestream_with_id& operator<<(double coverage) {
-        cov_ = coverage;
-        return *this;
-    }
-
-    osequencestream_with_id& operator<<(const Sequence& seq) {
-        std::string s = seq.str();
-        return operator <<(s);
-    }
-
-};
-
-class osequencestream_with_manual_node_id: public osequencestream_with_id {
-    bool is_id_set_;
-    virtual void write_header(const std::string& s) {
-        //for manual NODE ID setting osequencestream need to chech that node ID is really manually set
-        if (!is_id_set_) {
-            WARN ("NODE ID is not set manually, setting to 0");
-            id_ = 0;
-        }
-        ofstream_ << ">" << MakeContigId(id_, s.size(), cov_, uid_) << std::endl;
-        is_id_set_ = false;
-    }
-
-public:
-//unfortunately constructor inheritance is supported only since g++4.8
-    osequencestream_with_manual_node_id(const std::string& filename): osequencestream_with_id(filename) {
-        is_id_set_ = false;
-    }
-
-    void setNodeID(int id) {
-        id_ = id;
-        is_id_set_ = true;
-    }
-
-    osequencestream_with_manual_node_id& operator<<(const std::string& s) {
-        write_header(s);
-        write_str(s);
-        return *this;
-    }
-
-    osequencestream_with_manual_node_id& operator<<(const Sequence& seq) {
-        std::string s = seq.str();
-        return operator <<(s);
-    }
-
-
-};
-
-
-class osequencestream_with_data_for_scaffold: public osequencestream_with_id  {
-protected:
-    std::ofstream scstream_;
-
-    virtual void write_header(const std::string& s) {
-        scstream_ << id_ << "\tNODE_" << id_ << "\t" << s.size() << "\t" << (int) round(cov_) << std::endl;
-        ofstream_ << ">" << MakeContigId(id_++, s.size(), cov_, uid_) << std::endl;
-    }
-
+class OutputSequenceStream {
+    std::ofstream ofstream_;
 public:
-    osequencestream_with_data_for_scaffold(const std::string& filename): osequencestream_with_id(filename) {
-        id_ = 1;
-        std::string sc_filename = filename + ".info";
-        scstream_.open(sc_filename.c_str());
-    }
 
-    virtual ~osequencestream_with_data_for_scaffold() {
-        ofstream_.close();
-        scstream_.close();
+    OutputSequenceStream(const std::string& filename):
+            ofstream_(filename) {
     }
 
-    osequencestream_with_data_for_scaffold& operator<<(const std::string& s) {
-        write_header(s);
-        write_str(s);
+    OutputSequenceStream& operator<<(const SingleRead& read) {
+        ofstream_ << ">" << read.name() << "\n";
+        WriteWrapped(read.GetSequenceString(), ofstream_);
         return *this;
     }
-
-    osequencestream_with_data_for_scaffold& operator<<(const Sequence& seq) {
-        std::string s = seq.str();
-        return operator <<(s);
-    }
 };
 
-class osequencestream_for_fastg: public osequencestream_with_id  {
-protected:
-    std::string header_;
-
-    virtual void write_header(const std::string& s) {
-        ofstream_ << ">" << s;
-    }
+class PairedOutputSequenceStream {
+    OutputSequenceStream os_l_;
+    OutputSequenceStream os_r_;
 
 public:
-    osequencestream_for_fastg(const std::string& filename):
-            osequencestream_with_id(filename) {
-        id_ = 1;
-    }
-
-    virtual ~osequencestream_for_fastg() {
-        ofstream_.close();
-    }
-
-    void set_header(const std::string& h) {
-        header_=  h;
-    }
-
-    osequencestream_for_fastg& operator<<(const std::set<std::string>& s) {
-        write_header(header_);
-        if (s.size() > 0) {
-            auto iter = s.begin();
-            ofstream_ << ":" << *iter;
-            ++iter;
-            while (iter != s.end()) {
-                ofstream_ << "," << *iter;
-                ++iter;
-            }
-        }
-        ofstream_ << ";" << std::endl;
-        return *this;
+    PairedOutputSequenceStream(const std::string& filename1,
+                               const std::string &filename2) :
+            os_l_(filename1),
+            os_r_(filename2) {
     }
 
-    osequencestream_for_fastg& operator<<(const std::string& s) {
-        write_str(s);
+    PairedOutputSequenceStream& operator<<(const PairedRead& read) {
+        os_l_ << read.first();
+        os_r_ << read.second();
         return *this;
     }
-
-    osequencestream_for_fastg& operator<<(const Sequence& seq) {
-        std::string s = seq.str();
-        return operator <<(s);
-    }
-
 };
 
 }
diff --git a/src/common/io/reads/paired_read.hpp b/src/common/io/reads/paired_read.hpp
index 2c498d7..3f5a58b 100644
--- a/src/common/io/reads/paired_read.hpp
+++ b/src/common/io/reads/paired_read.hpp
@@ -86,11 +86,6 @@ public:
         return !file.fail();
     }
 
-    void print_size() const {
-        first_.print_size();
-        second_.print_size();
-    }
-
 private:
     SingleRead first_;
     SingleRead second_;
diff --git a/src/common/io/reads/read.hpp b/src/common/io/reads/read.hpp
index 913a6f3..f50e023 100644
--- a/src/common/io/reads/read.hpp
+++ b/src/common/io/reads/read.hpp
@@ -23,7 +23,7 @@
 #include "sequence/sequence.hpp"
 #include "sequence/nucl.hpp"
 #include "sequence/sequence_tools.hpp"
-#include "utils/simple_tools.hpp"
+#include "utils/stl_utils.hpp"
 
 //fixme deprecated!!! used in hammer!
 class Read {
diff --git a/src/common/io/reads/read_processor.hpp b/src/common/io/reads/read_processor.hpp
index a8d060b..2fcf0c6 100644
--- a/src/common/io/reads/read_processor.hpp
+++ b/src/common/io/reads/read_processor.hpp
@@ -10,7 +10,7 @@
 
 #include "io/reads/mpmc_bounded.hpp"
 
-#include "utils/openmp_wrapper.h"
+#include "utils/parallel/openmp_wrapper.h"
 
 #pragma GCC diagnostic push
 #ifdef __clang__
diff --git a/src/common/io/reads/sequence_reader.hpp b/src/common/io/reads/sequence_reader.hpp
index 86daf5d..aeb15f6 100644
--- a/src/common/io/reads/sequence_reader.hpp
+++ b/src/common/io/reads/sequence_reader.hpp
@@ -8,7 +8,7 @@
 #pragma once
 
 #include "io/reads/ireader.hpp"
-#include "common/basic/reads/single_read.hpp"
+#include "basic/reads/single_read.hpp"
 
 namespace io {
 
diff --git a/src/common/io/reads/single_read.hpp b/src/common/io/reads/single_read.hpp
index 15bac77..067ec3f 100644
--- a/src/common/io/reads/single_read.hpp
+++ b/src/common/io/reads/single_read.hpp
@@ -12,7 +12,7 @@
 #include "sequence/sequence.hpp"
 #include "sequence/nucl.hpp"
 #include "sequence/sequence_tools.hpp"
-#include "utils/simple_tools.hpp"
+#include "utils/stl_utils.hpp"
 
 #include <string>
 
@@ -39,14 +39,11 @@ class SingleRead {
 public:
 
     static std::string EmptyQuality(const std::string &seq) {
-        return std::string(seq.size(), (char) 33);
+        return std::string(seq.size(), (char) PhredOffset);
     }
 
-    static const int BAD_QUALITY_THRESHOLD = 2;
-
     SingleRead() :
             name_(""), seq_(""), qual_(""), left_offset_(0), right_offset_(0), valid_(false) {
-        DEBUG(name_ << " created");
     }
 
     SingleRead(const std::string &name, const std::string &seq,
@@ -54,7 +51,6 @@ public:
                SequenceOffsetT left_offset = 0, SequenceOffsetT right_offset = 0) :
             name_(name), seq_(seq), qual_(qual), left_offset_(left_offset), right_offset_(right_offset) {
         Init();
-        DEBUG(name_ << " created");
         for (size_t i = 0; i < qual_.size(); ++i) {
             qual_[i] = (char) (qual_[i] - offset);
         }
@@ -64,7 +60,6 @@ public:
                const std::string &qual,
                SequenceOffsetT left_offset = 0, SequenceOffsetT right_offset = 0) :
             name_(name), seq_(seq), qual_(qual), left_offset_(left_offset), right_offset_(right_offset) {
-        DEBUG(name_ << " created");
         Init();
     }
 
@@ -72,7 +67,6 @@ public:
                SequenceOffsetT left_offset = 0, SequenceOffsetT right_offset = 0) :
             name_(name), seq_(seq), qual_(EmptyQuality(seq_)), left_offset_(left_offset),
             right_offset_(right_offset) {
-        DEBUG(name_ << " created");
         Init();
     }
 
@@ -196,12 +190,6 @@ public:
         return !file.fail();
     }
 
-
-    void print_size() const {
-        std::cerr << size() << std::endl;
-    }
-
-
 private:
     /*
      * @variable The name of SingleRead in input file.
@@ -237,10 +225,10 @@ private:
         //        TODO remove naming?
         std::string new_name;
         if (name_.length() >= 3 && name_.substr(name_.length() - 3) == "_RC") {
-            new_name = name_.substr(0, name_.length() - 3) + "_SUBSTR(" + ToString(size() - to) + "," +
-                       ToString(size() - from) + ")" + "_RC";
+            new_name = name_.substr(0, name_.length() - 3) + "_SUBSTR(" + std::to_string(size() - to) + "," +
+                       std::to_string(size() - from) + ")" + "_RC";
         } else {
-            new_name = name_ + "_SUBSTR(" + ToString(from) + "," + ToString(to) + ")";
+            new_name = name_ + "_SUBSTR(" + std::to_string(from) + "," + std::to_string(to) + ")";
         }
         return SingleRead(new_name, seq_.substr(from, len), qual_.substr(from, len),
                           SequenceOffsetT(from + (size_t) left_offset_),
diff --git a/src/common/io/reads/wrapper_collection.hpp b/src/common/io/reads/wrapper_collection.hpp
index 1f6c405..e2c2d49 100644
--- a/src/common/io/reads/wrapper_collection.hpp
+++ b/src/common/io/reads/wrapper_collection.hpp
@@ -24,7 +24,7 @@ public:
     /* virtual */
     IdSettingReaderWrapper& operator>>(SingleRead& read) {
         this->reader() >> read;
-        read.ChangeName(ToString(next_id_++));
+        read.ChangeName(std::to_string(next_id_++));
         return *this;
     }
 };
diff --git a/src/common/utils/log.hpp b/src/common/math/log.hpp
similarity index 100%
rename from src/common/utils/log.hpp
rename to src/common/math/log.hpp
diff --git a/src/common/modules/alignment/bwa_index.cpp b/src/common/modules/alignment/bwa_index.cpp
index 9973477..51f36b1 100644
--- a/src/common/modules/alignment/bwa_index.cpp
+++ b/src/common/modules/alignment/bwa_index.cpp
@@ -262,7 +262,7 @@ omnigraph::MappingPath<debruijn_graph::EdgeId> BWAIndex::AlignSequence(const Seq
 
     std::string seq = sequence.str();
     mem_alnreg_v ar = mem_align1(memopt_.get(), idx_->bwt, idx_->bns, idx_->pac,
-                                 seq.length(), seq.data());
+                                 int(seq.length()), seq.data());
     for (size_t i = 0; i < ar.n; ++i) {
         const mem_alnreg_t &a = ar.a[i];
         if (a.secondary >= 0) continue; // skip secondary alignments
@@ -281,10 +281,10 @@ omnigraph::MappingPath<debruijn_graph::EdgeId> BWAIndex::AlignSequence(const Seq
         size_t read_length = seq.length() ;
         //we had to reduce the range to kmer-based
         if (pos + (a.re - a.rb) >= g_.length(ids_[a.rid]) ){
-            if (a.qe > g_.k() + a.qb)
+            if (a.qe > int(g_.k()) + a.qb)
                 initial_range_end -= g_.k();
             else continue;
-            if (a.re > g_.k() + a.rb)
+            if (a.re > int(g_.k()) + a.rb)
                 mapping_range_end -= g_.k();
             else continue;
             if (read_length >= g_.k())
@@ -302,8 +302,8 @@ omnigraph::MappingPath<debruijn_graph::EdgeId> BWAIndex::AlignSequence(const Seq
 //            fprintf (stderr,"%d %d %d\n", pos, pos + a.re - a.rb , g_.length(ids_[a.rid]) );
 
             res.push_back(g_.conjugate(ids_[a.rid]),
-                          { omnigraph::Range(a.qb, initial_range_end).Invert(read_length),
-                            omnigraph::Range(pos, mapping_range_end ).Invert(g_.length(ids_[a.rid])) });
+                          { Range(a.qb, initial_range_end).Invert(read_length),
+                            Range(pos, mapping_range_end ).Invert(g_.length(ids_[a.rid])) });
 
         }
 
diff --git a/src/common/modules/alignment/edge_index.hpp b/src/common/modules/alignment/edge_index.hpp
index da84b58..56a5e51 100644
--- a/src/common/modules/alignment/edge_index.hpp
+++ b/src/common/modules/alignment/edge_index.hpp
@@ -7,9 +7,9 @@
 
 #pragma once
 
-#include "common/assembly_graph/core/graph.hpp"
-#include "common/assembly_graph/core/action_handlers.hpp"
-#include "utils/indices/edge_info_updater.hpp"
+#include "assembly_graph/core/graph.hpp"
+#include "assembly_graph/core/action_handlers.hpp"
+#include "assembly_graph/index/edge_info_updater.hpp"
 #include "edge_index_refiller.hpp"
     
 namespace debruijn_graph {
@@ -24,7 +24,7 @@ class EdgeIndex: public omnigraph::GraphActionHandler<Graph> {
 
 public:
     typedef typename Graph::EdgeId EdgeId;
-    using InnerIndex = KmerFreeEdgeIndex<Graph, DefaultStoring>;
+    using InnerIndex = KmerFreeEdgeIndex<Graph>;
     typedef Graph GraphT;
     typedef typename InnerIndex::KMer KMer;
     typedef typename InnerIndex::KMerIdx KMerIdx;
diff --git a/src/common/modules/alignment/edge_index_refiller.cpp b/src/common/modules/alignment/edge_index_refiller.cpp
index c03c5ad..2284261 100644
--- a/src/common/modules/alignment/edge_index_refiller.cpp
+++ b/src/common/modules/alignment/edge_index_refiller.cpp
@@ -4,8 +4,8 @@
 //* See file LICENSE for details.
 //***************************************************************************
 
-#include "utils/indices/edge_index_builders.hpp"
-#include "utils/indices/edge_multi_index.hpp"
+#include "index/edge_index_builders.hpp"
+#include "index/edge_multi_index.hpp"
 #include "core/graph.hpp"
 
 #include "edge_index_refiller.hpp"
diff --git a/src/common/modules/alignment/kmer_mapper.hpp b/src/common/modules/alignment/kmer_mapper.hpp
index 1f11d1f..14b8e1f 100644
--- a/src/common/modules/alignment/kmer_mapper.hpp
+++ b/src/common/modules/alignment/kmer_mapper.hpp
@@ -8,7 +8,7 @@
 #pragma once
 
 #include "sequence/sequence_tools.hpp"
-#include "common/adt/kmer_vector.hpp"
+#include "adt/kmer_vector.hpp"
 #include "edge_index.hpp"
 
 #include "kmer_map.hpp"
@@ -67,7 +67,7 @@ public:
         if (normalized_)
             return;
 
-        KMerVector<Kmer> all(k_, size());
+        adt::KMerVector<Kmer> all(k_, size());
         for (auto it = begin(); it != end(); ++it)
             all.push_back(it->first);
 
diff --git a/src/common/modules/alignment/kmer_mapper_logger.hpp b/src/common/modules/alignment/kmer_mapper_logger.hpp
index 3643030..202ebfc 100644
--- a/src/common/modules/alignment/kmer_mapper_logger.hpp
+++ b/src/common/modules/alignment/kmer_mapper_logger.hpp
@@ -16,7 +16,7 @@
 #define KMER_MAPPER_LOGGER_H_
 
 #include "sequence/sequence.hpp"
-#include "common/assembly_graph/core/action_handlers.hpp"
+#include "assembly_graph/core/action_handlers.hpp"
 #include "utils/standard_base.hpp"
 
 namespace debruijn {
diff --git a/src/common/modules/alignment/pacbio/pac_index.hpp b/src/common/modules/alignment/pacbio/pac_index.hpp
index ff779ab..5504e8c 100644
--- a/src/common/modules/alignment/pacbio/pac_index.hpp
+++ b/src/common/modules/alignment/pacbio/pac_index.hpp
@@ -7,8 +7,8 @@
 
 #pragma once
 
-#include "utils/indices/edge_multi_index.hpp"
-#include "common/modules/alignment/edge_index_refiller.hpp"
+#include "assembly_graph/index/edge_multi_index.hpp"
+#include "modules/alignment/edge_index_refiller.hpp"
 #include "assembly_graph/paths/mapping_path.hpp"
 #include "assembly_graph/paths/path_processor.hpp"
 // FIXME: Layering violation, get rid of this
@@ -73,7 +73,7 @@ private:
     bool ignore_map_to_middle;
     debruijn_graph::config::debruijn_config::pacbio_processor pb_config_;
 public:
-    MappingDescription Locate(const Sequence &s) const;
+    MappingDescription GetSeedsFromRead(const Sequence &s) const;
 
     PacBioMappingIndex(const Graph &g, size_t k, size_t debruijn_k_, bool ignore_map_to_middle, string out_dir, debruijn_graph::config::debruijn_config::pacbio_processor pb_config )
             : g_(g),
@@ -166,9 +166,8 @@ public:
             }
         }
     }
-
     ClustersSet GetOrderClusters(const Sequence &s) const {
-        MappingDescription descr = Locate(s);
+        MappingDescription descr = GetSeedsFromRead(s);
         ClustersSet res;
         TRACE(read_count << " read_count");
 
@@ -239,6 +238,36 @@ public:
         FilterClusters(res);
         return res;
     }
+
+    //Filter encapsulated clusters caused by similar regions
+    void FilterDominatedClusters(ClustersSet &clusters) const {
+        for (auto i_iter = clusters.begin(); i_iter != clusters.end();) {
+            size_t edge_id = g_.int_id(i_iter->edgeId);
+            auto sorted_by_edge = i_iter->sorted_positions;
+
+            DEBUG("filtering  with cluster edge, stage 2 "<< edge_id << " len " << sorted_by_edge.size() << " clusters still alive: "<< clusters.size());
+            for (auto j_iter = clusters.begin(); j_iter != clusters.end();) {
+                if (i_iter != j_iter) {
+                    if (dominates(*i_iter, *j_iter)) {
+                        TRACE("cluster is dominated");
+                        auto tmp_iter = j_iter;
+                        tmp_iter++;
+                        TRACE("cluster on edge " << g_.int_id(j_iter->edgeId));
+                        TRACE("erased - dominated");
+                        clusters.erase(j_iter);
+                        j_iter = tmp_iter;
+                    } else {
+                        j_iter++;
+                    }
+                } else {
+                    j_iter++;
+                }
+            }
+            DEBUG("cluster size "<< i_iter->sorted_positions.size() << "survived filtering");
+            i_iter++;
+        }
+    }
+    
     //filter clusters that are too small or fully located on a vertex or dominated by some other cluster.
     void FilterClusters(ClustersSet &clusters) const {
         for (auto i_iter = clusters.begin(); i_iter != clusters.end();) {
@@ -250,10 +279,10 @@ public:
             double good = 0;
             DEBUG("filtering cluster of size " << sorted_by_edge.size());
             DEBUG(edge_id <<" : edgeId");
-            for (auto iter = sorted_by_edge.begin();
-                    iter < sorted_by_edge.end(); iter++) {
+            for (auto iter = sorted_by_edge.begin(); iter < sorted_by_edge.end(); iter++) {
                 if (iter->IsUnique())
                     good++;
+//TODO:: back to quality for laaarge genomes (kmer size)?
                 //good += 1.0 / (iter->quality * iter->quality);
             }
             DEBUG("good " << good);
@@ -280,31 +309,7 @@ public:
                 }
             }
         }
-        for (auto i_iter = clusters.begin(); i_iter != clusters.end();) {
-            size_t edge_id = g_.int_id(i_iter->edgeId);
-            auto sorted_by_edge = i_iter->sorted_positions;
-
-            DEBUG("filtering  with cluster edge, stage 2 "<< edge_id << " len " << sorted_by_edge.size() << " clusters still alive: "<< clusters.size());
-            for (auto j_iter = clusters.begin(); j_iter != clusters.end();) {
-                if (i_iter != j_iter) {
-                    if (dominates(*i_iter, *j_iter)) {
-                        TRACE("cluster is dominated");
-                        auto tmp_iter = j_iter;
-                        tmp_iter++;
-                        TRACE("cluster on edge " << g_.int_id(j_iter->edgeId));
-                        TRACE("erased - dominated");
-                        clusters.erase(j_iter);
-                        j_iter = tmp_iter;
-                    } else {
-                        j_iter++;
-                    }
-                } else {
-                    j_iter++;
-                }
-            }
-            DEBUG("cluster size "<< i_iter->sorted_positions.size() << "survived filtering");
-            i_iter++;
-        }
+        FilterDominatedClusters(clusters);
     }
 
     // is "non strictly dominates" required?
@@ -329,8 +334,7 @@ public:
         vector<vector<EdgeId>> res;
         EdgeId prev_edge = EdgeId(0);
 
-        for (auto iter = cur_cluster.begin(); iter != cur_cluster.end();
-                ++iter) {
+        for (auto iter = cur_cluster.begin(); iter != cur_cluster.end();) {
             EdgeId cur_edge = iter->second->edgeId;
             if (prev_edge != EdgeId(0)) {
 //Need to find sequence of edges between clusters
@@ -374,26 +378,6 @@ public:
                     vector<EdgeId> intermediate_path = BestScoredPath(s, start_v, end_v, limits.first, limits.second, seq_start, seq_end, s_add, e_add);
                     if (intermediate_path.size() == 0) {
                         DEBUG("Tangled region between edgees "<< g_.int_id(prev_edge) << " " << g_.int_id(cur_edge) << " is not closed, additions from edges: " << int(g_.length(prev_edge)) - int(prev_last_index.edge_position) <<" " << int(cur_first_index.edge_position) - int(debruijn_k - pacbio_k ) << " and seq "<< - seq_start + seq_end);
-                        if (pb_config_.additional_debug_info) {
-                            DEBUG(" escpected gap length: " << -int(g_.length(prev_edge)) + int(prev_last_index.edge_position) - int(cur_first_index.edge_position) + int(debruijn_k - pacbio_k ) - seq_start + seq_end);
-                            omnigraph::PathStorageCallback<Graph> callback(g_);
-                            ProcessPaths(g_, 0, 4000,
-                                            start_v, end_v,
-                                            callback);
-                            vector<vector<EdgeId> > paths = callback.paths();
-                            stringstream s_buf;
-                            for (auto p_iter = paths.begin();
-                                    p_iter != paths.end(); p_iter++) {
-                                size_t tlen = 0;
-                                for (auto path_iter = p_iter->begin();
-                                        path_iter != p_iter->end();
-                                        path_iter++) {
-                                    tlen += g_.length(*path_iter);
-                                }
-                                s_buf << tlen << " ";
-                            }
-                            DEBUG(s_buf.str());
-                        }
                         res.push_back(cur_sorted);
                         cur_sorted.clear();
                         prev_edge = EdgeId(0);
@@ -406,6 +390,7 @@ public:
             }
             cur_sorted.push_back(cur_edge);
             prev_edge = cur_edge;
+            ++iter;
         }
         if (cur_sorted.size() > 0)
             res.push_back(cur_sorted);
@@ -420,60 +405,60 @@ public:
         return res;
     }
 
-    vector<int> GetWeightedColors(const ClustersSet &mapping_descr) const {
-        int len = (int) mapping_descr.size();
+    vector<vector<int>> FillConnectionsTable (const ClustersSet &mapping_descr) const{
+        size_t len =  mapping_descr.size();
         DEBUG("getting colors, table size "<< len);
         vector<vector<int> > cons_table(len);
+        for (size_t i = 0; i < len; i++) {
+            cons_table[i].resize(len);
+            cons_table[i][i] = 0;
+        }
+        size_t i = 0;
+        for (auto i_iter = mapping_descr.begin(); i_iter != mapping_descr.end();
+             ++i_iter, ++i) {
+            size_t j = i;
+            for (auto j_iter = i_iter;
+                 j_iter != mapping_descr.end(); ++j_iter, ++j) {
+                if (i_iter == j_iter)
+                    continue;
+                cons_table[i][j] = IsConsistent(*i_iter, *j_iter);
+            }
+        }
+        return cons_table;
+    }
 
+    vector<int> GetWeightedColors(const ClustersSet &mapping_descr) const {
+        size_t len = mapping_descr.size();
         vector<int> colors(len);
         vector<int> cluster_size(len);
         vector<int> max_size(len);
-        vector<int> prev(len);
-
-        for (int i = 0; i < len; i++) {
-            cons_table[i].resize(len);
-            cons_table[i][i] = 0;
+        vector<size_t> prev(len);
+        size_t i = 0;
+        for (i = 0; i < len; i++) {
             prev[i] = -1;
         }
-        int i = 0;
-
-        for (int i = 0; i < len; i++) {
-//-1 not initialized, -2 - removed as trash
+        for (i = 0; i < len; i++) {
             colors[i] = UNDEF_COLOR;
         }
+        i = 0;
         for (auto i_iter = mapping_descr.begin(); i_iter != mapping_descr.end();
                 ++i_iter, ++i) {
             cluster_size[i] = i_iter->size;
         }
-        i = 0;
-        if (len > 1) {
-            TRACE(len << "clusters");
-        }
 
-        for (auto i_iter = mapping_descr.begin(); i_iter != mapping_descr.end();
-                ++i_iter, ++i) {
-            int j = i;
-            for (auto j_iter = i_iter;
-                    j_iter != mapping_descr.end(); ++j_iter, ++j) {
-                if (i_iter == j_iter)
-                    continue;
-                cons_table[i][j] = IsConsistent(*i_iter, *j_iter);
-            }
-        }
-        i = 0;
+        auto cons_table = FillConnectionsTable(mapping_descr);
         int cur_color = 0;
-
         while (true) {
             for (i = 0; i < len; i++) {
                 max_size[i] = 0;
-                prev[i] = -1;
+                prev[i] = -1ul;
             }
             i = 0;
             for (auto i_iter = mapping_descr.begin(); i_iter != mapping_descr.end();
                         ++i_iter, ++i) {
                 if (colors[i] != UNDEF_COLOR) continue;
                 max_size[i] = cluster_size[i];
-                for (int j = 0; j < i; j ++) {
+                for (size_t j = 0; j < i; j ++) {
                     if (colors[j] != -1) continue;
                     if (cons_table[j][i] && max_size[i] < cluster_size[i] + max_size[j]) {
                         max_size[i] = max_size[j] + cluster_size[i];
@@ -483,10 +468,10 @@ public:
             }
             int maxx = 0;
             int maxi = -1;
-            for (int j = 0; j < len; j++) {
+            for (size_t j = 0; j < len; j++) {
                 if (max_size[j] > maxx) {
                     maxx = max_size[j];
-                    maxi = j;
+                    maxi = int(j);
                 }
             }
             if (maxi == -1) {
@@ -496,9 +481,9 @@ public:
             colors[maxi] = cur_color;
             int real_maxi = maxi, min_i = maxi;
 
-            while (prev[maxi] != -1) {
+            while (prev[maxi] != -1ul) {
                 min_i = maxi;
-                maxi = prev[maxi];
+                maxi = int(prev[maxi]);
                 colors[maxi] = cur_color;
             }
             while (real_maxi >= min_i) {
@@ -511,7 +496,6 @@ public:
         return colors;
     }
 
-
     GapDescription CreateGapDescription(const KmerCluster<debruijn_graph::Graph>& a,
                                         const KmerCluster<debruijn_graph::Graph>& b,
                                         const Sequence& read) const {
@@ -524,30 +508,58 @@ public:
         return GapDescription(a.edgeId,
                               b.edgeId,
                               read.Subseq(seq_start, seq_end),
-                              a.sorted_positions[a.last_trustable_index].edge_position + pacbio_k - debruijn_k,
+                              g_.length(a.edgeId) - a.sorted_positions[a.last_trustable_index].edge_position - pacbio_k + debruijn_k,
                               b.sorted_positions[b.first_trustable_index].edge_position);
     }
 
+    OneReadMapping AddGapDescriptions(const vector<typename ClustersSet::iterator> &start_clusters,
+                                      const vector<typename ClustersSet::iterator> &end_clusters,
+                                      const vector<vector<EdgeId>> &sortedEdges, const Sequence &s,
+                                      const vector<bool> &block_gap_closer, size_t used_seeds_count) const {
+        DEBUG("adding gaps between subreads");
+        vector<GapDescription> illumina_gaps;
+        for (size_t i = 0; i + 1 < sortedEdges.size() ; i++) {
+            if (block_gap_closer[i])
+                continue;
+            size_t j = i + 1;
+            EdgeId before_gap = sortedEdges[i][sortedEdges[i].size() - 1];
+            EdgeId after_gap = sortedEdges[j][0];
+//do not add "gap" for rc-jumping
+            if (before_gap != after_gap && before_gap != g_.conjugate(after_gap)) {
+                if (TopologyGap(before_gap, after_gap, true)) {
+                    if (start_clusters[j]->CanFollow(*end_clusters[i])) {
+                        auto gap = CreateGapDescription(*end_clusters[i],
+                                                        *start_clusters[j],
+                                                        s);
+                        if (gap != GapDescription()) {
+                            illumina_gaps.push_back(gap);
+                            DEBUG("adding gap between alignments number " << i<< " and " << j);
+                        }
+                    }
+
+                }
+            }
+
+        }
+        return OneReadMapping(sortedEdges, illumina_gaps, vector<size_t>(0), used_seeds_count);
+    }
 
     OneReadMapping GetReadAlignment(Sequence &s) const {
         ClustersSet mapping_descr = GetOrderClusters(s);
-        DEBUG("clusters got");
-        int len = (int) mapping_descr.size();
-        vector<size_t> real_length;
-
         vector<int> colors = GetWeightedColors(mapping_descr);
-        vector<vector<EdgeId> > sortedEdges;
+        size_t len =  mapping_descr.size();
+        vector<size_t> real_length;
+        vector<vector<EdgeId>> sortedEdges;
         vector<bool> block_gap_closer;
         vector<typename ClustersSet::iterator> start_clusters, end_clusters;
-        vector<GapDescription> illumina_gaps;
         vector<int> used(len);
         size_t used_seed_count = 0;
         auto iter = mapping_descr.begin();
-        for (int i = 0; i < len; i++, iter ++) {
-            used[i] = 0; 
+        for (size_t i = 0; i < len; i++, iter ++) {
+            used[i] = 0;
             DEBUG(colors[i] <<" " << iter->str(g_));
         }
-        for (int i = 0; i < len; i++) {
+        for (size_t i = 0; i < len; i++) {
             if (!used[i]) {
                 DEBUG("starting new subread");
                 size_t cur_seed_count = 0;
@@ -613,33 +625,7 @@ public:
                 }
             }
         }
-        DEBUG("adding gaps between subreads");
-
-        for (size_t i = 0; i + 1 < sortedEdges.size() ; i++) {
-                if (block_gap_closer[i])
-                    continue;
-                size_t j = i + 1;
-                EdgeId before_gap = sortedEdges[i][sortedEdges[i].size() - 1];
-                EdgeId after_gap = sortedEdges[j][0];
-//do not add "gap" for rc-jumping
-                if (before_gap != after_gap
-                        && before_gap != g_.conjugate(after_gap)) {
-                    if (i != j && TopologyGap(before_gap, after_gap, true)) {
-                        if (start_clusters[j]->CanFollow(*end_clusters[i])) {
-                            auto gap = CreateGapDescription(*end_clusters[i],
-                                                            *start_clusters[j],
-                                                            s);
-                            if (gap != GapDescription()) {
-                                illumina_gaps.push_back(gap);
-                                DEBUG("adding gap between alignments number " << i<< " and " << j);
-                            }
-                        }
-
-                    }
-                }
-
-        }
-        return OneReadMapping(sortedEdges, illumina_gaps, real_length, used_seed_count);
+        return AddGapDescriptions(start_clusters,end_clusters, sortedEdges, s, block_gap_closer, used_seed_count);
     }
 
     std::pair<int, int> GetPathLimits(const KmerCluster<Graph> &a,
@@ -688,7 +674,6 @@ public:
             not_found = (distance_it == distance_cashed.end());
         }
         if (not_found) {
-//TODO: constants
             omnigraph::DijkstraHelper<debruijn_graph::Graph>::BoundedDijkstra dijkstra(
                     omnigraph::DijkstraHelper<debruijn_graph::Graph>::CreateBoundedDijkstra(g_, pb_config_.max_path_in_dijkstra, pb_config_.max_vertex_in_dijkstra));
             dijkstra.Run(start_v);
@@ -696,36 +681,34 @@ public:
                 result = dijkstra.GetDistance(end_v);
             }
 #pragma omp critical(pac_index)
-        {
-            distance_it = distance_cashed.insert({vertex_pair, result}).first;
-        }
+            {
+                distance_it = distance_cashed.insert({vertex_pair, result}).first;
+            }
         } else {
             DEBUG("taking from cashed");
         }
 
-
         result = distance_it->second;
         DEBUG (result);
         if (result == size_t(-1)) {
             return 0;
         }
         //TODO: Serious optimization possible
-
+        int near_to_cluster_end = 500;
         for (auto a_iter = a.sorted_positions.begin();
                 a_iter != a.sorted_positions.end(); ++a_iter) {
-            if (a_iter - a.sorted_positions.begin() > 500 &&  a.sorted_positions.end() - a_iter >500) continue;
+            if (a_iter - a.sorted_positions.begin() > near_to_cluster_end &&  a.sorted_positions.end() - a_iter > near_to_cluster_end) continue;
             int cnt = 0;
             for (auto b_iter = b.sorted_positions.begin();
-                    b_iter != b.sorted_positions.end() && cnt <500; ++b_iter, cnt ++) {
-                if (similar_in_graph(*a_iter, *b_iter,
-                            (int) (result + addition))) {
+                    b_iter != b.sorted_positions.end() && cnt < near_to_cluster_end; ++b_iter, cnt ++) {
+                if (similar_in_graph(*a_iter, *b_iter, (int) (result + addition))) {
                     return 1;
                 }
             }
             cnt = 0;
-            if (b.sorted_positions.size() > 500) {
+            if ( (int) b.sorted_positions.size() > near_to_cluster_end) {
                 for (auto b_iter = b.sorted_positions.end() - 1;
-                                        b_iter != b.sorted_positions.begin() && cnt < 500; --b_iter, cnt ++) {
+                                        b_iter != b.sorted_positions.begin() && cnt < near_to_cluster_end; --b_iter, cnt ++) {
                     if (similar_in_graph(*a_iter, *b_iter,
                                 (int) (result + addition))) {
                         return 1;
@@ -733,9 +716,7 @@ public:
                 }
             }
         }
-
         return 0;
-
     }
 
     string PathToString(const vector<EdgeId>& path) const {
@@ -747,7 +728,7 @@ public:
         }
         return res;
     }
-
+//TODO this should be replaced by Dijkstra based graph-read alignment
     vector<EdgeId> BestScoredPath(const Sequence &s, VertexId start_v, VertexId end_v,
                                   int path_min_length, int path_max_length,
                                   int start_pos, int end_pos, string &s_add,
@@ -763,7 +744,7 @@ public:
         int s_len = int(s.size());
         string seq_string = s.Subseq(start_pos, min(end_pos + 1, s_len)).str();
         size_t best_path_ind = paths.size();
-        size_t best_score = 1000000000;
+        int best_score = STRING_DIST_INF;
         DEBUG("need to find best scored path between "<<paths.size()<<" , seq_len " << seq_string.length());
         if (paths.size() == 0) {
             DEBUG ("no paths");
@@ -785,7 +766,7 @@ public:
                     DEBUG(g_.int_id(*j_iter));
                 }
             }
-            size_t cur_score = StringDistance(cur_string, seq_string);
+            int cur_score = StringDistance(cur_string, seq_string);
             if (paths.size() > 1 && paths.size() < 10) {
                 DEBUG("score: "<< cur_score);
             }
@@ -795,7 +776,7 @@ public:
             }
         }
         DEBUG(best_score);
-        if (best_score == 1000000000)
+        if (best_score == STRING_DIST_INF)
             return vector<EdgeId>(0);
         if (paths.size() > 1 && paths.size() < 10) {
             DEBUG("best score found! Path " <<best_path_ind <<" score "<< best_score);
@@ -837,8 +818,8 @@ public:
         for (auto iter = largest_clusters.begin(); iter != largest_clusters.end(); ++iter) {
             auto first_cluster = iter->second.sorted_positions[iter->second.first_trustable_index];
             auto last_cluster = iter->second.sorted_positions[iter->second.last_trustable_index];
-            omnigraph::MappingRange range(omnigraph::Range(first_cluster.read_position, last_cluster.read_position),
-                                          omnigraph::Range(first_cluster.edge_position, last_cluster.edge_position));
+            omnigraph::MappingRange range(Range(first_cluster.read_position, last_cluster.read_position),
+                                          Range(first_cluster.edge_position, last_cluster.edge_position));
             result.join({iter->second.edgeId, range});
         }
 
@@ -861,7 +842,7 @@ public:
 };
 
 template<class Graph>
-typename PacBioMappingIndex<Graph>::MappingDescription PacBioMappingIndex<Graph>::Locate(const Sequence &s) const {
+typename PacBioMappingIndex<Graph>::MappingDescription PacBioMappingIndex<Graph>::GetSeedsFromRead(const Sequence &s) const {
     MappingDescription res;
     //WARNING: removed read_count from here to make const methods
     int local_read_count = 0;
@@ -881,12 +862,14 @@ typename PacBioMappingIndex<Graph>::MappingDescription PacBioMappingIndex<Graph>
         auto keys = tmp_index.get(kwh);
         TRACE("Valid key, size: "<< keys.size());
 
+        int quality = (int) keys.size();
+        if (quality > 1000) {
+            DEBUG ("Ignoring repretive kmer")
+            continue;
+        }
         for (auto iter = keys.begin(); iter != keys.end(); ++iter) {
 
-            int quality = (int) keys.size();
             TRACE("and quality:" << quality);
-            if (banned_kmers.find(Sequence(kwh.key())) != banned_kmers.end())
-                continue;
             int offset = (int)iter->offset;
             int s_stretched = int ((double)s.size() * 1.2 + 50);
             int edge_len = int(g_.length(iter->edge_id));
diff --git a/src/common/modules/alignment/pacbio/pacbio_read_structures.hpp b/src/common/modules/alignment/pacbio/pacbio_read_structures.hpp
index 6ae4b7a..30f293c 100644
--- a/src/common/modules/alignment/pacbio/pacbio_read_structures.hpp
+++ b/src/common/modules/alignment/pacbio/pacbio_read_structures.hpp
@@ -7,14 +7,15 @@
 
 #pragma once
 
-#include "utils/indices/perfect_hash_map.hpp"
-#include "common/modules/alignment/sequence_mapper.hpp"
-#include "common/assembly_graph/core/graph.hpp"
+#include "utils/ph_map/perfect_hash_map.hpp"
+#include "modules/alignment/sequence_mapper.hpp"
+#include "assembly_graph/core/graph.hpp"
 #include <algorithm>
 #include <map>
 #include <set>
 
 namespace pacbio {
+static const int STRING_DIST_INF = 1e8;
 typedef omnigraph::GapDescription<debruijn_graph::Graph> GapDescription;
 
 template<class T>
@@ -149,55 +150,6 @@ private:
     ;
 };
 
-//template<class Graph>
-//struct GapDescription {
-//    typedef typename Graph::EdgeId EdgeId;
-//    EdgeId start, end;
-//    Sequence gap_seq;
-//    int edge_gap_start_position, edge_gap_end_position;
-//
-//
-//    GapDescription(EdgeId start_e, EdgeId end_e, const Sequence &gap, int gap_start, int gap_end) :
-//            start(start_e), end(end_e), gap_seq(gap.str()), edge_gap_start_position(gap_start), edge_gap_end_position(gap_end) {
-//    }
-//
-//    GapDescription(const KmerCluster<Graph> &a, const KmerCluster<Graph> & b, Sequence read, int pacbio_k) {
-//        edge_gap_start_position = a.sorted_positions[a.last_trustable_index].edge_position;
-//        edge_gap_end_position = b.sorted_positions[b.first_trustable_index].edge_position + pacbio_k - 1;
-//        start = a.edgeId;
-//        end = b.edgeId;
-//        DEBUG(read.str());
-//        gap_seq = read.Subseq(a.sorted_positions[a.last_trustable_index].read_position,
-//                              b.sorted_positions[b.first_trustable_index].read_position + pacbio_k - 1);
-//        DEBUG(gap_seq.str());
-//        DEBUG("gap added");
-//    }
-//
-//    GapDescription<Graph> conjugate(Graph &g, int shift) const {
-//        GapDescription<Graph> res(
-//                g.conjugate(end), g.conjugate(start), (!gap_seq),
-//                (int) g.length(end) + shift - edge_gap_end_position,
-//                (int) g.length(start) + shift - edge_gap_start_position);
-//         DEBUG("conjugate created" << res.str(g));
-//         return res;
-//    }
-//
-//    string str(Graph &g) const {
-//        stringstream s;
-//        s << g.int_id(start) << " " << edge_gap_start_position <<endl << g.int_id(end) << " " << edge_gap_end_position << endl << gap_seq.str()<< endl;
-//        return s.str();
-//    }
-//
-//    bool operator <(const GapDescription& b) const {
-//        return (start < b.start || (start == b.start &&  end < b.end) ||
-//                (start == b.start &&  end == b.end && edge_gap_start_position < b.edge_gap_start_position));
-//    }
-//
-//private:
-//    DECL_LOGGER("PacIndex")
-//    ;
-//}
-
 struct StatsCounter{
     map<size_t,size_t> path_len_in_edges;
     vector<size_t> subreads_length;
@@ -268,9 +220,9 @@ inline int StringDistance(string &a, string &b) {
         int high = min(min(b_len, i + d + 1), i + a_len - b_len + d + 1);
         TRACE(low << " " <<high);
         for (int j = low; j < high; j++)
-            table[i][j] = 1000000;
+            table[i][j] = STRING_DIST_INF;
     }
-    table[a_len - 1][b_len - 1] = 1000000;
+    table[a_len - 1][b_len - 1] = STRING_DIST_INF;
     table[0][0] = 0;
 //free deletions on begin
 //      for(int j = 0; j < b_len; j++)
diff --git a/src/common/modules/alignment/rna/ss_coverage.hpp b/src/common/modules/alignment/rna/ss_coverage.hpp
new file mode 100644
index 0000000..412e4c2
--- /dev/null
+++ b/src/common/modules/alignment/rna/ss_coverage.hpp
@@ -0,0 +1,61 @@
+//
+// Created by andrey on 22.05.17.
+//
+
+#pragma once
+
+#include <assembly_graph/core/graph.hpp>
+
+namespace debruijn_graph {
+
+
+class SSCoverageStorage {
+public:
+    typedef std::unordered_map<EdgeId, double> InnerMap;
+
+private:
+    const Graph& g_;
+
+    InnerMap storage_;
+
+public:
+    SSCoverageStorage(const Graph& g): g_(g), storage_() {}
+
+    double GetCoverage(EdgeId e, bool reverse = false) const {
+        if (reverse) {
+            e = g_.conjugate(e);
+        }
+
+        auto it = storage_.find(e);
+        if (it == storage_.end())
+            return 0.0;
+        return it->second;
+    }
+
+    void IncreaseKmerCount(EdgeId e, size_t count, bool add_reverse = false) {
+        storage_[e] += (double) count;
+        if (add_reverse)
+            storage_[g_.conjugate(e)] += (double) count;
+    }
+
+    void Clear() {
+        storage_.clear();
+    }
+
+    void RecalculateCoverage() {
+        for(auto& it : storage_) {
+            it.second = it.second / double(g_.length(it.first));
+        }
+    }
+
+    InnerMap::const_iterator begin() const {
+        return storage_.begin();
+    }
+
+    InnerMap::const_iterator end() const {
+        return storage_.end();
+    }
+};
+
+
+}
\ No newline at end of file
diff --git a/src/common/modules/alignment/rna/ss_coverage_filler.hpp b/src/common/modules/alignment/rna/ss_coverage_filler.hpp
new file mode 100644
index 0000000..e7f56c3
--- /dev/null
+++ b/src/common/modules/alignment/rna/ss_coverage_filler.hpp
@@ -0,0 +1,62 @@
+//
+// Created by andrey on 23.05.17.
+//
+
+#pragma once
+
+#include <modules/alignment/sequence_mapper_notifier.hpp>
+#include "modules/alignment/rna/ss_coverage.hpp"
+
+namespace debruijn_graph {
+
+class SSCoverageFiller: public SequenceMapperListener {
+private:
+    const Graph& g_;
+
+    SSCoverageStorage& storage_;
+
+    std::vector<SSCoverageStorage> tmp_storages_;
+
+    bool symmetric_;
+
+    void ProcessRange(size_t thread_index, const MappingPath<EdgeId>& read) {
+        for (size_t i = 0; i < read.size(); ++i) {
+            const auto& range = read[i].second;
+            size_t kmer_count = range.mapped_range.end_pos - range.mapped_range.start_pos;
+            tmp_storages_[thread_index].IncreaseKmerCount(read[i].first, kmer_count, symmetric_);
+        }
+    }
+public:
+    SSCoverageFiller(const Graph& g, SSCoverageStorage& storage, bool symmertic = false):
+        g_(g), storage_(storage), tmp_storages_(), symmetric_(symmertic) {}
+
+    void StartProcessLibrary(size_t threads_count) override {
+        tmp_storages_.clear();
+
+        for (size_t i = 0; i < threads_count; ++i) {
+            tmp_storages_.emplace_back(g_);
+        }
+    }
+
+    void StopProcessLibrary() override {
+        for (auto& storage : tmp_storages_)
+            storage.Clear();
+        storage_.RecalculateCoverage();
+    }
+
+    void ProcessSingleRead(size_t thread_index, const io::SingleRead& /* r */, const MappingPath<EdgeId>& read) override {
+        ProcessRange(thread_index, read);
+    }
+
+    void ProcessSingleRead(size_t thread_index, const io::SingleReadSeq& /* r */, const MappingPath<EdgeId>& read) override {
+        ProcessRange(thread_index, read);
+    }
+
+    void MergeBuffer(size_t thread_index) override {
+        for (const auto& it : tmp_storages_[thread_index])
+            storage_.IncreaseKmerCount(it.first, size_t(it.second));
+        tmp_storages_[thread_index].Clear();
+    }
+};
+
+}
\ No newline at end of file
diff --git a/src/common/modules/alignment/sequence_mapper.hpp b/src/common/modules/alignment/sequence_mapper.hpp
index 7572fb6..d40ecd0 100644
--- a/src/common/modules/alignment/sequence_mapper.hpp
+++ b/src/common/modules/alignment/sequence_mapper.hpp
@@ -10,19 +10,18 @@
 #include "assembly_graph/paths/mapping_path.hpp"
 #include "assembly_graph/paths/path_processor.hpp"
 #include "sequence/sequence_tools.hpp"
-#include "common/assembly_graph/core/basic_graph_stats.hpp"
+#include "assembly_graph/core/basic_graph_stats.hpp"
 
 #include "edge_index.hpp"
 #include "kmer_mapper.hpp"
 
 #include <cstdlib>
-#include "common/assembly_graph/core/basic_graph_stats.hpp"
+#include "assembly_graph/core/basic_graph_stats.hpp"
 
 namespace debruijn_graph {
 using omnigraph::MappingPath;
 using omnigraph::Path;
 using omnigraph::MappingRange;
-using omnigraph::Range;
 
 template<class Graph>
 MappingPath<typename Graph::EdgeId> ConjugateMapping(const Graph& g, 
diff --git a/src/common/modules/alignment/sequence_mapper_notifier.hpp b/src/common/modules/alignment/sequence_mapper_notifier.hpp
index 35120e2..ee04bdd 100644
--- a/src/common/modules/alignment/sequence_mapper_notifier.hpp
+++ b/src/common/modules/alignment/sequence_mapper_notifier.hpp
@@ -8,12 +8,12 @@
 #ifndef SEQUENCE_MAPPER_NOTIFIER_HPP_
 #define SEQUENCE_MAPPER_NOTIFIER_HPP_
 
-#include "utils/memory_limit.hpp"
 #include "sequence_mapper.hpp"
 #include "short_read_mapper.hpp"
 #include "io/reads/paired_read.hpp"
 #include "io/reads/read_stream_vector.hpp"
 #include "pipeline/graph_pack.hpp"
+#include "common/utils/perf/memory_limit.hpp"
 
 #include <vector>
 #include <cstdlib>
@@ -43,14 +43,13 @@ class SequenceMapperNotifier {
 public:
     typedef SequenceMapper<conj_graph_pack::graph_t> SequenceMapperT;
 
-    SequenceMapperNotifier(const conj_graph_pack& gp)
-            : gp_(gp) { }
+    typedef std::vector<SequenceMapperListener*> ListenersContainer;
+
+    SequenceMapperNotifier(const conj_graph_pack& gp, size_t lib_count)
+            : gp_(gp), listeners_(lib_count) { }
 
     void Subscribe(size_t lib_index, SequenceMapperListener* listener) {
-        while ((int)lib_index >= (int)listeners_.size() - 1) {
-            std::vector<SequenceMapperListener*> vect;
-            listeners_.push_back(vect);
-        }
+        VERIFY(lib_index < listeners_.size());
         listeners_[lib_index].push_back(listener);
     }
 
@@ -63,7 +62,7 @@ public:
         streams.reset();
         NotifyStartProcessLibrary(lib_index, threads_count);
         size_t counter = 0, n = 15;
-        size_t fmem = get_free_memory();
+        size_t fmem = utils::get_free_memory();
 
         #pragma omp parallel for num_threads(threads_count) shared(counter)
         for (size_t i = 0; i < streams.size(); ++i) {
@@ -74,7 +73,7 @@ public:
                 if (size == BUFFER_SIZE || 
                     // Stop filling buffer if the amount of available is smaller
                     // than half of free memory.
-                    (10 * get_free_memory() / 4 < fmem && size > 10000)) {
+                    (10 * utils::get_free_memory() / 4 < fmem && size > 10000)) {
                     #pragma omp critical
                     {
                         counter += size;
diff --git a/src/common/modules/alignment/short_read_mapper.hpp b/src/common/modules/alignment/short_read_mapper.hpp
index db9e564..3e812fd 100644
--- a/src/common/modules/alignment/short_read_mapper.hpp
+++ b/src/common/modules/alignment/short_read_mapper.hpp
@@ -9,7 +9,7 @@
 
 
 #include "sequence_mapper.hpp"
-#include "common/modules/alignment/pacbio/pac_index.hpp"
+#include "modules/alignment/pacbio/pac_index.hpp"
 #include "modules/alignment/bwa_sequence_mapper.hpp"
 
 namespace debruijn_graph {
diff --git a/src/common/utils/coverage_model/CMakeLists.txt b/src/common/modules/coverage_model/CMakeLists.txt
similarity index 100%
rename from src/common/utils/coverage_model/CMakeLists.txt
rename to src/common/modules/coverage_model/CMakeLists.txt
diff --git a/src/common/utils/coverage_model/kmer_coverage_model.cpp b/src/common/modules/coverage_model/kmer_coverage_model.cpp
similarity index 99%
rename from src/common/utils/coverage_model/kmer_coverage_model.cpp
rename to src/common/modules/coverage_model/kmer_coverage_model.cpp
index ce77e11..fa2eecb 100644
--- a/src/common/utils/coverage_model/kmer_coverage_model.cpp
+++ b/src/common/modules/coverage_model/kmer_coverage_model.cpp
@@ -27,7 +27,6 @@
 #include <cstddef>
 #include <cmath>
 
-namespace utils {
 namespace coverage_model {
 
 using std::isfinite;
@@ -377,4 +376,3 @@ void KMerCoverageModel::Fit() {
 }
 
 }
-}
diff --git a/src/common/utils/coverage_model/kmer_coverage_model.hpp b/src/common/modules/coverage_model/kmer_coverage_model.hpp
similarity index 98%
rename from src/common/utils/coverage_model/kmer_coverage_model.hpp
rename to src/common/modules/coverage_model/kmer_coverage_model.hpp
index 2268262..cd8aaa3 100644
--- a/src/common/utils/coverage_model/kmer_coverage_model.hpp
+++ b/src/common/modules/coverage_model/kmer_coverage_model.hpp
@@ -10,7 +10,6 @@
 #include <vector>
 #include <cstddef>
 
-namespace utils {
 namespace coverage_model {
 
 class KMerCoverageModel {
@@ -45,4 +44,3 @@ private:
 };
 
 }
-}
diff --git a/src/common/modules/genome_consistance_checker.cpp b/src/common/modules/genome_consistance_checker.cpp
index ac40130..cfd0697 100644
--- a/src/common/modules/genome_consistance_checker.cpp
+++ b/src/common/modules/genome_consistance_checker.cpp
@@ -1,174 +1,315 @@
 #include "modules/genome_consistance_checker.hpp"
+#include "modules/path_extend/paired_library.hpp"
 #include "assembly_graph/core/graph.hpp"
 #include <algorithm>
+#include <numeric>
 #include <limits>
+
 namespace debruijn_graph {
 using omnigraph::MappingRange;
 using namespace std;
 
 //gap or overlap size. WITHOUT SIGN!
-static size_t gap(const Range &a, const Range &b) {
-    return max(a.end_pos, b.start_pos) - min (a.end_pos, b.start_pos);
+size_t AbsGap(const Range &a, const Range &b) {
+    return max(a.end_pos, b.start_pos) - min(a.end_pos, b.start_pos);
 }
-bool GenomeConsistenceChecker::consequent(const Range &mr1, const Range &mr2) const{
-    if (mr1.end_pos > mr2.start_pos + absolute_max_gap_)
-        return false;
-    if (mr1.end_pos + absolute_max_gap_ < mr2.start_pos)
-        return false;
-    return true;
 
-}
-bool GenomeConsistenceChecker::consequent(const MappingRange &mr1, const MappingRange &mr2) const {
+bool GenomeConsistenceChecker::Consequent(const MappingRange &mr1, const MappingRange &mr2) const {
     //do not want to think about handling gaps near 0 position.
-    if (!consequent(mr1.initial_range, mr2.initial_range) || !consequent(mr1.mapped_range, mr2.mapped_range))
-        return false;
-    size_t initial_gap = gap(mr1.initial_range, mr2.initial_range);
-    size_t mapped_gap = gap(mr1.mapped_range, mr2.mapped_range);
-    size_t max_gap = max(initial_gap, mapped_gap);
-    if ( max_gap > relative_max_gap_* double (max (min(mr1.initial_range.size(), mr1.mapped_range.size()), min(mr2.initial_range.size(), mr2.mapped_range.size()))))
+    size_t max_gap = max(AbsGap(mr1.initial_range, mr2.initial_range),
+                         AbsGap(mr1.mapped_range, mr2.mapped_range));
+
+    if (max_gap > absolute_max_gap_)
         return false;
-    return true;
+    size_t len = max(min(mr1.initial_range.size(), mr1.mapped_range.size()),
+                     min(mr2.initial_range.size(), mr2.mapped_range.size()));
+    return max_gap <= size_t(math::round(relative_max_gap_* double(len)));
 }
 
 PathScore GenomeConsistenceChecker::CountMisassemblies(const BidirectionalPath &path) const {
-    PathScore straight = CountMisassembliesWithStrand(path, "0");
-    PathScore reverse = CountMisassembliesWithStrand(path, "1");
+    PathScore score = InternalCountMisassemblies(path);
+    if (path.Size() == 0) {
+        WARN ("0 length path in GCChecker!!!");
+        return PathScore(0,0,0);
+    }
     size_t total_length = path.LengthAt(0);
 //TODO: constant;
-    if (total_length > std::max(straight.mapped_length, reverse.mapped_length) * 2) {
-        if (total_length > 10000) {
+    if (total_length > score.mapped_length * 2) {
+        if (total_length > SIGNIFICANT_LENGTH_LOWER_LIMIT) {
             INFO ("For path length " << total_length <<" mapped less than half of the path, skipping");
         }
         return PathScore(0,0,0);
     } else {
-        if (straight.mapped_length > reverse.mapped_length) {
-            return straight;
-        } else {
-            return reverse;
-        }
+        return score;
     }
 }
 
-vector<pair<EdgeId, MappingRange> > GenomeConsistenceChecker::ConstructEdgeOrder() const {
-    vector<pair<EdgeId, MappingRange> > to_sort;
-    for(auto e: storage_) {
-        if (excluded_unique_.find(e) == excluded_unique_.end() ) {
-            set<MappingRange> mappings = gp_.edge_pos.GetEdgePositions(e, "fxd0");
-            if (mappings.size() > 1) {
-                INFO("edge " << e << "smth strange");
-            } else if (mappings.size() == 0) {
-                continue;
-            } else {
-                to_sort.push_back(make_pair(e, *mappings.begin()));
-            }
+MappingPath<EdgeId> GenomeConsistenceChecker::ConstructEdgeOrder(const string& chr_name) const {
+    vector<pair<EdgeId, MappingRange>> to_sort;
+    DEBUG ("constructing edge order for chr " << chr_name);
+    for (auto e: storage_) {
+        set<MappingRange> mappings = gp_.edge_pos.GetEdgePositions(e, chr_name);
+        VERIFY_MSG(mappings.size() <= 1, "Presumably unique edge " << e << " with multiple mappings!");
+        if (!mappings.empty()) {
+            to_sort.push_back(make_pair(e, *mappings.begin()));
         }
     }
-    sort(to_sort.begin(), to_sort.end(), [](const pair<EdgeId, MappingRange> & a, const pair<EdgeId, MappingRange> & b) -> bool
-    {
+    DEBUG("Sorting " << to_sort << " positions:");
+    sort(to_sort.begin(), to_sort.end(),
+         [](const pair<EdgeId, MappingRange> & a, const pair<EdgeId, MappingRange> & b) {
         return a.second.initial_range.start_pos < b.second.initial_range.start_pos;
-    }
-    );
-    return to_sort;
+    });
+    return MappingPathT(to_sort);
 }
 
+void GenomeConsistenceChecker::ReportEdge(EdgeId e, double w) const{
+    INFO("Edge " << gp_.g.int_id(e) << " weight " << w << " len " << gp_.g.length(e) << " cov " << gp_.g.coverage(e));
+    if (!genome_info_.Multiplicity(e)) {
+        INFO(" no chromosome position");
+    } else {
+        auto info = genome_info_.UniqueChromosomeIdx(e);
+        INFO ("Chromosome " << info.first << " index " << info.second);
+    }
+}
 
-void GenomeConsistenceChecker::SpellGenome() {
+void GenomeConsistenceChecker::ReportVariants(vector<pair<double, EdgeId>> &sorted_w) const {
+    sort(sorted_w.rbegin(), sorted_w.rend());
     size_t count = 0;
-    auto to_sort = ConstructEdgeOrder();
-    vector<size_t> starts;
-    vector<size_t> ends;
-    for(size_t i = 0; i <to_sort.size(); i++) {
-        if (i > 0 && to_sort[i].second.initial_range.start_pos - to_sort[i-1].second.initial_range.end_pos > storage_.GetMinLength() ) {
-            INFO ("Large gap " << to_sort[i].second.initial_range.start_pos - to_sort[i-1].second.initial_range.end_pos );
-            starts.push_back(to_sort[i].second.initial_range.start_pos);
-            ends.push_back(to_sort[i-1].second.initial_range.end_pos);
-        }
-        if (i == 0) {
-            starts.push_back(to_sort[i].second.initial_range.start_pos);
+    double additional_weight = 0;
+    size_t reporting = 4;
+    for (const auto pair: sorted_w) {
+        if (count == 0) {
+            INFO("First candidate:");
         }
-        if (i == to_sort.size() - 1){
-            ends.push_back(to_sort[i].second.initial_range.end_pos);
+        if (count < reporting) {
+            ReportEdge(pair.second, pair.first);
+        } else {
+            additional_weight += pair.first;
         }
-        INFO("edge " << gp_.g.int_id(to_sort[i].first) << " length "<< gp_.g.length(to_sort[i].first) <<
-                     " coverage " << gp_.g.coverage(to_sort[i].first) << " mapped to " << to_sort[i].second.mapped_range.start_pos
-             << " - " << to_sort[i].second.mapped_range.end_pos << " init_range " << to_sort[i].second.initial_range.start_pos << " - " << to_sort[i].second.initial_range.end_pos );
-        genome_spelled_[to_sort[i].first] = count;
         count++;
     }
-    vector<size_t> lengths;
-    size_t total_len = 0;
-    for (size_t i = 0; i < starts.size(); i++) {
-        lengths.push_back(ends[i] - starts[i]);
-        total_len += lengths[i];
+    if (reporting < sorted_w.size()) {
+        INFO("Additional weight " << additional_weight << " of " << sorted_w.size() - reporting <<
+             " candidates");
     }
-    sort(lengths.begin(), lengths.end());
-    reverse(lengths.begin(), lengths.end());
-    size_t cur = 0;
-    size_t i = 0;
-    while (cur < total_len / 2 && i < lengths.size()) {
-        cur += lengths[i];
-        i++;
+    if (sorted_w.size() == 0) {
+        INFO("No uniqueness info");
     }
-    INFO("Assuming gaps of length > " << storage_.GetMinLength() << " unresolvable..");
-    if (lengths.size() > 0)
-        INFO("Rough estimates on N50/L50:" << lengths[i - 1] << " / " << i - 1 << " with len " << total_len);
 }
 
-PathScore GenomeConsistenceChecker::CountMisassembliesWithStrand(const BidirectionalPath &path, const string strand) const {
-    if (strand == "1") {
-        return (CountMisassembliesWithStrand(*path.GetConjPath(), "0"));
+void GenomeConsistenceChecker::ReportPathEndByLongLib(const path_extend::BidirectionalPathSet &covering_paths,
+                                                      EdgeId current_edge) const {
+    vector<pair<double, EdgeId>> sorted_w;
+    for (const auto & cov_path: covering_paths) {
+        double w = cov_path->GetWeight();
+        map<EdgeId, double> next_weigths;
+        if (math::gr(w, 1.0)) {
+            for (size_t p_ind = 0; p_ind < cov_path->Size(); p_ind++) {
+                if (cov_path->At(p_ind) == current_edge) {
+                    for (size_t p_ind2  = p_ind + 1; p_ind2 < cov_path->Size(); p_ind2++) {
+                        if (gp_.g.length(cov_path->At(p_ind2)) >= storage_.min_length() ) {
+                            next_weigths[cov_path->At(p_ind2)] += w;
+                        }
+                    }
+                    break;
+                }
+            }
+        }
+        for (const auto &p: next_weigths) {
+            sorted_w.push_back(make_pair(p.second, p.first));
+        }
     }
-    PathScore res(0, 0, 0);
-    EdgeId prev;
-    size_t prev_in_genome = std::numeric_limits<std::size_t>::max();
-    size_t prev_in_path = std::numeric_limits<std::size_t>::max();
-    MappingRange prev_range;
-    for (int i = 0; i < (int) path.Size(); i++) {
-        if (genome_spelled_.find(path.At(i)) != genome_spelled_.end()) {
-            size_t cur_in_genome =  genome_spelled_[path.At(i)];
-            MappingRange cur_range = *gp_.edge_pos.GetEdgePositions(path.At(i), "fxd0").begin();
-            if (prev_in_genome != std::numeric_limits<std::size_t>::max()) {
-                if (cur_in_genome == prev_in_genome + 1) {
-                    int dist_in_genome = (int) cur_range.initial_range.start_pos -  (int) prev_range.initial_range.end_pos;
-                    int dist_in_path = (int) path.LengthAt(prev_in_path) - (int) path.LengthAt(i) +  (int) cur_range.mapped_range.start_pos - (int) prev_range.mapped_range.end_pos;
-                    DEBUG("Edge " << prev.int_id() << "  position in genome ordering: " << prev_in_genome);
-                    DEBUG("Gap in genome / gap in path: " << dist_in_genome << " / " << dist_in_path);
-                    if (size_t(abs(dist_in_genome - dist_in_path)) > absolute_max_gap_ && (dist_in_genome * (1 + relative_max_gap_) < dist_in_path || dist_in_path * (1 + relative_max_gap_) < dist_in_genome)) {
-
-                        res.wrong_gap_size ++;
+    INFO("Looking on long reads, last long edge: ");
+    ReportVariants(sorted_w);
+}
+
+void GenomeConsistenceChecker::ReportPathEndByPairedLib(const shared_ptr<path_extend::PairedInfoLibrary> paired_lib,
+                                                        EdgeId current_edge) const {
+    vector<pair<double, EdgeId>> sorted_w;
+    set<EdgeId> result;
+    paired_lib->FindJumpEdges(current_edge, result, std::numeric_limits<int>::min(), std::numeric_limits<int>::max(),
+                              storage_.min_length());
+    for (const auto e: result) {
+        double w = paired_lib->CountPairedInfo(current_edge, e, std::numeric_limits<int>::min(),
+                                               std::numeric_limits<int>::max());
+        if (math::gr(w, 1.0))
+            sorted_w.push_back(make_pair(w, e));
+    }
+    INFO("Looking on lib IS " << paired_lib->GetIS());
+    ReportVariants(sorted_w);
+}
+
+void GenomeConsistenceChecker::CheckPathEnd(const BidirectionalPath &path) const {
+    for (int i =  (int)path.Size() - 1; i >= 0; --i) {
+        if (storage_.IsUnique(path.At(i))) {
+            EdgeId current_edge = path.At(i);
+            if (genome_info_.Multiplicity(current_edge)) {
+                const auto &chr_info = genome_info_.UniqueChromosomeInfo(current_edge);
+                size_t index = chr_info.UniqueEdgeIdx(current_edge);
+                if (index == 0 || index == chr_info.size()) {
+                    DEBUG("Path length " << path.Length() << " ended at the chromosome " << chr_info.name()
+                          << (index == 0 ? " start": " end"));
+                    return;
+                }
+            }
+            INFO("Path length " << path.Length() << " ended, last unique: ");
+            ReportEdge(current_edge, -1.0);
+            for (size_t lib_index = 0; lib_index < reads_.lib_count(); ++lib_index) {
+                const auto &lib = reads_[lib_index];
+                if (lib.is_paired()) {
+                    shared_ptr<path_extend::PairedInfoLibrary> paired_lib;
+                    if (lib.is_mate_pair())
+                        paired_lib = path_extend::MakeNewLib(gp_.g, lib, gp_.paired_indices[lib_index]);
+                    else if (lib.type() == io::LibraryType::PairedEnd)
+                        paired_lib = path_extend::MakeNewLib(gp_.g, lib, gp_.clustered_indices[lib_index]);
+                    ReportPathEndByPairedLib(paired_lib, current_edge);
+                } else if (lib.is_long_read_lib()) {
+                    ReportPathEndByLongLib(long_reads_cov_map_[lib_index].GetCoveringPaths(current_edge), current_edge);
+                }
+            }
+            return;
+        }
+    }
+}
+
+size_t GenomeConsistenceChecker::GetSupportingPathCount(EdgeId e1, EdgeId e2, size_t lib_index) const {
+    auto covering_paths = long_reads_cov_map_[lib_index].GetCoveringPaths(e1);
+    size_t res = 0;
+    for (const auto & cov_path: covering_paths) {
+        double w = cov_path->GetWeight();
+        if (math::gr(w, 1.0)) {
+            for (size_t p_ind = 0; p_ind < cov_path->Size(); p_ind++) {
+                if (cov_path->At(p_ind) == e1) {
+                    for (size_t p_ind2 = p_ind + 1; p_ind2 < cov_path->Size(); p_ind2++) {
+                        if (storage_.IsUnique(cov_path->At(p_ind2))) {
+                            if (e2 == cov_path->At(p_ind2))
+                                res += size_t(w);
+                            break;
+                        }
                     }
-                } else {
-                    if (path.At(i) != circular_edge_ && path.At(prev_in_path) != circular_edge_)
-                        res.misassemblies++;
-                    else
-                        INFO("Skipping fake(circular) misassembly");
+                    break;
                 }
             }
-            res.mapped_length += cur_range.mapped_range.size();
-            prev = path.At(i);
-            prev_in_genome = cur_in_genome;
-            prev_range = cur_range;
-            prev_in_path = i;
         }
     }
-    if (prev_in_path != std::numeric_limits<std::size_t>::max())
-        DEBUG("Edge " << prev.int_id() << "  position in genome ordering: " << prev_in_genome);
     return res;
 }
-void GenomeConsistenceChecker::RefillPos() {
-    RefillPos("0");
-    RefillPos("1");
+
+void GenomeConsistenceChecker::PrintMisassemblyInfo(EdgeId e1, EdgeId e2) const {
+    VERIFY(genome_info_.Multiplicity(e1));
+    VERIFY(genome_info_.Multiplicity(e2));
+    const auto &chr_info1 = genome_info_.UniqueChromosomeInfo(e1);
+    const auto &chr_info2 = genome_info_.UniqueChromosomeInfo(e2);
+    size_t ind1 = chr_info1.UniqueEdgeIdx(e1);
+    size_t ind2 = chr_info2.UniqueEdgeIdx(e2);
+//FIXME: checks, compliment_strands;
+    EdgeId true_next = chr_info1.EdgeAt((chr_info1.UniqueEdgeIdx(e1) + 1) % chr_info1.size());
+    EdgeId true_prev = chr_info2.EdgeAt((chr_info2.UniqueEdgeIdx(e2) + chr_info2.size() - 1) % chr_info2.size());
+    INFO("Next genomic edge " << true_next.int_id() << " len " << gp_.g.length(true_next) << " prev " << true_prev.int_id() << " len " << gp_.g.length(true_prev));
+    if (chr_info1.name() == chr_info2.name() && ind1 < ind2) {
+        INFO("Same chromosome large forward jump misassembly");
+    } else if (chr_info1.name() == chr_info2.name() && ind1 > ind2)  {
+        INFO("Backward jump misassembly");
+    } else if (chr_info1.name().substr(1) == chr_info2.name().substr(1))  {
+        string revers = (ind1 + ind2 + 2 > chr_info1.size() ? " backwards " : " forward " );
+        INFO("Inversion" + revers +  "misassembly, chr edge size  " << chr_info1.size());
+    } else if (ind1 + 1 == chr_info1.size()  || ind2 == 0) {
+        string start_end = (ind2 == 0 ? " start " : " end ");
+        INFO("Chromosome " + start_end + " misassembly ");
+    } else {
+        INFO("Something else misassembly");
+    }
+    for (size_t lib_index = 0; lib_index < reads_.lib_count(); ++lib_index) {
+        const auto &lib = reads_[lib_index];
+        if (lib.is_paired()) {
+            shared_ptr<path_extend::PairedInfoLibrary> paired_lib;
+            if (lib.is_mate_pair())
+                paired_lib = path_extend::MakeNewLib(gp_.g, lib, gp_.paired_indices[lib_index]);
+            else if (lib.type() == io::LibraryType::PairedEnd)
+                paired_lib = path_extend::MakeNewLib(gp_.g, lib, gp_.clustered_indices[lib_index]);
+            INFO("for lib " << lib_index << " IS" << paired_lib->GetIS());
+            INFO("Misassembly weight regardless of dists: " << paired_lib->CountPairedInfo(e1, e2, -1000000, 1000000));
+            INFO("Next weight " << paired_lib->CountPairedInfo(e1, true_next, -1000000, 1000000));
+            INFO("Prev weight " << paired_lib->CountPairedInfo(true_prev, e2, -1000000, 1000000));
+        } else if (lib.is_long_read_lib()) {
+            INFO("for lib " << lib_index << " of long reads: ");
+            INFO("Misassembly weight " << GetSupportingPathCount(e1, e2 ,lib_index));
+            INFO("Next weight " << GetSupportingPathCount(e1, true_next ,lib_index) );
+            INFO("Prev weight " << GetSupportingPathCount(true_prev, e2 ,lib_index) );
+
+        }
+    }
 }
 
+void GenomeConsistenceChecker::ClassifyPosition(size_t prev_pos, size_t cur_pos,
+                                                const BidirectionalPath & path, PathScore &res) const{
+    EdgeId cur_e = path.At(cur_pos);
+    const auto& chr_info = genome_info_.UniqueChromosomeInfo(cur_e);
+    size_t cur_in_genome = chr_info.UniqueEdgeIdx(cur_e);
+    string cur_chr = chr_info.name();
+    MappingRange cur_range = gp_.edge_pos.GetUniqueEdgePosition(cur_e, cur_chr);
+    EdgeId prev_e = path.At(prev_pos);
+    const auto& prev_chr_info = genome_info_.UniqueChromosomeInfo(prev_e);
+    size_t prev_in_genome = prev_chr_info.UniqueEdgeIdx(prev_e);
+    string prev_chr = prev_chr_info.name();
+    MappingRange prev_range = gp_.edge_pos.GetUniqueEdgePosition(prev_e, prev_chr);
 
-void GenomeConsistenceChecker::RefillPos(const string &strand) {
-    for (auto e: storage_) {
-        RefillPos(strand, e);
+    res.mapped_length += cur_range.mapped_range.size();
+    if (cur_in_genome == prev_in_genome + 1 && cur_chr == prev_chr) {
+        int dist_in_genome = (int) cur_range.initial_range.start_pos -  (int) prev_range.initial_range.end_pos;
+        int dist_in_path = (int) path.LengthAt(prev_pos) - (int) path.LengthAt(cur_pos) +
+                (int) cur_range.mapped_range.start_pos - (int) prev_range.mapped_range.end_pos;
+        DEBUG("Edge " << prev_e.int_id() << "  position in genome ordering: " << prev_in_genome);
+        DEBUG("Gap in genome / gap in path: " << dist_in_genome << " / " << dist_in_path);
+        if (size_t(abs(dist_in_genome - dist_in_path)) > absolute_max_gap_ &&
+                (dist_in_genome * (1 + relative_max_gap_) < dist_in_path ||
+                        dist_in_path * (1 + relative_max_gap_) < dist_in_genome)) {
+            res.wrong_gap_size ++;
+        }
+    } else {
+        if (cur_chr == prev_chr && (circular_edges_.find(prev_e) != circular_edges_.end() ||
+                                    circular_edges_.find(cur_e) != circular_edges_.end())) {
+            INFO("Skipping fake(circular) misassembly");
+        } else if (cur_in_genome > prev_in_genome && cur_chr == prev_chr
+                && prev_range.initial_range.end_pos + SIGNIFICANT_LENGTH_LOWER_LIMIT > cur_range.initial_range.start_pos) {
+            INFO("Local misassembly between edges: "<<prev_e.int_id() << " and " << cur_e.int_id());
+            size_t total = 0;
+            for (auto j = prev_in_genome + 1; j < cur_in_genome; j ++) {
+                total += gp_.g.length(chr_info.EdgeAt(j));
+            }
+            INFO("Jumped over " << cur_in_genome - prev_in_genome - 1 << " uniques of total length: " << total);
+        } else if (IsCloseToEnd(prev_range, prev_chr_info) && IsCloseToStart(cur_range, chr_info)) {
+            INFO ("Skipping fake misassembly - connected " << prev_chr << " and " << cur_chr);
+        } else {
+            INFO("Extensive misassembly between edges: "<<prev_e.int_id() << " and " << cur_e.int_id());
+            INFO("Ranges: " << prev_range << " and " << cur_range);
+            INFO("Genomic positions: " << prev_in_genome << ", " << prev_chr <<
+                         " and " << cur_in_genome <<", "<< cur_chr<< " resp.");
+            PrintMisassemblyInfo(prev_e, cur_e);
+            res.misassemblies++;
+        }
+    }
+}
+
+
+PathScore GenomeConsistenceChecker::InternalCountMisassemblies(const BidirectionalPath &path) const {
+    PathScore res(0, 0, 0);
+    size_t prev_pos = std::numeric_limits<std::size_t>::max();
+    for (int i = 0; i < (int) path.Size(); i++) {
+//const method, so at instead of []
+        EdgeId e = path.At(i);
+        if (genome_info_.Multiplicity(e)) {
+            if (prev_pos != std::numeric_limits<std::size_t>::max()) {
+                ClassifyPosition(prev_pos, i, path, res);
+            }
+            prev_pos = i;
+        }
     }
+    return res;
 }
 
-void GenomeConsistenceChecker::FindBestRangeSequence(const set<MappingRange>& old_mappings, vector<MappingRange>& used_mappings) const {
-    vector<MappingRange> to_process (old_mappings.begin(), old_mappings.end());
+vector<MappingRange> GenomeConsistenceChecker::FindBestRangeSequence(const set<MappingRange>& mappings) const {
+    vector<MappingRange> to_process(mappings.begin(), mappings.end());
     sort(to_process.begin(), to_process.end(), [](const MappingRange & a, const MappingRange & b) -> bool
     {
         return a.mapped_range.start_pos < b.mapped_range.start_pos;
@@ -177,9 +318,9 @@ void GenomeConsistenceChecker::FindBestRangeSequence(const set<MappingRange>& ol
 //max weight path in orgraph of mappings
     TRACE("constructing mapping graph" << sz << " vertices");
     vector<vector<size_t>> consecutive_mappings(sz);
-    for(size_t i = 0; i < sz; i++) {
+    for (size_t i = 0; i < sz; i++) {
         for (size_t j = i + 1; j < sz; j++) {
-            if (consequent(to_process[i], to_process[j])) {
+            if (Consequent(to_process[i], to_process[j])) {
                 consecutive_mappings[i].push_back(j);
             } else {
                 if (to_process[j].mapped_range.start_pos > to_process[i].mapped_range.end_pos + absolute_max_gap_) {
@@ -189,88 +330,217 @@ void GenomeConsistenceChecker::FindBestRangeSequence(const set<MappingRange>& ol
         }
     }
     vector<size_t> scores(sz), prev(sz);
-    for(size_t i = 0; i < sz; i++) {
+    for (size_t i = 0; i < sz; i++) {
         scores[i] = to_process[i].initial_range.size();
         prev[i] = std::numeric_limits<std::size_t>::max();
     }
-    for(size_t i = 0; i < sz; i++) {
+    for (size_t i = 0; i < sz; i++) {
         for (size_t j = 0; j < consecutive_mappings[i].size(); j++) {
             TRACE(consecutive_mappings[i][j]);
-            if (scores[consecutive_mappings[i][j]] < scores[i] + to_process[consecutive_mappings[i][j]].initial_range.size()) {
-                scores[consecutive_mappings[i][j]] = scores[i] + to_process[consecutive_mappings[i][j]].initial_range.size();
+            if (scores[consecutive_mappings[i][j]] < scores[i]
+                                                     + to_process[consecutive_mappings[i][j]].initial_range.size()) {
+                scores[consecutive_mappings[i][j]] = scores[i]
+                                                     + to_process[consecutive_mappings[i][j]].initial_range.size();
                 prev[consecutive_mappings[i][j]] = i;
             }
         }
     }
     size_t cur_max = 0;
     size_t cur_i = 0;
-    for(size_t i = 0; i < sz; i++) {
+    for (size_t i = 0; i < sz; i++) {
         if (scores[i] > cur_max) {
             cur_max = scores[i];
             cur_i = i;
         }
     }
-    used_mappings.clear();
+
+    vector<MappingRange> answer;
     while (cur_i != std::numeric_limits<std::size_t>::max()) {
-        used_mappings.push_back(to_process[cur_i]);
+        answer.push_back(to_process[cur_i]);
         cur_i = prev[cur_i];
     }
-    reverse(used_mappings.begin(), used_mappings.end());
-};
+    reverse(answer.begin(), answer.end());
+    return answer;
+}
 
-void GenomeConsistenceChecker::RefillPos(const string &strand, const EdgeId &e) {
-    set<MappingRange> old_mappings = gp_.edge_pos.GetEdgePositions(e, strand);
-    TRACE("old mappings sz " << old_mappings.size() );
-    size_t total_mapped = 0;
-    for (auto mp:old_mappings) {
-        total_mapped += mp.initial_range.size();
-    }
-    if (total_mapped  > (double) gp_.g.length(e) * 1.5) {
-       INFO ("Edge " << gp_.g.int_id(e) << "is not unique, excluding");
-       excluded_unique_.insert(e);
-       return;
-    }
-//TODO: support non-unique edges;
-    if (total_mapped  < (double) gp_.g.length(e) * 0.5) {
-        DEBUG ("Edge " << gp_.g.int_id(e) << "is not mapped on strand "<< strand <<", not used");
-        return;
+map<EdgeId, string> GenomeConsistenceChecker::EdgeLabels() const {
+    INFO("Constructing reference labels");
+    map<EdgeId, string> answer;
+    size_t count = 0;
+    for (const auto &chr: genome_info_.Chromosomes()) {
+        const auto &chr_info = genome_info_.ChrInfo(chr);
+        for (size_t pos = 0; pos < chr_info.size(); ++pos) {
+            EdgeId e = chr_info.EdgeAt(pos);
+            auto mr = gp_.edge_pos.GetUniqueEdgePosition(e, chr);
+            VERIFY(!answer.count(e));
+            answer[e] += chr +
+                         "order: " + to_string(count) +
+                         "\n mapped range: " +
+                         to_string(mr.mapped_range.start_pos) + " : "
+                         + to_string(mr.mapped_range.end_pos) +
+                         "\n init range: " +
+                         to_string(mr.initial_range.start_pos) + " : "
+                         + to_string(mr.initial_range.end_pos) + "\n";
+        }
     }
-    TRACE(total_mapped << " " << gp_.g.length(e));
-    string new_strand = "fxd" + strand;
-    vector<MappingRange> used_mappings;
-    FindBestRangeSequence(old_mappings, used_mappings);
+    return answer;
+}
 
-    size_t cur_i = 0;
-    MappingRange new_mapping;
-    new_mapping = used_mappings[cur_i];
-    size_t used_mapped = new_mapping.initial_range.size();
-    TRACE ("Edge " << gp_.g.int_id(e) << " length "<< gp_.g.length(e));
-    TRACE ("new_mapping mp_range "<< new_mapping.mapped_range.start_pos << " - " << new_mapping.mapped_range.end_pos
-         << " init_range " << new_mapping.initial_range.start_pos << " - " << new_mapping.initial_range.end_pos );
-    while (cur_i  < used_mappings.size() - 1) {
-        cur_i ++;
-        used_mapped += used_mappings[cur_i].initial_range.size();
-        new_mapping = new_mapping.Merge(used_mappings[cur_i]);
-        TRACE("new_mapping mp_range "<< new_mapping.mapped_range.start_pos << " - " << new_mapping.mapped_range.end_pos
-             << " init_range " << new_mapping.initial_range.start_pos << " - " << new_mapping.initial_range.end_pos );
-    }
-//used less that 0.9 of aligned length
-    if (total_mapped * 10  >=  used_mapped * 10  + gp_.g.length(e)) {
+void GenomeConsistenceChecker::Fill() {
+    gp_.edge_pos.clear();
+    if (!gp_.edge_pos.IsAttached()) {
+        gp_.edge_pos.Attach();
+    }
+
+    //FIXME set the parameters to something more reasonable
+    EdgesPositionHandler<Graph> tmp_edge_pos(gp_.g, 0, 0);
+    visualization::position_filler::PosFiller<Graph> pos_filler(gp_.g, MapperInstance(gp_), tmp_edge_pos);
+
+    for (const auto &chr: gp_.genome.GetChromosomes()) {
+        pos_filler.Process(chr.sequence, "0_" + chr.name);
+        pos_filler.Process(ReverseComplement(chr.sequence), "1_" + chr.name);
+    }
+
+    for (auto e: storage_) {
+        FillPos(e, tmp_edge_pos);
+    }
+
+    vector<size_t> theoretic_lens;
+    for (const auto &prefix: vector<std::string>{"0_", "1_"}) {
+        for (const auto &chr: gp_.genome.GetChromosomes()) {
+            string label = prefix + chr.name;
+            INFO("Spelling label " << label);
+            auto mapping_path = ConstructEdgeOrder(label);
+            genome_info_.AddInfo(ChromosomeInfo(label, mapping_path));
+            utils::push_back_all(theoretic_lens, MappedRegions(mapping_path));
+        }
+    }
+
+    TheoreticLenStats(theoretic_lens);
+}
+
+void GenomeConsistenceChecker::TheoreticLenStats(vector<size_t> theoretic_lens) const {
+    size_t total_len = std::accumulate(theoretic_lens.begin(), theoretic_lens.end(),
+                                       0, std::plus<size_t>());
+
+    std::sort(theoretic_lens.begin(), theoretic_lens.end());
+    std::reverse(theoretic_lens.begin(), theoretic_lens.end());
+    size_t cur = 0;
+    size_t i = 0;
+    while (cur < total_len / 2) {
+        cur += theoretic_lens[i];
+        i++;
+    }
+    INFO("Assuming gaps of length > " << storage_.min_length() << " unresolvable..");
+    if (theoretic_lens.size() > 0)
+        INFO("Rough estimates on N50/L50:" << theoretic_lens[i - 1] << " / " << i - 1 << " with len " << total_len);
+}
+
+map<string, size_t>
+GenomeConsistenceChecker::TotalAlignedLengths(const EdgesPositionHandler<Graph> &tmp_edge_pos, EdgeId e) const {
+    map<string, size_t> chr2len;
+    for (const auto &edge_pos: tmp_edge_pos.GetEdgePositions(e)) {
+        chr2len[edge_pos.contigId] += edge_pos.mr.initial_range.size();
+    }
+    return chr2len;
+}
+
+vector<size_t> GenomeConsistenceChecker::MappedRegions(const GenomeConsistenceChecker::MappingPathT &mapping_path) const {
+    vector<size_t> mapped_regions;
+    if (mapping_path.size() == 0)
+        return mapped_regions;
+    size_t pos = mapping_path.front().second.initial_range.start_pos;
+    for (size_t i = 0; i < mapping_path.size(); i++) {
+        auto current_range = mapping_path[i].second;
+        INFO("Pos: " << i << " init_range " << current_range.initial_range
+                     << " mapped to edge " << gp_.g.str(mapping_path[i].first)
+                     << " range " << current_range.mapped_range);
+
+        size_t curr_start = current_range.initial_range.start_pos;
+        if (i > 0) {
+            auto prev_range = mapping_path[i - 1].second;
+            size_t prev_end = prev_range.initial_range.end_pos;
+            if (curr_start - prev_end > unresolvable_len_) {
+                INFO ("Large gap " << current_range.initial_range.start_pos -
+                                      prev_range.initial_range.end_pos);
+                mapped_regions.push_back(prev_end - pos);
+                pos = curr_start;
+            }
+        }
+    }
+    mapped_regions.push_back(mapping_path.back().second.initial_range.end_pos - pos);
+    return mapped_regions;
+}
+
+void GenomeConsistenceChecker::FillPos(EdgeId e, const EdgesPositionHandler<Graph> &tmp_edge_pos) {
+    size_t total_mapped;
+    string chr = ChromosomeByUniqueEdge(e, tmp_edge_pos, total_mapped);
+    if (chr.empty())
+        return;
+
+    auto mapping_info = Merge(FindBestRangeSequence(tmp_edge_pos.GetEdgePositions(e, chr)));
+
+    //FIXME what is the logic here?
+    //used less that 0.9 of aligned length
+    VERIFY(total_mapped >= mapping_info.second);
+    if ((total_mapped - mapping_info.second) * 10 >=  gp_.g.length(e)) {
         INFO ("Edge " << gp_.g.int_id(e) << " length "<< gp_.g.length(e)  << "is potentially misassembled! mappings: ");
-        for (auto mp:old_mappings) {
-            INFO("mp_range "<< mp.mapped_range.start_pos << " - " << mp.mapped_range.end_pos << " init_range " << mp.initial_range.start_pos << " - " << mp.initial_range.end_pos );
+        for (auto mp : tmp_edge_pos.GetEdgePositions(e, chr)) {
+            INFO("mp_range "<< mp.mapped_range.start_pos << " - " << mp.mapped_range.end_pos
+                 << " init_range " << mp.initial_range.start_pos << " - " << mp.initial_range.end_pos );
             if (mp.initial_range.start_pos < absolute_max_gap_) {
                 INFO ("Fake(linear order) misassembly on edge "<< e.int_id());
-                if (strand == "0") {
-                    circular_edge_ = e;
-                }
+                circular_edges_.insert(e);
             }
         }
+    }
+    gp_.edge_pos.AddEdgePosition(e, chr, mapping_info.first);
+}
+
+pair<MappingRange, size_t> GenomeConsistenceChecker::Merge(const vector<MappingRange> &mappings) const {
+    VERIFY(mappings.size() > 0);
 
+    MappingRange mr = mappings.front();
+    size_t total_mapped = mr.initial_range.size();
+    for (size_t i = 1; i < mappings.size(); ++i) {
+        total_mapped += mappings[i].initial_range.size();
+        //FIXME why do we need merge?
+        mr = mr.Merge(mappings[i]);
     }
-    gp_.edge_pos.AddEdgePosition(e, new_strand, new_mapping);
+    return make_pair(mr, total_mapped);
 }
 
+string GenomeConsistenceChecker::ChromosomeByUniqueEdge(const EdgeId &e,
+                                                        const EdgesPositionHandler<Graph> &tmp_edge_pos,
+                                                        size_t &total) const {
+    DEBUG("Positioning edge " << gp_.g.str(e));
+    map<string, size_t> total_al_lens = TotalAlignedLengths(tmp_edge_pos, e);
+    total = 0;
+    for (size_t c : utils::value_set(total_al_lens))
+        total += c;
+
+    if (total > size_t(math::round((double) gp_.g.length(e) * 1.5))) {
+        INFO("Edge " << gp_.g.int_id(e) <<" was not unique due to the references, excluding ");
+        return "";
+    }
+
+    string chr = "";
+    size_t max_l = 0;
+    for (const auto &p : total_al_lens) {
+        if (p.second > max_l) {
+            max_l = p.second;
+            chr = p.first;
+        }
+    }
 
+    DEBUG("Most likely chromosome " << chr << ". Mapped bp: " << max_l);
+    //TODO: support non-unique edges;
+    if (max_l < size_t(math::round((double) gp_.g.length(e) * 0.5))) {
+        DEBUG("Too small a portion mapped. Edge not used");
+        return "";
+    }
+
+    return chr;
+};
 
 }
diff --git a/src/common/modules/genome_consistance_checker.hpp b/src/common/modules/genome_consistance_checker.hpp
index 0fcf115..274e60c 100644
--- a/src/common/modules/genome_consistance_checker.hpp
+++ b/src/common/modules/genome_consistance_checker.hpp
@@ -14,13 +14,15 @@
 #include "pipeline/graph_pack.hpp"
 #include "visualization/position_filler.hpp"
 #include "assembly_graph/paths/bidirectional_path.hpp"
+#include "assembly_graph/paths/mapping_path.hpp"
 #include "assembly_graph/graph_support/scaff_supplementary.hpp"
+#include "modules/path_extend/pe_utils.hpp"
 
 namespace debruijn_graph {
 
-
 using path_extend::BidirectionalPath;
 using path_extend::ScaffoldingUniqueEdgeStorage;
+using omnigraph::MappingPath;
 
 struct PathScore{
     size_t misassemblies;
@@ -28,52 +30,201 @@ struct PathScore{
     size_t mapped_length;
     PathScore(size_t m, size_t w, size_t ml): misassemblies(m), wrong_gap_size(w), mapped_length(ml) {}
 };
-class GenomeConsistenceChecker {
 
-private:
+class ChromosomeInfo {
+    std::string name_;
+    std::vector<EdgeId> path_;
+    std::multimap<EdgeId, size_t> edge_idxs_;
+
+public:
+    ChromosomeInfo() {}
+
+    explicit ChromosomeInfo(const string &name, const MappingPath<EdgeId> &mapping_path) :
+            name_(name),
+            path_(mapping_path.simple_path()) {
+        for (size_t i = 0; i < path_.size(); ++i) {
+            edge_idxs_.insert(std::make_pair(path_[i], i));
+        }
+    }
+
+    size_t Multiplicity(EdgeId e) const {
+        return edge_idxs_.count(e);
+    }
+
+    size_t IsUnique(EdgeId e) const {
+        return Multiplicity(e) == 1;
+    }
+
+    EdgeId EdgeAt(size_t idx) const {
+        VERIFY(idx < path_.size());
+        return path_[idx];
+    }
+
+    vector<size_t> EdgeIdxs(EdgeId e) const {
+        return utils::get_all(edge_idxs_, e);
+    }
+
+    size_t UniqueEdgeIdx(EdgeId e) const {
+        vector<size_t> idxs = EdgeIdxs(e);
+        VERIFY(idxs.size() == 1);
+        return idxs.front();
+    }
+
+    const std::string& name() const {
+        return name_;
+    }
+
+    size_t size() const {
+        return path_.size();
+    }
+};
+
+class GenomeInfo {
+    std::map<string, ChromosomeInfo> chr_infos_;
+public:
+    void AddInfo(ChromosomeInfo info) {
+        VERIFY(!chr_infos_.count(info.name()));
+        chr_infos_[info.name()] = std::move(info);
+    }
+
+    const ChromosomeInfo& ChrInfo(const string &name) const {
+        return utils::get(chr_infos_, name);
+    }
+
+    vector<string> ChromosomesByEdge(EdgeId e) const {
+        vector<string> answer;
+        for (const auto& chr_info: chr_infos_)
+            if (chr_info.second.Multiplicity(e))
+                answer.push_back(chr_info.first);
+        return answer;
+    }
+
+    size_t Multiplicity(EdgeId e) const {
+        size_t ans = 0;
+        for (const auto& chr_info: chr_infos_)
+            ans += chr_info.second.Multiplicity(e);
+        return ans;
+    }
+
+    bool IsUnique(EdgeId e) const {
+        return Multiplicity(e) == 1;
+    }
+
+    bool InUniqueChromosome(EdgeId e) const {
+        return ChromosomesByEdge(e).size() == 1;
+    }
+
+    const ChromosomeInfo& UniqueChromosomeInfo(EdgeId e) const {
+        auto chr_names = ChromosomesByEdge(e);
+        VERIFY(chr_names.size() == 1);
+        return ChrInfo(chr_names.front());
+    }
+
+    pair<string, size_t> UniqueChromosomeIdx(EdgeId e) const {
+        VERIFY(IsUnique(e));
+        auto chrs = ChromosomesByEdge(e);
+        VERIFY(chrs.size() == 1);
+        return std::make_pair(chrs.front(), ChrInfo(chrs.front()).UniqueEdgeIdx(e));
+    }
+
+    vector<string> Chromosomes() const {
+        vector<string> answer;
+        utils::push_back_all(answer, utils::key_set(chr_infos_));
+        return answer;
+    }
+};
+
+class GenomeConsistenceChecker {
+    typedef omnigraph::MappingPath<EdgeId> MappingPathT;
     const conj_graph_pack &gp_;
-    //EdgesPositionHandler<Graph> &position_handler_;
-    Sequence genome_;
+    const size_t absolute_max_gap_;
+    const double relative_max_gap_;
+    const size_t unresolvable_len_;
+
     const ScaffoldingUniqueEdgeStorage &storage_;
-    size_t absolute_max_gap_;
-    double relative_max_gap_;
-    set<EdgeId> excluded_unique_;
-    EdgeId circular_edge_;
-//map from unique edges to their order in genome spelling;
-    mutable map<EdgeId, size_t> genome_spelled_;
-    bool consequent(const Range &mr1, const Range &mr2) const;
-    bool consequent(const MappingRange &mr1, const MappingRange &mr2) const ;
-
-    PathScore CountMisassembliesWithStrand(const BidirectionalPath &path, const string strand) const;
+    const vector<path_extend::GraphCoverageMap> &long_reads_cov_map_;
+    static const size_t SIGNIFICANT_LENGTH_LOWER_LIMIT = 10000;
+    GenomeInfo genome_info_;
+    //Edges containing zero point for each reference
+    //TODO: do we need circular/linear chromosomes support?
+    set<EdgeId> circular_edges_;
+
+    io::DataSet<config::DataSetData> reads_;
+    bool Consequent(const MappingRange &mr1, const MappingRange &mr2) const ;
+
+    void PrintMisassemblyInfo(EdgeId e1, EdgeId e2) const;
+
+    void ClassifyPosition(size_t prev_pos, size_t cur_pos, const BidirectionalPath & path, PathScore &res) const;
+
+    PathScore InternalCountMisassemblies(const BidirectionalPath &path) const;
+
 //constructs longest sequence of consequetive ranges, stores result in used_mappings
-    void FindBestRangeSequence(const set<MappingRange>& old_mappings, vector<MappingRange>& used_mappings) const;
-//Refills genomic positions uniting alingments separated with small gaps
-    void RefillPos();
-    void RefillPos(const string &strand);
-    void RefillPos(const string &strand, const EdgeId &e);
-DECL_LOGGER("GenomeConsistenceChecker");
+    vector<MappingRange> FindBestRangeSequence(const set<MappingRange>& mappings) const;
+
+    string ChromosomeByUniqueEdge(const EdgeId &e,
+                                  const EdgesPositionHandler<Graph> &tmp_edge_pos,
+                                  size_t &total) const;
+
+    pair<MappingRange, size_t> Merge(const vector<MappingRange>& mappings) const;
 
+    void FillPos(EdgeId e, const EdgesPositionHandler<Graph> &tmp_edge_pos);
 
+    void ReportPathEndByPairedLib(const shared_ptr<path_extend::PairedInfoLibrary> paired_lib, EdgeId current_edge) const;
+
+    void ReportPathEndByLongLib(const path_extend::BidirectionalPathSet &covering_paths, EdgeId current_edge) const;
+
+    void ReportEdge(EdgeId e, double w) const;
+
+    void ReportVariants(std::vector<pair<double, EdgeId>> &sorted_w) const;
+
+    size_t GetSupportingPathCount(EdgeId e1, EdgeId e2, size_t lib_index) const;
+
+    void TheoreticLenStats(vector<size_t> theoretic_lens) const;
+
+    map<string, size_t> TotalAlignedLengths(const EdgesPositionHandler<Graph> &tmp_edge_pos, EdgeId e) const;;
+
+    MappingPathT ConstructEdgeOrder(const std::string &chr_name) const;
+
+    //returns lengths of mapped regions, divided by "unresolvable_len_"
+    vector<size_t> MappedRegions(const MappingPathT &mapping_path) const;
+
+    bool IsCloseToEnd(MappingRange range, const ChromosomeInfo &chr_info) const {
+        auto last_range = gp_.edge_pos.GetUniqueEdgePosition(chr_info.EdgeAt(chr_info.size() - 1), chr_info.name());
+        return range.initial_range.end_pos + SIGNIFICANT_LENGTH_LOWER_LIMIT > last_range.initial_range.end_pos;
+    }
+
+    bool IsCloseToStart(MappingRange range, const ChromosomeInfo &) const {
+        return range.initial_range.start_pos <= SIGNIFICANT_LENGTH_LOWER_LIMIT;
+    }
+
+    DECL_LOGGER("GenomeConsistenceChecker");
 public:
-    GenomeConsistenceChecker(const conj_graph_pack &gp, const ScaffoldingUniqueEdgeStorage &storage, size_t max_gap, double relative_max_gap /*= 0.2*/) : gp_(gp),
-            genome_(gp.genome.GetSequence()), storage_(storage),
-        absolute_max_gap_(max_gap), relative_max_gap_(relative_max_gap), excluded_unique_(), circular_edge_() {
-        if (!gp.edge_pos.IsAttached()) {
-            gp.edge_pos.Attach();
-        }
-        gp.edge_pos.clear();
-        visualization::position_filler::FillPos(gp_, gp_.genome.GetSequence(), "0");
-        visualization::position_filler::FillPos(gp_, !gp_.genome.GetSequence(), "1");
-        RefillPos();
+    GenomeConsistenceChecker(const conj_graph_pack &gp,
+                             size_t max_gap,
+                             double relative_max_gap /*= 0.2*/,
+                             size_t unresolvable_len,
+                             const ScaffoldingUniqueEdgeStorage &storage,
+                             const vector<path_extend::GraphCoverageMap> &long_reads_cov_map,
+                             const io::DataSet<config::DataSetData> reads) :
+            gp_(gp),
+            absolute_max_gap_(max_gap),
+            relative_max_gap_(relative_max_gap),
+            unresolvable_len_(unresolvable_len),
+            storage_(storage),
+            long_reads_cov_map_(long_reads_cov_map),
+            reads_(reads) {
+        //Fixme call outside
+        Fill();
     }
+
+    void Fill();
+
     PathScore CountMisassemblies(const BidirectionalPath &path) const;
-    vector<pair<EdgeId, MappingRange> > ConstructEdgeOrder() const;
 
-//spells genome in language of long unique edges from storage;
-    void SpellGenome();
+    void CheckPathEnd(const BidirectionalPath &path) const;
 
+    map<EdgeId, string> EdgeLabels() const;
 
 };
 
-
 }
diff --git a/src/common/modules/graph_construction.hpp b/src/common/modules/graph_construction.hpp
index c862956..ad7246d 100644
--- a/src/common/modules/graph_construction.hpp
+++ b/src/common/modules/graph_construction.hpp
@@ -18,17 +18,19 @@
 #include "io/reads/io_helper.hpp"
 #include "assembly_graph/core/graph.hpp"
 
-#include "utils/debruijn_graph/debruijn_graph_constructor.hpp"
-#include "utils/debruijn_graph/early_simplification.hpp"
+#include "assembly_graph/construction/debruijn_graph_constructor.hpp"
+#include "assembly_graph/construction/early_simplification.hpp"
 
-#include "utils/perfcounter.hpp"
+#include "utils/perf/perfcounter.hpp"
 #include "io/dataset_support/read_converter.hpp"
 
 #include "assembly_graph/handlers/edges_position_handler.hpp"
 #include "assembly_graph/graph_support/coverage_filling.hpp"
-#include "utils/indices/storing_traits.hpp"
-#include "utils/indices/edge_index_builders.hpp"
-#include "utils/openmp_wrapper.h"
+#include "utils/ph_map/storing_traits.hpp"
+#include "assembly_graph/index/edge_index_builders.hpp"
+#include "utils/parallel/openmp_wrapper.h"
+#include "utils/extension_index/kmer_extension_index_builder.hpp"
+
 
 namespace debruijn_graph {
 
@@ -37,7 +39,7 @@ struct CoverageCollector {
 };
 
 template<>
-struct CoverageCollector<SimpleStoring> {
+struct CoverageCollector<utils::SimpleStoring> {
     template<class Info>
     static void CollectCoverage(Info edge_info) {
         edge_info.edge_id->IncCoverage(edge_info.count);
@@ -45,7 +47,7 @@ struct CoverageCollector<SimpleStoring> {
 };
 
 template<>
-struct CoverageCollector<InvertableStoring> {
+struct CoverageCollector<utils::InvertableStoring> {
     template<class Info>
     static void CollectCoverage(Info edge_info) {
         edge_info.edge_id->IncCoverage(edge_info.count);
@@ -110,10 +112,8 @@ void EarlyClipTips(size_t k, const config::debruijn_config::construction& params
     }
 }
 
-#include "utils/indices/kmer_extension_index_builder.hpp"
-
 template<class Graph, class Read, class Index>
-ReadStatistics ConstructGraphUsingExtentionIndex(const config::debruijn_config::construction params,
+utils::ReadStatistics ConstructGraphUsingExtentionIndex(const config::debruijn_config::construction params,
                                                  io::ReadStreamList<Read>& streams, Graph& g,
                                                  Index& index, io::SingleStreamPtr contigs_stream = io::SingleStreamPtr()) {
     size_t k = g.k();
@@ -124,19 +124,19 @@ ReadStatistics ConstructGraphUsingExtentionIndex(const config::debruijn_config::
 
     TRACE("... in parallel");
     // FIXME: output_dir here is damn ugly!
-    typedef DeBruijnExtensionIndex<> ExtensionIndex;
-    typedef typename ExtensionIndexHelper<ExtensionIndex>::DeBruijnExtensionIndexBuilderT ExtensionIndexBuilder;
+    typedef utils::DeBruijnExtensionIndex<> ExtensionIndex;
+    typedef typename utils::ExtensionIndexHelper<ExtensionIndex>::DeBruijnExtensionIndexBuilderT ExtensionIndexBuilder;
     ExtensionIndex ext((unsigned) k, index.inner_index().workdir());
 
     //fixme hack
-    ReadStatistics stats = ExtensionIndexBuilder().BuildExtensionIndexFromStream(ext, streams, (contigs_stream == 0) ? 0 : &(*contigs_stream), params.read_buffer_size);
+    utils::ReadStatistics stats = ExtensionIndexBuilder().BuildExtensionIndexFromStream(ext, streams, (contigs_stream == 0) ? 0 : &(*contigs_stream), params.read_buffer_size);
 
     EarlyClipTips(k, params, stats.max_read_length_, ext);
 
     INFO("Condensing graph");
     VERIFY(!index.IsAttached());
     DeBruijnGraphExtentionConstructor<Graph> g_c(g, ext);
-    g_c.ConstructGraph(100, 10000, 1.2, params.keep_perfect_loops);//TODO move these parameters to config
+    g_c.ConstructGraph(params.keep_perfect_loops);
 
     INFO("Building index with from graph")
     //todo pass buffer size
@@ -147,7 +147,7 @@ ReadStatistics ConstructGraphUsingExtentionIndex(const config::debruijn_config::
 }
 
 template<class Graph, class Index, class Streams>
-ReadStatistics ConstructGraph(const config::debruijn_config::construction &params,
+utils::ReadStatistics ConstructGraph(const config::debruijn_config::construction &params,
                               Streams& streams, Graph& g,
                               Index& index, io::SingleStreamPtr contigs_stream = io::SingleStreamPtr()) {
     if (params.con_mode == config::construction_mode::extention) {
@@ -162,11 +162,11 @@ ReadStatistics ConstructGraph(const config::debruijn_config::construction &param
 }
 
 template<class Graph, class Index, class Streams>
-ReadStatistics ConstructGraphWithCoverage(const config::debruijn_config::construction &params,
+utils::ReadStatistics ConstructGraphWithCoverage(const config::debruijn_config::construction &params,
                                   Streams& streams, Graph& g,
                                   Index& index, FlankingCoverage<Graph>& flanking_cov,
                                   io::SingleStreamPtr contigs_stream = io::SingleStreamPtr()) {
-    ReadStatistics rs = ConstructGraph(params, streams, g, index, contigs_stream);
+    utils::ReadStatistics rs = ConstructGraph(params, streams, g, index, contigs_stream);
 
     typedef typename Index::InnerIndex InnerIndex;
     typedef typename EdgeIndexHelper<InnerIndex>::CoverageAndGraphPositionFillingIndexBuilderT IndexBuilder;
diff --git a/src/common/modules/graph_read_correction.hpp b/src/common/modules/graph_read_correction.hpp
index 892cfb8..c7e8655 100644
--- a/src/common/modules/graph_read_correction.hpp
+++ b/src/common/modules/graph_read_correction.hpp
@@ -29,9 +29,9 @@ class TipsProjector {
     optional<EdgeId> UniqueAlternativeEdge(EdgeId tip, bool outgoing_tip) {
         vector<EdgeId> edges;
         if (outgoing_tip) {
-            push_back_all(edges, gp_.g.OutgoingEdges(gp_.g.EdgeStart(tip)));
+            utils::push_back_all(edges, gp_.g.OutgoingEdges(gp_.g.EdgeStart(tip)));
         } else {
-            push_back_all(edges, gp_.g.IncomingEdges(gp_.g.EdgeEnd(tip)));
+            utils::push_back_all(edges, gp_.g.IncomingEdges(gp_.g.EdgeEnd(tip)));
         }
         restricted::set<EdgeId> edges_set(edges.begin(), edges.end());
         edges_set.erase(tip);
diff --git a/src/common/modules/path_extend/extension_chooser.hpp b/src/common/modules/path_extend/extension_chooser.hpp
index cfd1e98..d7b2e7e 100644
--- a/src/common/modules/path_extend/extension_chooser.hpp
+++ b/src/common/modules/path_extend/extension_chooser.hpp
@@ -222,14 +222,13 @@ public:
         return wc_ != nullptr;
     }
 
-    const WeightCounter& wc() const {
-        VERIFY(wc_);
-        return *wc_;
+    shared_ptr<WeightCounter> wc() const {
+        return wc_;
     }
 
 protected:
     bool HasIdealInfo(EdgeId e1, EdgeId e2, size_t dist) const {
-        return math::gr(wc_->lib().IdealPairedInfo(e1, e2, (int) dist), 0.);
+        return math::gr(wc_->PairedLibrary().IdealPairedInfo(e1, e2, (int) dist), 0.);
     }
 
     bool HasIdealInfo(const BidirectionalPath& p, EdgeId e, size_t gap) const {
@@ -286,6 +285,158 @@ public:
     }
 };
 
+
+class SimpleCoverageExtensionChooser: public ExtensionChooser {
+    const SSCoverageStorage& coverage_storage_;
+    //less than 1
+    double coverage_delta_;
+    //larger than 1
+    double inverted_coverage_delta_;
+
+    double min_upper_coverage_;
+
+public:
+    SimpleCoverageExtensionChooser(const SSCoverageStorage& coverage_storage, const Graph& g,
+                                   double coverage_delta, double min_upper_coverage = 0) :
+        ExtensionChooser(g), coverage_storage_(coverage_storage),
+        coverage_delta_(coverage_delta),
+        inverted_coverage_delta_(0),
+        min_upper_coverage_(min_upper_coverage) {
+        VERIFY(math::le(coverage_delta_, 1.0));
+        VERIFY(!math::eq(coverage_delta_, 0.0));
+        inverted_coverage_delta_ = 1.0 / coverage_delta_;
+    }
+
+    EdgeContainer Filter(const BidirectionalPath& path, const EdgeContainer& edges) const override {
+        if (edges.size() != 2)
+            return EdgeContainer();
+
+        size_t index = path.Size() - 1;
+        while (index > 0) {
+            if (g_.IncomingEdgeCount(g_.EdgeStart(path[index])) == 2)
+                break;
+            index--;
+        }
+
+        if (index == 0) {
+            return EdgeContainer();
+        }
+        DEBUG("Split found at " << index);
+        EdgeId path_edge_at_split = path[index - 1];
+
+        return Filter(path, edges, math::ls(coverage_storage_.GetCoverage(path_edge_at_split), coverage_storage_.GetCoverage(path_edge_at_split, true)));
+    }
+
+private:
+    EdgeContainer Filter(const BidirectionalPath& path, const EdgeContainer& edges, bool reverse) const {
+        DEBUG("COVERAGE extension chooser");
+        VERIFY(edges.size() == 2);
+        if (!IsEnoughCoverage(edges.front().e_, edges.back().e_, reverse)) {
+            DEBUG("Candidates are not covered enough: e1 = " << coverage_storage_.GetCoverage(edges.front().e_, reverse) <<
+                ", e2 = " << coverage_storage_.GetCoverage(edges.back().e_, reverse));
+            return EdgeContainer();
+        }
+
+        if (IsCoverageSimilar(edges.front().e_, edges.back().e_, reverse)) {
+            DEBUG("Candidates coverage is too similar: e1 = " << coverage_storage_.GetCoverage(edges.front().e_, reverse) <<
+                ", e2 = " << coverage_storage_.GetCoverage(edges.back().e_, reverse));
+            return EdgeContainer();
+        }
+
+        size_t index = path.Size() - 1;
+        while (index > 0) {
+            if (g_.IncomingEdgeCount(g_.EdgeStart(path[index])) == 2)
+                break;
+            index--;
+        }
+
+        EdgeContainer result;
+        if (index > 0) {
+            DEBUG("Split found at " << index);
+            EdgeId path_edge_at_split = path[index - 1];
+            EdgeId other_edge_at_split = GetOtherEdgeAtSplit(g_.EdgeEnd(path_edge_at_split), path_edge_at_split);
+            VERIFY(other_edge_at_split != EdgeId());
+
+            if (IsCoverageSimilar(path_edge_at_split, other_edge_at_split, reverse)) {
+                DEBUG("Path edge and alternative is too similar: path = " << coverage_storage_.GetCoverage(path_edge_at_split, reverse) <<
+                    ", other = " << coverage_storage_.GetCoverage(other_edge_at_split, reverse));
+
+                return EdgeContainer();
+            }
+            if (!IsEnoughCoverage(path_edge_at_split, other_edge_at_split, reverse)) {
+                DEBUG("Path edge and alternative  coverage is too low: path = " << coverage_storage_.GetCoverage(path_edge_at_split, reverse) <<
+                    ", other = " << coverage_storage_.GetCoverage(other_edge_at_split, reverse));
+
+                return EdgeContainer();
+            }
+
+            EdgeId candidate1 = edges.front().e_;
+            EdgeId candidate2 = edges.back().e_;
+
+            if (math::gr(coverage_storage_.GetCoverage(path_edge_at_split, reverse), coverage_storage_.GetCoverage(other_edge_at_split, reverse))) {
+                DEBUG("path coverage is high, edge " << g_.int_id(path_edge_at_split) << ", path cov = "
+                          << coverage_storage_.GetCoverage(path_edge_at_split, reverse) << ", other " << coverage_storage_.GetCoverage(other_edge_at_split, reverse));
+
+                result.emplace_back(math::gr(coverage_storage_.GetCoverage(candidate1, reverse), coverage_storage_.GetCoverage(candidate2, reverse)) ? candidate1 : candidate2, 0);
+            } else {
+                DEBUG("path coverage is low, edge " << g_.int_id(path_edge_at_split) << ", path cov = "
+                          << coverage_storage_.GetCoverage(path_edge_at_split, reverse) << ", other " << coverage_storage_.GetCoverage(other_edge_at_split, reverse));
+
+                result.emplace_back(math::ls(coverage_storage_.GetCoverage(candidate1, reverse), coverage_storage_.GetCoverage(candidate2, reverse)) ? candidate1 : candidate2, 0);
+            }
+
+            if (!IsCoverageSimilar(path_edge_at_split, result.front().e_, reverse)) {
+                DEBUG("Coverage is NOT similar: path = " << coverage_storage_.GetCoverage(path_edge_at_split, reverse) <<
+                    ", candidate = " << coverage_storage_.GetCoverage(result.front().e_, reverse))
+                result.clear();
+            }
+            else {
+                DEBUG("Coverage is similar: path = " << coverage_storage_.GetCoverage(path_edge_at_split, reverse) <<
+                    ", candidate = " << coverage_storage_.GetCoverage(result.front().e_, reverse))
+                DEBUG("Coverage extension chooser helped, adding " << g_.int_id(result.front().e_));
+            }
+        }
+
+        VERIFY(result.size() <= 1);
+        return result;
+    }
+
+    bool IsEnoughCoverage(EdgeId e1, EdgeId e2, bool reverse) const {
+        double cov1 = coverage_storage_.GetCoverage(e1, reverse);
+        double cov2 = coverage_storage_.GetCoverage(e2, reverse);
+        return math::ge(max(cov1, cov2), min_upper_coverage_) || math::eq(min(cov1, cov2), 0.0);
+    }
+
+    bool IsCoverageSimilar(EdgeId e1, EdgeId e2, bool reverse) const {
+        double cov1 = coverage_storage_.GetCoverage(e1, reverse);
+        double cov2 = coverage_storage_.GetCoverage(e2, reverse);
+
+        if (math::eq(cov2, 0.0) || math::eq(cov1, 0.0)) {
+            return false;
+        }
+
+        double diff = cov1 / cov2;
+        if (math::ls(diff, 1.0))
+            return math::gr(diff, coverage_delta_);
+        else
+            return math::ls(diff, inverted_coverage_delta_);
+    }
+
+    EdgeId GetOtherEdgeAtSplit(VertexId split, EdgeId e) const {
+        VERIFY(g_.IncomingEdgeCount(split) == 2);
+        for (auto other : g_.IncomingEdges(split)) {
+            if (e != other)
+                return other;
+        }
+        return EdgeId();
+    }
+
+    DECL_LOGGER("SimpleCoverageExtensionChooser");
+
+};
+
+
+
 class ExcludingExtensionChooser: public ExtensionChooser {
     PathAnalyzer analyzer_;
     double prior_coeff_;
@@ -346,7 +497,7 @@ public:
             return edges;
         }
         std::set<size_t> to_exclude;
-        path.Print();
+        path.PrintDEBUG();
         EdgeContainer result = edges;
         ExcludeEdges(path, result, to_exclude);
         result = FindFilteredEdges(path, result, to_exclude);
@@ -509,8 +660,6 @@ public:
 };
 
 class ScaffoldingExtensionChooser : public ExtensionChooser {
-
-protected:
     typedef ExtensionChooser base;
     double raw_weight_threshold_;
     double cl_weight_threshold_;
@@ -539,7 +688,7 @@ protected:
 
     void GetDistances(EdgeId e1, EdgeId e2, std::vector<int>& dist,
             std::vector<double>& w) const {
-        wc_->lib().CountDistances(e1, e2, dist, w);
+        wc_->PairedLibrary().CountDistances(e1, e2, dist, w);
     }
 
     void CountAvrgDists(const BidirectionalPath& path, EdgeId e, std::vector<pair<int, double>> & histogram) const {
@@ -580,7 +729,7 @@ protected:
 
     set<EdgeId> FindCandidates(const BidirectionalPath& path) const {
         set<EdgeId> jumping_edges;
-        const auto& lib = wc_->lib();
+        const auto& lib = wc_->PairedLibrary();
         //todo lib (and FindJumpEdges) knows its var so it can be counted there
         int is_scatter = int(math::round(lib.GetIsVar() * is_scatter_coeff_));
         for (int i = (int) path.Size() - 1; i >= 0 && path.LengthAt(i) - g_.length(path.At(i)) <= lib.GetISMax(); --i) {
@@ -630,7 +779,6 @@ inline bool EdgeWithWeightCompareReverse(const pair<EdgeId, double>& p1,
 }
 
 class LongReadsUniqueEdgeAnalyzer {
-private:
     DECL_LOGGER("LongReadsUniqueEdgeAnalyzer")
 public:
     LongReadsUniqueEdgeAnalyzer(const Graph& g, const GraphCoverageMap& cov_map,
@@ -812,51 +960,51 @@ private:
     bool uneven_depth_;
 };
 
-class SimpleScaffolding {
-public:
-    SimpleScaffolding(const Graph& g) : g_(g) {}
-
-    BidirectionalPath FindMaxCommonPath(const vector<BidirectionalPath*>& paths,
-                                        size_t max_diff_len) const {
-        BidirectionalPath max_end(g_);
-        for (auto it1 = paths.begin(); it1 != paths.end(); ++it1) {
-            BidirectionalPath* p1 = *it1;
-            for (size_t i = 0; i < p1->Size(); ++i) {
-                if (p1->Length() - p1->LengthAt(i) > max_diff_len) {
-                    break;
-                }
-                bool contain_all = true;
-                for (size_t i1 = i + 1; i1 <= p1->Size() && contain_all; ++i1) {
-                    BidirectionalPath subpath = p1->SubPath(i, i1);
-                    for (auto it2 = paths.begin();  it2 != paths.end() && contain_all; ++it2) {
-                        BidirectionalPath* p2 = *it2;
-                        vector<size_t> positions2 = p2->FindAll(subpath.At(0));
-                        bool contain = false;
-                        for (size_t ipos2 = 0; ipos2 < positions2.size(); ++ipos2) {
-                            size_t pos2 = positions2[ipos2];
-                            if (p2->Length() - p2->LengthAt(pos2) <= max_diff_len
-                                    && EqualEnds(subpath, 0, *p2, pos2, false)) {
-                                contain = true;
-                                break;
-                            }
-                        }
-                        if (!contain) {
-                            contain_all = false;
-                        }
-                    }
-                    if (contain_all && (i1 - i) >= max_end.Size()) {
-                        max_end.Clear();
-                        max_end.PushBack(subpath);
-                    }
-                }
-            }
-        }
-        return max_end;
-    }
-
-private:
-    const Graph& g_;
-};
+//class SimpleScaffolding {
+//public:
+//    SimpleScaffolding(const Graph& g) : g_(g) {}
+//
+//    BidirectionalPath FindMaxCommonPath(const vector<BidirectionalPath*>& paths,
+//                                        size_t max_diff_len) const {
+//        BidirectionalPath max_end(g_);
+//        for (auto it1 = paths.begin(); it1 != paths.end(); ++it1) {
+//            BidirectionalPath* p1 = *it1;
+//            for (size_t i = 0; i < p1->Size(); ++i) {
+//                if (p1->Length() - p1->LengthAt(i) > max_diff_len) {
+//                    break;
+//                }
+//                bool contain_all = true;
+//                for (size_t i1 = i + 1; i1 <= p1->Size() && contain_all; ++i1) {
+//                    BidirectionalPath subpath = p1->SubPath(i, i1);
+//                    for (auto it2 = paths.begin();  it2 != paths.end() && contain_all; ++it2) {
+//                        BidirectionalPath* p2 = *it2;
+//                        vector<size_t> positions2 = p2->FindAll(subpath.At(0));
+//                        bool contain = false;
+//                        for (size_t ipos2 = 0; ipos2 < positions2.size(); ++ipos2) {
+//                            size_t pos2 = positions2[ipos2];
+//                            if (p2->Length() - p2->LengthAt(pos2) <= max_diff_len
+//                                    && EqualEnds(subpath, 0, *p2, pos2, false)) {
+//                                contain = true;
+//                                break;
+//                            }
+//                        }
+//                        if (!contain) {
+//                            contain_all = false;
+//                        }
+//                    }
+//                    if (contain_all && (i1 - i) >= max_end.Size()) {
+//                        max_end.Clear();
+//                        max_end.PushBack(subpath);
+//                    }
+//                }
+//            }
+//        }
+//        return max_end;
+//    }
+//
+//private:
+//    const Graph& g_;
+//};
 
 class LongReadsExtensionChooser : public ExtensionChooser {
 public:
@@ -875,8 +1023,7 @@ public:
               cov_map_(read_paths_cov_map),
               unique_edge_analyzer_(g, cov_map_, filtering_threshold,
                                     unique_edge_priority_threshold,
-                                    max_repeat_length, uneven_depth),
-              simple_scaffolding_(g)
+                                    max_repeat_length, uneven_depth)
     {
     }
 
@@ -889,13 +1036,12 @@ public:
         if (edges.empty()) {
             return edges;
         }DEBUG("We in Filter of LongReadsExtensionChooser");
-        path.Print();
+        path.PrintDEBUG();
         map<EdgeId, double> weights_cands;
         for (auto it = edges.begin(); it != edges.end(); ++it) {
             weights_cands.insert(make_pair(it->e_, 0.0));
         }
         set<EdgeId> filtered_cands;
-        map<EdgeId, BidirectionalPathSet > support_paths_ends;
         auto support_paths = cov_map_.GetCoveringPaths(path.Back());
         DEBUG("Found " << support_paths.size() << " covering paths!!!");
         for (auto it = support_paths.begin(); it != support_paths.end(); ++it) {
@@ -912,10 +1058,6 @@ public:
                         EdgeId next = (*it)->At(positions[i] + 1);
                         weights_cands[next] += (*it)->GetWeight();
                         filtered_cands.insert(next);
-                        if (support_paths_ends.count(next) == 0){
-                            support_paths_ends[next] = BidirectionalPathSet();
-                        }
-                        support_paths_ends[next].insert(new BidirectionalPath((*it)->SubPath(positions[i] + 1)));
                     }
                 }
             }
@@ -974,7 +1116,6 @@ private:
     size_t min_significant_overlap_;
     const GraphCoverageMap& cov_map_;
     LongReadsUniqueEdgeAnalyzer unique_edge_analyzer_;
-    SimpleScaffolding simple_scaffolding_;
 
     DECL_LOGGER("LongReadsExtensionChooser");
 };
diff --git a/src/common/modules/path_extend/loop_traverser.hpp b/src/common/modules/path_extend/loop_traverser.hpp
index 40e451c..04365aa 100644
--- a/src/common/modules/path_extend/loop_traverser.hpp
+++ b/src/common/modules/path_extend/loop_traverser.hpp
@@ -22,23 +22,22 @@
 namespace path_extend {
 
 class LoopTraverser {
-
     const Graph& g_;
-    GraphCoverageMap& covMap_;
-    size_t long_edge_limit_;
-    size_t component_size_limit_;
-    size_t shortest_path_limit_;
+    const GraphCoverageMap& cov_map_;
+    const size_t long_edge_limit_;
+    const size_t component_size_limit_;
+    const size_t shortest_path_limit_;
     static const size_t DIJKSTRA_LIMIT = 3000;
-private:
-    bool AnyTipsInComponent(const GraphComponent<Graph>& component) const{
-        for(auto e : component.edges()) {
+    static const size_t BASIC_N_CNT = 100;
+
+    bool AnyTipsInComponent(const GraphComponent<Graph>& component) const {
+        for (auto e : component.edges())
             if (g_.IncomingEdgeCount(g_.EdgeStart(e)) == 0 || g_.OutgoingEdgeCount(g_.EdgeEnd(e)) == 0)
                 return true;
-        }
         return false;
     }
 
-    EdgeId FindStart(const set<VertexId>& component_set) const{
+    EdgeId FindStart(const set<VertexId>& component_set) const {
         EdgeId result;
         for (auto it = component_set.begin(); it != component_set.end(); ++it) {
             for (auto eit = g_.in_begin(*it); eit != g_.in_end(*it); ++eit) {
@@ -72,19 +71,19 @@ private:
 
     bool IsEndInsideComponent(const BidirectionalPath &path,
                               const set <VertexId> &component_set) {
-        if (component_set.count(g_.EdgeStart(path.Front())) == 0) {
+        if (component_set.count(g_.EdgeStart(path.Front())) == 0)
             return false;
-        }
-        for (size_t i = 0; i < path.Size(); ++i) {
+
+        for (size_t i = 0; i < path.Size(); ++i)
             if (component_set.count(g_.EdgeEnd(path.At(i))) == 0)
                 return false;
-        }
+
         return true;
     }
 
 
     bool IsEndInsideComponent(const BidirectionalPath &path, EdgeId component_entrance,
-                              const set <VertexId> &component_set,
+                              const set<VertexId> &component_set,
                               bool conjugate = false) {
         int i = path.FindLast(component_entrance);
         VERIFY_MSG(i != -1, "Component edge is not found in the path")
@@ -92,106 +91,96 @@ private:
         if ((size_t) i == path.Size() - 1) {
             if (conjugate)
                 return component_set.count(g_.conjugate(g_.EdgeEnd(path.Back()))) > 0;
-            else
-                return component_set.count(g_.EdgeEnd(path.Back())) > 0;
+            return component_set.count(g_.EdgeEnd(path.Back())) > 0;
         }
 
         if (conjugate)
             return IsEndInsideComponent(path.SubPath((size_t) i + 1).Conjugate(), component_set);
-        else
-            return IsEndInsideComponent(path.SubPath((size_t) i + 1), component_set);
+        return IsEndInsideComponent(path.SubPath((size_t) i + 1), component_set);
     }
 
     bool TraverseLoop(EdgeId start, EdgeId end, const set<VertexId>& component_set) {
         DEBUG("start " << g_.int_id(start) << " end " << g_.int_id(end));
-        BidirectionalPathSet coveredStartPaths =
-                covMap_.GetCoveringPaths(start);
-        BidirectionalPathSet coveredEndPaths =
-                covMap_.GetCoveringPaths(end);
-
-        for (auto it_path = coveredStartPaths.begin();
-                it_path != coveredStartPaths.end(); ++it_path) {
-            if ((*it_path)->FindAll(end).size() > 0) {
+        BidirectionalPathSet start_cover_paths = cov_map_.GetCoveringPaths(start);
+        BidirectionalPathSet end_cover_paths = cov_map_.GetCoveringPaths(end);
+
+        for (auto path_ptr : start_cover_paths)
+            if (path_ptr->FindAll(end).size() > 0)
                 return false;
-            }
-        }
-        if (coveredStartPaths.size() < 1 or coveredEndPaths.size() < 1) {
-            DEBUG("TraverseLoop STRANGE SITUATION: start " << coveredStartPaths.size() << " end " << coveredEndPaths.size());
+
+        if (start_cover_paths.size() < 1 || end_cover_paths.size() < 1) {
+            DEBUG("TraverseLoop STRANGE SITUATION: start " << start_cover_paths.size() << " end " << end_cover_paths.size());
             return false;
         }
 
-        if (coveredStartPaths.size() > 1 or coveredEndPaths.size() > 1) {
+        if (start_cover_paths.size() > 1 || end_cover_paths.size() > 1) {
             DEBUG("Ambiguous situation in path joining, quitting");
             return false;
         }
 
-        BidirectionalPath* startPath = *coveredStartPaths.begin();
-        BidirectionalPath* endPath = *coveredEndPaths.begin();
-        if ((*startPath) == endPath->Conjugate()){
+        BidirectionalPath& start_path = **start_cover_paths.begin();
+        BidirectionalPath& end_path = **end_cover_paths.begin();
+
+        //TODO isn't it enough to check pointer equality?
+        if (start_path == end_path.Conjugate()){
             return false;
         }
 
         //Checking that paths ends are within component
-        if (!IsEndInsideComponent(*startPath, start, component_set) ||
-                !IsEndInsideComponent(*endPath->GetConjPath(), g_.conjugate(end), component_set, true)) {
+        if (!IsEndInsideComponent(start_path, start, component_set) ||
+                !IsEndInsideComponent(*end_path.GetConjPath(), g_.conjugate(end), component_set, true)) {
             DEBUG("Some path goes outside of the component")
             return false;
         }
 
-        size_t commonSize = startPath->CommonEndSize(*endPath);
-        size_t nLen = 0;
-        DEBUG("Str " << startPath->Size() << ", end" << endPath->Size());
-        if (commonSize == 0 && !startPath->Empty() > 0 && !endPath->Empty()) {
+        size_t common_size = start_path.CommonEndSize(end_path);
+        DEBUG("Str " << start_path.Size() << ", end" << end_path.Size());
+        if (common_size == 0 && !start_path.Empty() && !end_path.Empty()) {
             DEBUG("Estimating gap size");
-            VertexId lastVertex = g_.EdgeEnd(startPath->Back());
-            VertexId firstVertex = g_.EdgeStart(endPath->Front());
+            VertexId last_vertex = g_.EdgeEnd(start_path.Back());
+            VertexId first_vertex = g_.EdgeStart(end_path.Front());
 
-            if (firstVertex == lastVertex) {
-                nLen = 0;
-            } else {
-                DijkstraHelper<Graph>::BoundedDijkstra dijkstra(DijkstraHelper<Graph>::CreateBoundedDijkstra(g_, shortest_path_limit_,
-                                                                                                             DIJKSTRA_LIMIT));
-                dijkstra.Run(lastVertex);
-                vector<EdgeId> shortest_path = dijkstra.GetShortestPathTo(g_.EdgeStart(endPath->Front()));
+            if (first_vertex != last_vertex) {
+                auto dijkstra = DijkstraHelper<Graph>::CreateBoundedDijkstra(g_, shortest_path_limit_, DIJKSTRA_LIMIT);
+                dijkstra.Run(last_vertex);
+                vector<EdgeId> shortest_path = dijkstra.GetShortestPathTo(first_vertex);
 
                 if (shortest_path.empty()) {
                     DEBUG("Failed to find closing path");
                     return false;
-                } else if (!IsEndInsideComponent(BidirectionalPath(g_, shortest_path), component_set)) {
+                } 
+                if (!IsEndInsideComponent(BidirectionalPath(g_, shortest_path), component_set)) {
                     DEBUG("Closing path is outside the component");
                     return false;
-                } else {
-                    nLen = CumulativeLength(g_, shortest_path);
                 }
             }
         }
-        if (commonSize < endPath->Size()){
-            startPath->PushBack(endPath->At(commonSize), (int) nLen);
-        }
-        for (size_t i = commonSize + 1; i < endPath->Size(); ++i) {
-            startPath->PushBack(endPath->At(i), endPath->GapAt(i), endPath->TrashPreviousAt(i), endPath->TrashCurrentAt(i));
-        }
+        start_path.PushBack(end_path.SubPath(common_size), Gap(int(g_.k() + BASIC_N_CNT)));
+
         DEBUG("travers");
-        startPath->Print();
-        endPath->Print();
+        start_path.PrintDEBUG();
+        end_path.PrintDEBUG();
         DEBUG("conj");
-        endPath->GetConjPath()->Print();
-        endPath->Clear();
+        end_path.GetConjPath()->PrintDEBUG();
+        end_path.Clear();
         return true;
     }
 
     bool ContainsLongEdges(const GraphComponent<Graph>& component) const {
-        for(auto e : component.edges()) {
-            if(g_.length(e) > long_edge_limit_) {
+        for (auto e : component.edges())
+            if (g_.length(e) > long_edge_limit_)
                 return true;
-            }
-        }
         return false;
     }
 
 public:
-    LoopTraverser(const Graph& g, GraphCoverageMap& coverageMap, size_t long_edge_limit, size_t component_size_limit, size_t shortest_path_limit) :
-            g_(g), covMap_(coverageMap), long_edge_limit_(long_edge_limit), component_size_limit_(component_size_limit), shortest_path_limit_(shortest_path_limit) {
+    LoopTraverser(const Graph& g, GraphCoverageMap& coverage_map,
+                  size_t long_edge_limit, size_t component_size_limit,
+                  size_t shortest_path_limit) :
+            g_(g), cov_map_(coverage_map),
+            long_edge_limit_(long_edge_limit),
+            component_size_limit_(component_size_limit),
+            shortest_path_limit_(shortest_path_limit) {
     }
 
     size_t TraverseAllLoops() {
diff --git a/src/common/modules/path_extend/overlap_analysis.hpp b/src/common/modules/path_extend/overlap_analysis.hpp
index 3c3178f..9d9f13f 100644
--- a/src/common/modules/path_extend/overlap_analysis.hpp
+++ b/src/common/modules/path_extend/overlap_analysis.hpp
@@ -1,11 +1,10 @@
 #pragma once
 
 #include "utils/logger/logger.hpp"
-#include "utils/range.hpp"
+#include "sequence/range.hpp"
 #include "ssw/ssw_cpp.h"
 
 namespace debruijn_graph {
-using omnigraph::Range;
 
 struct OverlapInfo {
     Range r1;
@@ -89,6 +88,7 @@ public:
               /*mismatch_penalty*/3,
                        /*gap_opening_penalty*/4,
                        /*gap_extending_penalty*/3) {
+        DEBUG("Considered max overlap " << flank_length);
     }
 
 
diff --git a/src/common/modules/path_extend/path_extender.hpp b/src/common/modules/path_extend/path_extender.hpp
index 14ce6e4..b7ad73a 100644
--- a/src/common/modules/path_extend/path_extender.hpp
+++ b/src/common/modules/path_extend/path_extender.hpp
@@ -14,6 +14,7 @@
 #pragma once
 
 #include "extension_chooser.hpp"
+#include "assembly_graph/paths/bidirectional_path_container.hpp"
 #include "path_filter.hpp"
 #include "overlap_analysis.hpp"
 #include "assembly_graph/graph_support/scaff_supplementary.hpp"
@@ -21,347 +22,430 @@
 
 namespace path_extend {
 
-class ShortLoopResolver {
-public:
-    ShortLoopResolver(const Graph& g)
-            : g_(g) { }
-
-    virtual ~ShortLoopResolver() { }
+inline BidirectionalPath OptimizedConjugate(const BidirectionalPath &path) {
+    return path.GetConjPath() ? *path.GetConjPath() : path.Conjugate();
+}
 
-    virtual void ResolveShortLoop(BidirectionalPath& path) const = 0;
+//TODO think about symmetry and what if it breaks?
+class OverlapFindingHelper {
+    const Graph &g_;
+    const GraphCoverageMap &coverage_map_;
+    const size_t min_edge_len_;
+    const size_t max_diff_;
+    const bool try_extend_;
+
+    //TODO think of the cases when (gap + length) < 0
+    //Changes second argument on success
+    void TryExtendToEnd(const BidirectionalPath &path, size_t &pos) const {
+        if (pos < path.Size() &&
+                path.GapAt(pos).gap + path.LengthAt(pos) <= max_diff_)
+            pos = path.Size();
+    }
+
+    //Changes second argument on success
+    void TryExtendToStart(const BidirectionalPath &path, size_t &pos) const {
+        if (pos > 0 && path.Length() - path.LengthAt(pos) <= max_diff_)
+            pos = 0;
+    }
+
+    pair<Range, Range> ComparePaths(const BidirectionalPath &path1,
+                                    const BidirectionalPath &path2,
+                                    size_t start2) const {
+        TRACE("Comparing paths " << path1.GetId() << " and " << path2.GetId());
+        //TODO change to edit distance?
+        int shift1 = 0;
+        //path1 is always matched from the start
+        const size_t start1 = 0;
+        size_t end1 = start1;
+        size_t end2 = start2;
+
+        for (size_t i = start1; i < path1.Size(); ++i) {
+            if (abs(shift1) > int(max_diff_))
+                break;
 
-protected:
-    DECL_LOGGER("PathExtender")
-    const Graph& g_;
+            bool match = false;
+            size_t j = end2;
+            int shift2 = 0;
+            for (; j < path2.Size(); ++j) {
+                if (end1 == 0) {
+                    //Force first match to start with pos2
+                    if (j > start2) {
+                        break;
+                    }
+                }
 
-    void UndoCycles(BidirectionalPath& p, EdgeId next_edge) const {
-        if (p.Size() <= 2) {
-            return;
-        }
-        EdgeId first_edge = p.Back();
-        EdgeId second_edge = next_edge;
-        while (p.Size() > 2) {
-            if (p.At(p.Size() - 1) == first_edge && p.At(p.Size() - 2) == second_edge) {
-                p.PopBack(2);
+                if (abs(shift2) > int(max_diff_))
+                    break;
+                if (path1.At(i) == path2.At(j) &&
+                        (end1 == 0 ||
+                            abs(shift1 + path1.GapAt(i).gap - shift2 - path2.GapAt(j).gap) <= int(max_diff_))) {
+                    match = true;
+                    break;
+                } else {
+                    shift2 += path2.ShiftLength(j);
+                }
+            }
+            if (match) {
+                end1 = i+1;
+                end2 = j+1;
+                shift1 = 0;
             } else {
-                return;;
+                shift1 += path1.ShiftLength(i);
             }
         }
-    }
 
-    void MakeCycleStep(BidirectionalPath& path, EdgeId e) const {
-        if (path.Size() == 0) {
-            return;
+        //Extending the ends of the paths if possible
+        if (try_extend_ && end1 > 0) {
+            TryExtendToEnd(path1, end1);
+            TryExtendToEnd(path2, end2);
+            //no need to extend path1 left
+            VERIFY(start1 == 0);
+            TryExtendToStart(path2, start2);
         }
-        EdgeId pathEnd = path.Back();
-        path.PushBack(e);
-        path.PushBack(pathEnd);
+
+        return make_pair(Range(start1, end1), Range(start2, end2));
     }
-};
 
-class CovShortLoopResolver : public ShortLoopResolver {
 public:
-    CovShortLoopResolver(const conj_graph_pack& gp)
-            : ShortLoopResolver(gp.g), gp_(gp) {
-
-    }
-
-    void ResolveShortLoop(BidirectionalPath& path) const override {
-        DEBUG("resolve short loop by coverage");
-        path.Print();
-
-        pair<EdgeId, EdgeId> edges;
-        if (path.Size() >= 1 && GetLoopAndExit(g_, path.Back(), edges)) {
-            DEBUG("Coverage Short Loop Resolver");
-            UndoCycles(path, edges.first);
-            EdgeId e1 = path.Back();
-            EdgeId e2 = edges.first;
-            EdgeId e_out = edges.second;
-            auto prob_e_in = g_.IncomingEdges(g_.EdgeEnd(e2));
-            EdgeId e_in = *prob_e_in.begin();
-            size_t count = 0;
-            for (auto edge = prob_e_in.begin(); edge != prob_e_in.end(); ++edge) {
-                if (*edge != e2)
-                    e_in = *edge;
-                count++;
-            }
-            if (count != 2) {
-                return;
-            }
-            double in_cov = gp_.flanking_cov.GetOutCov(e_in); //g_.coverage(e_in);
-            double out_cov = gp_.flanking_cov.GetInCov(e_out); //g_.coverage(e_out);
-            double cov = (in_cov + out_cov) / 2.0;
-            //what are time variables???
-            double time1 = math::round(gp_.g.coverage(e1) / cov);
-            double time2 = math::round(gp_.g.coverage(e2) / cov);
-            size_t time = (size_t) std::max(0.0, std::min(time1 - 1.0, time2));
-            for (size_t i = 0; i < time; ++i) {
-                MakeCycleStep(path, edges.first);
+    OverlapFindingHelper(const Graph &g,
+                         const GraphCoverageMap &coverage_map,
+                         size_t min_edge_len,
+                         size_t max_diff) :
+            g_(g),
+            coverage_map_(coverage_map),
+            min_edge_len_(min_edge_len),
+            max_diff_(max_diff),
+            //had to enable try_extend, otherwise equality lost symmetry
+            try_extend_(max_diff_ > 0) {
+    }
+
+    bool IsSubpath(const BidirectionalPath &path,
+                   const BidirectionalPath &other) const {
+        for (size_t j = 0; j < other.Size(); ++j) {
+            auto range_pair = ComparePaths(path, other, j);
+            if (range_pair.first.end_pos == path.Size()) {
+                return true;
             }
-            path.PushBack(edges.second);
-            DEBUG("loop with start " << g_.int_id(e_in)
-                    <<" e1 " << g_.int_id(e1)
-                    << " e2 " << g_.int_id(e2)
-                    << " out " <<g_.int_id(e_out)
-                    << " cov in = " << in_cov
-                    << " cov out " << out_cov
-                    << " cov " << cov
-                  << " cov e1 = " << gp_.g.coverage(e1)
-                  << " cov e2 = " << gp_.g.coverage(e2)
-                  << " time1 = " << time1
-                  << " time2 = " << time2
-                  << " time = " << time);
         }
+        return false;
     }
-private:
-    const conj_graph_pack& gp_;
-};
-
-class SimpleLoopResolver : public ShortLoopResolver {
 
-public:
-    SimpleLoopResolver(Graph& g) : ShortLoopResolver(g) { }
-
-    void ResolveShortLoop(BidirectionalPath& path) const override {
-        pair<EdgeId, EdgeId> edges;
-        if (path.Size() >= 1 && GetLoopAndExit(g_, path.Back(), edges)) {
-            DEBUG("Resolving short loop...");
-            EdgeId e = path.Back();
-            path.PushBack(edges.first);
-            path.PushBack(e);
-            path.PushBack(edges.second);
-            DEBUG("Resolving short loop done");
-        }
+    //NB! Equality is not transitive if max_diff is > 0
+    bool IsEqual(const BidirectionalPath &path,
+                 const BidirectionalPath &other) const {
+        auto ends_pair = CommonPrefix(path, other);
+        return ends_pair.first == path.Size()
+               && ends_pair.second == other.Size();
     }
 
-protected:
-    DECL_LOGGER("PathExtender")
-};
 
-class LoopResolver : public ShortLoopResolver {
-    static const size_t ITER_COUNT = 10;
-    const WeightCounter& wc_;
+    pair<size_t, size_t> CommonPrefix(const BidirectionalPath &path1,
+                                      const BidirectionalPath &path2) const {
+        auto answer = make_pair(0, 0);
+        size_t cum = 0;
+        size_t max_overlap = 0;
+        for (size_t j = 0; j < path2.Size(); ++j) {
+            auto range_pair = ComparePaths(path1, path2, j);
+            if (range_pair.second.start_pos == 0 && range_pair.first.size() > max_overlap) {
+                answer = make_pair(range_pair.first.end_pos, range_pair.second.end_pos);
+                max_overlap = range_pair.first.size();
+            }
 
-private:
-    bool CheckLoopPlausible(EdgeId froward_loop_edge, EdgeId backward_loop_edge) const {
-        size_t single_loop_length = 2 * g_.length(froward_loop_edge) + g_.length(backward_loop_edge);
-        return single_loop_length <= wc_.get_libptr()->GetISMax();
-    }
+            if (!try_extend_)
+                break;
 
-public:
-    LoopResolver(const Graph& g, const WeightCounter& wc)
-            : ShortLoopResolver(g),
-              wc_(wc) { }
-    //This code works only if loop wasn't fairly resolved
-    //
-    //Weird interface; need comments
-    void MakeBestChoice(BidirectionalPath& path, pair<EdgeId, EdgeId>& edges) const {
-        UndoCycles(path, edges.first);
-        BidirectionalPath experiment(path);
-        double max_weight = wc_.CountWeight(experiment, edges.second);
-        double diff = max_weight - wc_.CountWeight(experiment, edges.first);
-        size_t maxIter = 0;
-        for (size_t i = 1; i <= ITER_COUNT; ++i) {
-            double weight = wc_.CountWeight(experiment, edges.first);
-            if (weight > 0) {
-                MakeCycleStep(experiment, edges.first);
-                weight = wc_.CountWeight(experiment, edges.second);
-                double weight2 = wc_.CountWeight(experiment, edges.first);
-                if (weight > max_weight || (weight == max_weight && weight - weight2 > diff)
-                    || (weight == max_weight && weight - weight2 == diff && i == 1)) {
-                    max_weight = weight;
-                    maxIter = i;
-                    diff = weight - weight2;
-                }
-            }
+            cum += path2.ShiftLength(j);
+            if (cum > max_diff_)
+                break;
         }
+        return answer;
+    };
+
+    //overlap is forced to start from the beginning of path1
+    pair<Range, Range> FindOverlap(const BidirectionalPath &path1,
+                                   const BidirectionalPath &path2,
+                                   bool end_start_only) const {
+        size_t max_overlap = 0;
+        pair<Range, Range> matching_ranges;
+        for (size_t j = 0; j < path2.Size(); ++j) {
+            auto range_pair = ComparePaths(path1, path2, j);
+            VERIFY(range_pair.first.start_pos == 0);
+            //checking if overlap is valid
+            if (end_start_only && range_pair.second.end_pos != path2.Size())
+                continue;
 
-        if (!CheckLoopPlausible(path.Back(), edges.first) && maxIter > 0) {
-            MakeCycleStep(path, edges.first);
-            path.PushBack(edges.second, int(g_.k() + 100));
-        }
-        else {
-            for (size_t i = 0; i < maxIter; ++i) {
-                MakeCycleStep(path, edges.first);
+            size_t overlap_size = range_pair.first.size();
+            if (overlap_size > max_overlap ||
+                //prefer overlaps with end of path2
+                (overlap_size == max_overlap &&
+                 range_pair.second.end_pos == path2.Size())) {
+                max_overlap = overlap_size;
+                matching_ranges = range_pair;
             }
-            path.PushBack(edges.second);
         }
-
+        return matching_ranges;
     }
 
-    void ResolveShortLoop(BidirectionalPath& path) const override {
-        pair<EdgeId, EdgeId> edges;
-        if (path.Size() >=1 && GetLoopAndExit(g_, path.Back(), edges)) {
-            DEBUG("Resolving short loop...");
-            MakeBestChoice(path, edges);
-            DEBUG("Resolving short loop done");
+    vector<const BidirectionalPath*> FindCandidatePaths(const BidirectionalPath &path) const {
+        set<const BidirectionalPath*> candidates;
+        size_t cum_len = 0;
+        for (size_t i = 0; i < path.Size(); ++i) {
+            if (cum_len > max_diff_)
+                break;
+            EdgeId e = path.At(i);
+            if (g_.length(e) >= min_edge_len_) {
+                utils::insert_all(candidates, coverage_map_.GetCoveringPaths(e));
+                cum_len += path.ShiftLength(i);
+            }
         }
+        return vector<const BidirectionalPath*>(candidates.begin(), candidates.end());
     }
 
+private:
+    DECL_LOGGER("OverlapFindingHelper");
 };
 
-class GapJoiner {
+inline void SubscribeCoverageMap(BidirectionalPath * path, GraphCoverageMap &coverage_map) {
+    path->Subscribe(&coverage_map);
+    for (size_t i = 0; i < path->Size(); ++i) {
+        coverage_map.BackEdgeAdded(path->At(i), path, path->GapAt(i));
+    }
+}
+
+inline BidirectionalPath* AddPath(PathContainer &paths,
+                                  const BidirectionalPath &path,
+                                  GraphCoverageMap &coverage_map) {
+    BidirectionalPath* p = new BidirectionalPath(path);
+    BidirectionalPath* conj_p = new BidirectionalPath(OptimizedConjugate(path));
+    SubscribeCoverageMap(p, coverage_map);
+    SubscribeCoverageMap(conj_p, coverage_map);
+    paths.AddPair(p, conj_p);
+    return p;
+}
 
+class ShortLoopEstimator {
 public:
-    static const int INVALID_GAP = -1000000;
-    GapJoiner(const Graph& g)
-            : g_(g) { }
-
-    virtual Gap FixGap( EdgeId source, EdgeId sink, int initial_gap) const = 0;
+    //Path must end with forward cycle edge, contain at least 2 edges and must not contain backward cycle edges
+    //Returns 0 (for no loops), 1 (for a single loop) or 2 (for many loops)
+    virtual size_t EstimateSimpleCycleCount(const BidirectionalPath& path, EdgeId backward_edge, EdgeId exit_edge) const = 0;
 
-    virtual ~GapJoiner() { }
-protected:
-    const Graph& g_;
+    virtual ~ShortLoopEstimator() {};
 };
 
-class SimpleGapJoiner : public GapJoiner {
-
+class ShortLoopResolver {
 public:
-    SimpleGapJoiner(const Graph& g) : GapJoiner(g) { }
+    static const size_t BASIC_N_CNT = 100;
 
-    Gap FixGap(EdgeId source, EdgeId sink, int initial_gap) const override {
-        if (initial_gap > 2 * (int) g_.k()) {
-            return Gap(initial_gap);
-        }
-        for (int l = (int) g_.k(); l > 0; --l) {
-            if (g_.EdgeNucls(sink).Subseq(g_.length(source) + g_.k() - l) == g_.EdgeNucls(sink).Subseq(0, l)) {
-                DEBUG("Found correct gap length");
-                DEBUG("Inintial: " << initial_gap << ", new gap: " << g_.k() - l);
-                return Gap((int) g_.k() - l);
-            }
+    ShortLoopResolver(const Graph& g, shared_ptr<ShortLoopEstimator> loop_estimator)
+            : g_(g), loop_estimator_(loop_estimator) { }
+
+    void ResolveShortLoop(BidirectionalPath& path) const {
+        EdgeId back_cycle_edge;
+        EdgeId loop_exit;
+        if (path.Size() >=1 && GetLoopAndExit(g_, path.Back(), back_cycle_edge, loop_exit)) {
+            DEBUG("Resolving short loop...");
+            MakeBestChoice(path, back_cycle_edge, loop_exit);
+            DEBUG("Resolving short loop done");
         }
-        DEBUG("Perfect overlap is not found, inintial: " << initial_gap);
-        return Gap(initial_gap);
     }
-};
-
-class HammingGapJoiner: public GapJoiner {
-    const double min_gap_score_;
-    const size_t short_overlap_threshold_;
-    const size_t basic_overlap_length_;
 
-    vector<size_t> DiffPos(const Sequence& s1, const Sequence& s2) const {
-        VERIFY(s1.size() == s2.size());
-        vector < size_t > answer;
-        for (size_t i = 0; i < s1.size(); ++i)
-            if (s1[i] != s2[i])
-                answer.push_back(i);
-        return answer;
-    }
+private:
+    DECL_LOGGER("PathExtender")
+    const Graph& g_;
+    shared_ptr<ShortLoopEstimator> loop_estimator_;
 
-    size_t HammingDistance(const Sequence& s1, const Sequence& s2) const {
-        VERIFY(s1.size() == s2.size());
-        size_t dist = 0;
-        for (size_t i = 0; i < s1.size(); ++i) {
-            if (s1[i] != s2[i]) {
-                dist++;
+    void UndoCycles(BidirectionalPath& p, EdgeId next_edge) const {
+        if (p.Size() <= 2) {
+            return;
+        }
+        EdgeId first_edge = p.Back();
+        EdgeId second_edge = next_edge;
+        while (p.Size() > 2) {
+            if (p.At(p.Size() - 1) == first_edge && p.At(p.Size() - 2) == second_edge) {
+                p.PopBack(2);
+            } else {
+                return;;
             }
         }
-        return dist;
     }
 
-//    double ScoreGap(const Sequence& s1, const Sequence& s2, int gap, int initial_gap) const {
-//        VERIFY(s1.size() == s2.size());
-//        return 1.0 - (double) HammingDistance(s1, s2) / (double) s1.size()
-//                - (double) abs(gap - initial_gap) / (double) (2 * g_.k());
-//    }
-
+    //edges -- first edge is loop's back edge, second is loop exit edge
+    void MakeBestChoice(BidirectionalPath& path, EdgeId back_cycle_edge, EdgeId loop_exit) const {
+        EdgeId forward_cycle_edge = path.Back();
+        UndoCycles(path, back_cycle_edge);
 
-    double ScoreGap(const Sequence& s1, const Sequence& s2) const {
-        VERIFY(s1.size() == s2.size());
-        return 1.0 - (double) HammingDistance(s1, s2) / (double) s1.size();
+        //Expects 0 (for no loops), 1 (for a single loop) or 2 (for many loops, will insert back_cycle_edge and Ns)
+        size_t loop_count = loop_estimator_->EstimateSimpleCycleCount(path, back_cycle_edge, loop_exit);
+        if (loop_count > 0) {
+            path.PushBack(back_cycle_edge);
+            if (loop_count == 1) {
+                DEBUG("Single loop");
+                path.PushBack(forward_cycle_edge);
+                path.PushBack(loop_exit);
+            }
+            else {
+                DEBUG("Multiple cycles");
+                path.PushBack(loop_exit, Gap(int(g_.k() + BASIC_N_CNT)));
+            }
+        }
+        else {
+            path.PushBack(loop_exit);
+        }
     }
+};
 
+class CoverageLoopEstimator : public ShortLoopEstimator {
 public:
+    CoverageLoopEstimator(const Graph& g, const FlankingCoverage<Graph>& flanking_cov)
+            : g_(g), flanking_cov_(flanking_cov) {
+
+    }
+
+    //Path must end with forward cycle edge, contain at least 2 edges and must not contain backward cycle edges
+    //Returns 0 (for no loops), 1 (for a single loop) or 2 (for many loops)
+    size_t EstimateSimpleCycleCount(const BidirectionalPath& path, EdgeId backward_edge, EdgeId exit_edge) const override {
+        VERIFY(path.Size() > 1);
+        EdgeId forward_edge = path.Back();
+        EdgeId incoming_edge = path[path.Size() - 2];
+        double in_cov = flanking_cov_.GetOutCov(incoming_edge);
+        double out_cov = flanking_cov_.GetInCov(exit_edge);
+        double avg_coverage = (in_cov + out_cov) / 2.0;
+
+        double fwd_count = math::round(g_.coverage(forward_edge) / avg_coverage);
+        double back_count = math::round(g_.coverage(backward_edge) / avg_coverage);
+        size_t result = (size_t) math::round(std::max(0.0, std::min(fwd_count - 1.0, back_count)));
+
+        DEBUG("loop with start " << g_.int_id(incoming_edge)
+                <<" e1 " << g_.int_id(forward_edge)
+                << " e2 " << g_.int_id(backward_edge)
+                << " out " <<g_.int_id(exit_edge)
+                << " cov in = " << in_cov
+                << " cov out " << out_cov
+                << " cov " << avg_coverage
+                << " cov e1 = " << g_.coverage(forward_edge)
+                << " cov e2 = " << g_.coverage(backward_edge)
+                << " fwd_count = " << fwd_count
+                << " back_count = " << back_count
+                << " result = " << result);
 
-    //todo review parameters in usages
-    HammingGapJoiner(const Graph& g,
-            double min_gap_score,
-            size_t short_overlap_threshold,
-            size_t basic_overlap_length):
-                GapJoiner(g),
-                min_gap_score_(min_gap_score),
-                short_overlap_threshold_(short_overlap_threshold),
-                basic_overlap_length_(basic_overlap_length)
-    {
-        DEBUG("HammingGapJoiner params: \n min_gap_score " << min_gap_score_ <<
-              "\n short_overlap_threshold " << short_overlap_threshold_ <<
-              "\n basic_overlap_length " << basic_overlap_length_);
+        return result;
     }
 
-    //estimated_gap is in k-mers
-    Gap FixGap(EdgeId source, EdgeId sink, int estimated_gap) const override {
-
-        size_t corrected_start_overlap = basic_overlap_length_;
-        if (estimated_gap < 0) {
-            corrected_start_overlap -= estimated_gap;
-        }
-
-        corrected_start_overlap = min(corrected_start_overlap,
-                                      g_.k() + min(g_.length(source), g_.length(sink)));
-
-        DEBUG("Corrected max overlap " << corrected_start_overlap);
+private:
+    const Graph& g_;
+    const FlankingCoverage<Graph>& flanking_cov_;
+};
 
-        double best_score = min_gap_score_;
-        int fixed_gap = INVALID_GAP;
+class PairedInfoLoopEstimator: public ShortLoopEstimator {
+    const Graph& g_;
+    shared_ptr<WeightCounter> wc_;
+    double weight_threshold_;
 
-        double overlap_coeff = 0.3;
-        size_t min_overlap = 1ul;
-        if (estimated_gap < 0) {
-            size_t estimated_overlap = g_.k() - estimated_gap;
-            min_overlap = max(size_t(math::round(overlap_coeff * double(estimated_overlap))), 1ul);
+public:
+    PairedInfoLoopEstimator(const Graph& g, shared_ptr<WeightCounter> wc, double weight_threshold = 0.0)
+            : g_(g),
+              wc_(wc),
+              weight_threshold_(weight_threshold) { }
+
+    //Path must end with forward cycle edge, contain at least 2 edges and must not contain backward cycle edges
+    //Returns 0 (for no loops), 1 (for a single loop) or 2 (for many loops)
+    size_t EstimateSimpleCycleCount(const BidirectionalPath& path, EdgeId backward_edge, EdgeId /*exit_edge*/) const override {
+        VERIFY(path.Size() > 1);
+        VERIFY(wc_ != nullptr);
+        EdgeId forward_cycle_edge = path.Back();
+
+        size_t result = 0;
+        double lopp_edge_weight = wc_->CountWeight(path, backward_edge);
+        if (math::gr(lopp_edge_weight, weight_threshold_)) {
+            //Paired information on loop back edges exits => at leat one iteration
+            //Looking for paired information supporting more than 1 cycle
+            if (NoSelfPairedInfo(backward_edge, forward_cycle_edge)) {
+                //More likely to be a single cycle
+                DEBUG("Single loop");
+                result = 1;
+            }
+            else {
+                DEBUG("Multiple cycles");
+                //More likely to be a 2 or more cycles
+                result = 2;
+            }
         }
-        //todo better usage of estimated overlap
-        DEBUG("Min overlap " << min_overlap);
+        return result;
+    }
 
-        for (size_t l = corrected_start_overlap; l >= min_overlap; --l) {
-            //TRACE("Sink: " << g_.EdgeNucls(sink).Subseq(g_.length(sink) + g_.k() - l).str());
-            //TRACE("Source: " << g_.EdgeNucls(source).Subseq(0, l));
-            double score = 0;
-            score = ScoreGap(g_.EdgeNucls(source).Subseq(g_.length(source) + g_.k() - l),
-                                    g_.EdgeNucls(sink).Subseq(0, l));
-            if (math::gr(score, best_score)) {
-                TRACE("Curr overlap " << l);
-                TRACE("Score: " << score);
-                best_score = score;
-                fixed_gap = int(g_.k() - l);
-            }
+private:
 
-            if (l == short_overlap_threshold_ && fixed_gap != INVALID_GAP) {
-                //look at "short" overlaps only if long overlaps couldn't be found
-                DEBUG("Not looking at short overlaps");
+    bool NoSelfPairedInfo(EdgeId back_cycle_edge, EdgeId forward_cycle_edge) const {
+        size_t is = wc_->PairedLibrary().GetISMax();
+        int forward_len = (int) g_.length(forward_cycle_edge);
+        bool exists_pi = true;
+
+        BidirectionalPath cycle(g_, back_cycle_edge);
+        while (cycle.Length() < is + g_.length(back_cycle_edge)) {
+            auto w = wc_->CountWeight(cycle, back_cycle_edge, std::set<size_t>(), forward_len);
+            if (math::gr(w, weight_threshold_)) {
+                //Paired information found within loop
+                DEBUG("Found PI with back weight " << w << ", weight threshold " << weight_threshold_);
+                exists_pi = false;
                 break;
             }
+            cycle.PushBack(back_cycle_edge, Gap(forward_len));
         }
 
-        if (fixed_gap != INVALID_GAP) {
-            DEBUG("Found candidate gap length with score " << best_score);
-            DEBUG("Estimated gap: " << estimated_gap <<
-                  ", fixed gap: " << fixed_gap << " (overlap " << g_.k() - fixed_gap<< ")");
+        return exists_pi;
+    }
+};
+
+class CombinedLoopEstimator: public ShortLoopEstimator {
+public:
+    CombinedLoopEstimator(const Graph& g,
+                          const FlankingCoverage<Graph>& flanking_cov,
+                          shared_ptr<WeightCounter> wc,
+                          double weight_threshold = 0.0)
+        : pi_estimator_(g, wc, weight_threshold),
+          cov_estimator_(g, flanking_cov) {}
+
+    //Path must end with forward cycle edge, contain at least 2 edges and must not contain backward cycle edges
+    //Returns 0 (for no loops), 1 (for a single loop) or 2 (for many loops)
+    size_t EstimateSimpleCycleCount(const BidirectionalPath& path, EdgeId backward_edge, EdgeId exit_edge) const override {
+        size_t result = pi_estimator_.EstimateSimpleCycleCount(path, backward_edge, exit_edge);
+        if (result == 1) {
+            //Verify using coverage
+            if (cov_estimator_.EstimateSimpleCycleCount(path, backward_edge, exit_edge) > 1)
+                result = 2;
         }
-        return Gap(fixed_gap);
+        return result;
     }
 
 private:
-    DECL_LOGGER("HammingGapJoiner");
+    PairedInfoLoopEstimator pi_estimator_;
+    CoverageLoopEstimator cov_estimator_;
 };
 
-//deprecated!
-//fixme reduce code duplication with HammingGapJoiner
-class LikelihoodHammingGapJoiner: public GapJoiner {
-    static const size_t DEFAULT_PADDING_LENGTH = 10;
+
+
+//TODO move to gap_closing.hpp
+typedef omnigraph::GapDescription<Graph> GapDescription;
+class GapAnalyzer {
+
+public:
+    static const int INVALID_GAP = GapDescription::INVALID_GAP;
+    GapAnalyzer(const Graph& g)
+            : g_(g) { }
+
+    virtual GapDescription FixGap(const GapDescription &gap) const = 0;
+
+    virtual ~GapAnalyzer() { }
+protected:
+    const Graph& g_;
+};
+
+class HammingGapAnalyzer: public GapAnalyzer {
     const double min_gap_score_;
     const size_t short_overlap_threshold_;
     const size_t basic_overlap_length_;
 
-    vector<size_t> DiffPos(const Sequence& s1, const Sequence& s2) const {
-        VERIFY(s1.size() == s2.size());
-        vector < size_t > answer;
-        for (size_t i = 0; i < s1.size(); ++i)
-            if (s1[i] != s2[i])
-                answer.push_back(i);
-        return answer;
-    }
+    static constexpr double MIN_OVERLAP_COEFF = 0.05;
 
     size_t HammingDistance(const Sequence& s1, const Sequence& s2) const {
         VERIFY(s1.size() == s2.size());
@@ -374,80 +458,65 @@ class LikelihoodHammingGapJoiner: public GapJoiner {
         return dist;
     }
 
-//    double ScoreGap(const Sequence& s1, const Sequence& s2, int gap, int initial_gap) const {
-//        VERIFY(s1.size() == s2.size());
-//        return 1.0 - (double) HammingDistance(s1, s2) / (double) s1.size()
-//                - (double) abs(gap - initial_gap) / (double) (2 * g_.k());
-//    }
-
-    //FIXME use GC content, change match prob and use partition of tip sequence into bad and good part
     double ScoreGap(const Sequence& s1, const Sequence& s2) const {
-        static double match_prob = 0.9;
-        static double log_match_prob = log2(match_prob);
-        static double log_mismatch_prob = log2(1. - match_prob);
         VERIFY(s1.size() == s2.size());
-        size_t n = s1.size();
-        size_t mismatches = HammingDistance(s1, s2);
-        VERIFY(mismatches <= n);
-        return 2.*double(n) + double(n - mismatches) * log_match_prob + double(mismatches) * log_mismatch_prob;
+        return 1.0 - (double) HammingDistance(s1, s2) / (double) s1.size();
     }
 
 public:
 
     //todo review parameters in usages
-    LikelihoodHammingGapJoiner(const Graph& g,
+    HammingGapAnalyzer(const Graph& g,
             double min_gap_score,
             size_t short_overlap_threshold,
             size_t basic_overlap_length):
-                GapJoiner(g),
+                GapAnalyzer(g),
                 min_gap_score_(min_gap_score),
                 short_overlap_threshold_(short_overlap_threshold),
                 basic_overlap_length_(basic_overlap_length)
     {
-        DEBUG("LikelihoodHammingGapJoiner params: \n min_gap_score " << min_gap_score_ <<
+        DEBUG("HammingGapAnalyzer params: \n min_gap_score " << min_gap_score_ <<
               "\n short_overlap_threshold " << short_overlap_threshold_ <<
               "\n basic_overlap_length " << basic_overlap_length_);
     }
 
-    //estimated_gap is in k-mers
-    Gap FixGap(EdgeId source, EdgeId sink, int estimated_gap) const override {
+    GapDescription FixGap(const GapDescription &gap) const override {
+        VERIFY_MSG(gap.no_trim(), "Trims not supported yet");
 
-        size_t corrected_start_overlap = basic_overlap_length_;
-        if (estimated_gap < 0) {
-            corrected_start_overlap -= estimated_gap;
+        size_t max_overlap = basic_overlap_length_;
+        if (gap.estimated_dist() < 0) {
+            max_overlap -= gap.estimated_dist();
         }
 
-        corrected_start_overlap = min(corrected_start_overlap,
-                                      g_.k() + min(g_.length(source), g_.length(sink)));
+        max_overlap = min(max_overlap,
+                                      g_.k() + min(g_.length(gap.left()), g_.length(gap.right())));
 
-        DEBUG("Corrected max overlap " << corrected_start_overlap);
+        DEBUG("Corrected max overlap " << max_overlap);
 
         double best_score = min_gap_score_;
-        int fixed_gap = INVALID_GAP;
+        int fixed_gap = GapDescription::INVALID_GAP;
 
-        double overlap_coeff = 0.3;
-        size_t min_overlap = 1ul;
-        if (estimated_gap < 0) {
-            size_t estimated_overlap = g_.k() - estimated_gap;
-            min_overlap = max(size_t(math::round(overlap_coeff * double(estimated_overlap))), 1ul);
+        size_t min_overlap = 1;
+        if (gap.estimated_dist() < 0) {
+            min_overlap = max(min_overlap, size_t(math::round(MIN_OVERLAP_COEFF * double(-gap.estimated_dist()))));
         }
         //todo better usage of estimated overlap
         DEBUG("Min overlap " << min_overlap);
 
-        for (size_t l = corrected_start_overlap; l >= min_overlap; --l) {
+        for (size_t l = max_overlap; l >= min_overlap; --l) {
             //TRACE("Sink: " << g_.EdgeNucls(sink).Subseq(g_.length(sink) + g_.k() - l).str());
             //TRACE("Source: " << g_.EdgeNucls(source).Subseq(0, l));
             double score = 0;
-            score = ScoreGap(g_.EdgeNucls(source).Subseq(g_.length(source) + g_.k() - l),
-                                    g_.EdgeNucls(sink).Subseq(0, l));
+            score = ScoreGap(g_.EdgeNucls(gap.left()).Subseq(g_.length(gap.left()) + g_.k() - l),
+                                    g_.EdgeNucls(gap.right()).Subseq(0, l));
             if (math::gr(score, best_score)) {
                 TRACE("Curr overlap " << l);
                 TRACE("Score: " << score);
                 best_score = score;
-                fixed_gap = int(g_.k() - l);
+                fixed_gap = -int(l);
             }
 
-            if (l == short_overlap_threshold_ && fixed_gap != INVALID_GAP) {
+            if (l == short_overlap_threshold_ && fixed_gap != GapDescription::INVALID_GAP) {
                 //look at "short" overlaps only if long overlaps couldn't be found
                 DEBUG("Not looking at short overlaps");
                 break;
@@ -456,165 +525,144 @@ public:
 
         if (fixed_gap != INVALID_GAP) {
             DEBUG("Found candidate gap length with score " << best_score);
-            DEBUG("Estimated gap: " << estimated_gap <<
-                  ", fixed gap: " << fixed_gap << " (overlap " << g_.k() - fixed_gap<< ")");
+            DEBUG("Estimated gap: " << gap.estimated_dist() <<
+                  ", fixed gap: " << fixed_gap << " (overlap " << (-fixed_gap) << ")");
+
+            auto answer = gap;
+            answer.set_estimated_dist(fixed_gap);
+            return answer;
+        } else {
+            return GapDescription();
         }
-        return Gap(fixed_gap);
     }
 
 private:
-    DECL_LOGGER("LikelihoodHammingGapJoiner");
+    DECL_LOGGER("HammingGapAnalyzer");
 };
 
-//if I was in LA
-class LAGapJoiner: public GapJoiner {
+//LA stands for Local Alignment
+//TODO if current setting will work -- get rid of flank_*_coefficient params
+class LAGapAnalyzer: public GapAnalyzer {
 public:
-    LAGapJoiner(const Graph& g, size_t min_la_length,
+    LAGapAnalyzer(const Graph& g, size_t min_la_length,
             double flank_multiplication_coefficient,
-            double flank_addition_coefficient) :
-            GapJoiner(g), min_la_length_(min_la_length), flank_addition_coefficient_(
-                    flank_addition_coefficient), flank_multiplication_coefficient_(
-                    flank_multiplication_coefficient) {
-        DEBUG("flank_multiplication_coefficient - " << flank_multiplication_coefficient_); 
-        DEBUG("flank_addition_coefficient_  - " << flank_addition_coefficient_ );
+            int flank_addition_coefficient) :
+            GapAnalyzer(g),
+            min_la_length_(min_la_length),
+            flank_multiplication_coefficient_(flank_multiplication_coefficient),
+            flank_addition_coefficient_(flank_addition_coefficient) {
+        DEBUG("flank_multiplication_coefficient - " << flank_multiplication_coefficient_);
+        DEBUG("flank_addition_coefficient  - " << flank_addition_coefficient_ );
     }
 
-    Gap FixGap(EdgeId source, EdgeId sink, int initial_gap) const override {
-
-        DEBUG("Overlap doesn't exceed " << size_t(abs(initial_gap) * ESTIMATED_GAP_MULTIPLIER) + GAP_ADDITIONAL_COEFFICIENT);
-        SWOverlapAnalyzer overlap_analyzer(
-                size_t(abs(initial_gap) * ESTIMATED_GAP_MULTIPLIER) + GAP_ADDITIONAL_COEFFICIENT);
+    GapDescription FixGap(const GapDescription &gap) const override {
+        VERIFY_MSG(gap.no_trim(), "Trims not supported yet");
+        //estimated_gap is in k-mers
 
-        auto overlap_info = overlap_analyzer.AnalyzeOverlap(g_, source,
-                sink);
+        size_t estimated_overlap = gap.estimated_dist() < 0 ? size_t(abs(gap.estimated_dist())) : 0;
+        SWOverlapAnalyzer overlap_analyzer(size_t(math::round(double(estimated_overlap) * ESTIMATED_GAP_MULTIPLIER))
+                                           + GAP_ADDITIONAL_COEFFICIENT);
 
+        auto overlap_info = overlap_analyzer.AnalyzeOverlap(g_, gap.left(), gap.right());
         DEBUG(overlap_info);
 
         if (overlap_info.size() < min_la_length_) {
             DEBUG("Low alignment size");
-            return Gap(INVALID_GAP);
+            return GapDescription();
         }
 
         size_t max_flank_length = max(overlap_info.r2.start_pos,
-                g_.length(source) + g_.k() - overlap_info.r1.end_pos);
+                g_.length(gap.left()) + g_.k() - overlap_info.r1.end_pos);
         DEBUG("Max flank length - " << max_flank_length);
 
-        if ((double) max_flank_length * flank_multiplication_coefficient_
-                + flank_addition_coefficient_ > (double) overlap_info.size()) {
+        if (int(math::round(double(max_flank_length) * flank_multiplication_coefficient_))
+                + flank_addition_coefficient_ > int(overlap_info.size())) {
             DEBUG("Too long flanks for such alignment");
-            return Gap(INVALID_GAP);
+            return GapDescription();
         }
 
         if (math::ls(overlap_info.identity(), IDENTITY_RATIO)) {
             DEBUG("Low identity score");
-            return Gap(INVALID_GAP);
+            return GapDescription();
         }
 
-        if (g_.k() + 1 > overlap_info.r1.end_pos) {
-            DEBUG("Save kmers. Don't want to have edges shorter than k");
-            return Gap(INVALID_GAP);
+        if (overlap_info.r1.end_pos <= g_.k() || overlap_info.r2.start_pos >= g_.length(gap.right())) {
+            DEBUG("Less than k+1 nucleotides were left of one of the edges");
+            return GapDescription();
         }
 
-        if (overlap_info.r2.start_pos > g_.length(sink)) {
-            DEBUG("Save kmers. Don't want to have edges shorter than k");
-            return Gap(INVALID_GAP);
-        }
-
-        return Gap(
-                (int) (-overlap_info.r1.size() - overlap_info.r2.start_pos
-                        + g_.k()),
-                (uint32_t) (g_.length(source) + g_.k()
-                        - overlap_info.r1.end_pos),
-                (uint32_t) overlap_info.r2.start_pos);
+        //TODO Is it ok to have a non-symmetric overlap gap description
+        return GapDescription(gap.left(), gap.right(),
+                              -int(overlap_info.r2.size()),
+                              g_.length(gap.left()) + g_.k() - overlap_info.r1.end_pos,
+                              overlap_info.r2.start_pos);
     }
 
 private:
-    DECL_LOGGER("LAGapJoiner");
+    DECL_LOGGER("LAGapAnalyzer");
     const size_t min_la_length_;
-    const double flank_addition_coefficient_;
     const double flank_multiplication_coefficient_;
-    constexpr static double IDENTITY_RATIO = 0.9;
-    constexpr static double ESTIMATED_GAP_MULTIPLIER = 2.0;
-    const size_t GAP_ADDITIONAL_COEFFICIENT = 30;
+    const int flank_addition_coefficient_;
+
+    static constexpr double IDENTITY_RATIO = 0.9;
+    static constexpr double ESTIMATED_GAP_MULTIPLIER = 2.0;
+    static constexpr size_t GAP_ADDITIONAL_COEFFICIENT = 30;
 };
 
 
-class CompositeGapJoiner: public GapJoiner {
+class CompositeGapAnalyzer: public GapAnalyzer {
 public:
 
-    CompositeGapJoiner(const Graph& g, 
-                       const vector<shared_ptr<GapJoiner>>& joiners, 
-                       size_t may_overlap_threhold, 
-                       int must_overlap_threhold, 
-                       size_t artificail_gap) :
-            GapJoiner(g), 
-            joiners_(joiners), 
-            may_overlap_threshold_(may_overlap_threhold), 
-            must_overlap_threshold_(must_overlap_threhold), 
-            artificial_gap_(artificail_gap)
-            {  }
-
-    Gap FixGap(EdgeId source, EdgeId sink, int estimated_gap) const override {
-        DEBUG("Trying to fix estimated gap " << estimated_gap <<
-                " between " << g_.str(source) << " and " << g_.str(sink));
-
-        if (estimated_gap > int(g_.k() + may_overlap_threshold_)) {
+    CompositeGapAnalyzer(const Graph& g,
+                       const vector<shared_ptr<GapAnalyzer>>& joiners,
+                       size_t may_overlap_threshold,
+                       int must_overlap_threshold,
+                       size_t artificial_gap) :
+            GapAnalyzer(g),
+            joiners_(joiners),
+            may_overlap_threshold_(may_overlap_threshold),
+            must_overlap_threshold_(must_overlap_threshold),
+            artificial_gap_(artificial_gap)
+    {  }
+
+    GapDescription FixGap(const GapDescription &gap) const override {
+        VERIFY_MSG(gap.right_trim() == 0 && gap.left_trim() == 0, "Not supported yet");
+        DEBUG("Trying to fix estimated gap " << gap.estimated_dist() <<
+              " between " << g_.str(gap.left()) << " and " << g_.str(gap.right()));
+
+        if (gap.estimated_dist() > int(may_overlap_threshold_)) {
             DEBUG("Edges are supposed to be too far to check overlaps");
-            return Gap(estimated_gap);
+            return gap;
         }
 
         for (auto joiner : joiners_) {
-            Gap gap = joiner->FixGap(source, sink, estimated_gap);
-            if (gap.gap_ != GapJoiner::INVALID_GAP) {
-                return gap;
+            GapDescription fixed_gap = joiner->FixGap(gap);
+            if (fixed_gap != GapDescription()) {
+                return fixed_gap;
             }
         }
 
         //couldn't find decent overlap
-        if (estimated_gap < must_overlap_threshold_) {
+        if (gap.estimated_dist() < must_overlap_threshold_) {
             DEBUG("Estimated gap looks unreliable");
-            return Gap(INVALID_GAP);
+            return GapDescription();
         } else {
             DEBUG("Overlap was not found");
-            return Gap(max(estimated_gap, int(g_.k() + artificial_gap_)));
+            auto answer = gap;
+            answer.set_estimated_dist(max(gap.estimated_dist(), int(artificial_gap_)));
+            return answer;
         }
     }
 
 private:
-    vector<shared_ptr<GapJoiner>> joiners_;
+    vector<shared_ptr<GapAnalyzer>> joiners_;
     const size_t may_overlap_threshold_;
     const int must_overlap_threshold_;
     const size_t artificial_gap_;
 
-    DECL_LOGGER("CompositeGapJoiner");
+    DECL_LOGGER("CompositeGapAnalyzer");
 };
 
-//FIXME move to tests
-//Just for test. Look at overlap_analysis_tests
-inline Gap MimicLAGapJoiner(Sequence& s1, Sequence& s2) {
-    const int INVALID_GAP = -1000000;
-    constexpr static double IDENTITY_RATIO = 0.9;
-
-    SWOverlapAnalyzer overlap_analyzer_(10000);
-    auto overlap_info = overlap_analyzer_.AnalyzeOverlap(s1, s2);
-    size_t min_la_length_ = 4;
-    if (overlap_info.size() < min_la_length_) {
-        DEBUG("Low alignment size");
-        return Gap(INVALID_GAP);
-    }
-    if (overlap_info.identity() < IDENTITY_RATIO) {
-        DEBUG("Low identity score");
-        return Gap(INVALID_GAP);
-    }
-    std::cout << overlap_info;
-
-    return Gap(
-            (int) (-overlap_info.r1.size() - overlap_info.r2.start_pos),
-            (uint32_t) (s1.size() - overlap_info.r1.end_pos),
-            (uint32_t) overlap_info.r2.start_pos);
-}
-
-
 //Detects a cycle as a minsuffix > IS present earlier in the path. Overlap is allowed.
 class InsertSizeLoopDetector {
 protected:
@@ -722,11 +770,11 @@ public:
                 VERIFY (last_cycle_pos == start_cycle_pos);
                 DEBUG("find cycle " << last_cycle_pos);
                 DEBUG("path");
-                path.Print();
+                path.PrintDEBUG();
                 DEBUG("last subpath");
-                path.SubPath(last_cycle_pos).Print();
+                path.SubPath(last_cycle_pos).PrintDEBUG();
                 DEBUG("cycle");
-                cycle->Print();
+                cycle->PrintDEBUG();
                 DEBUG("last_cycle_pos " << last_cycle_pos << " path size " << path.Size());
                 VERIFY(last_cycle_pos <= (int)path.Size());
                 DEBUG("last cycle pos + cycle " << last_cycle_pos + (int)cycle->Size());
@@ -748,254 +796,58 @@ public:
         visited_cycles_coverage_map_.Subscribe(p);
         visited_cycles_coverage_map_.Subscribe(cp);
         DEBUG("add cycle");
-        p->Print();
-    }
-};
-
-class RepeatDetector {
-public:
-    RepeatDetector(const Graph& g, const GraphCoverageMap& cov_map, size_t max_repeat_len)
-            : g_(g),
-              cov_map_(cov_map),
-              used_paths_(),
-              repeat_len_(max_repeat_len){
-        empty_ = new BidirectionalPath(g_);
-    }
-    ~RepeatDetector() {
-        delete empty_;
-    }
-
-    BidirectionalPath* RepeatPath(const BidirectionalPath& p) {
-        if (p.Size() == 0) {
-            return empty_;
-        }
-        EdgeId last_e = p.Back();
-        BidirectionalPathSet cov_paths = cov_map_.GetCoveringPaths(last_e);
-        DEBUG("cov paths for e " << g_.int_id(last_e) << " size " << cov_paths.size());
-        size_t max_common_size = 0;
-        BidirectionalPath* result_p = empty_;
-        for (BidirectionalPath* cov_p : cov_paths) {
-            if (used_paths_.find(cov_p) == used_paths_.end() || cov_p == &p || cov_p == p.GetConjPath()) {
-                continue;
-            }
-            size_t common_size = MaxCommonSize(p, *cov_p);
-            DEBUG("max comon size with path " << cov_p->GetId() << " is " << common_size);
-            if (common_size == 0) {
-                continue;
-            }
-            VERIFY(common_size <= p.Size());
-            if (p.LengthAt(p.Size() - common_size) > repeat_len_) {
-                DEBUG("repeat from " << (p.Size() - common_size) << " length " << p.LengthAt(p.Size() - common_size) << " repeat length " << repeat_len_);
-                max_common_size = max(common_size, max_common_size);
-                result_p = cov_p;
-            }
-        }
-        used_paths_.insert(&p);
-        DEBUG("max common size " << max_common_size);
-        return result_p;
-    }
-    size_t MaxCommonSize(const BidirectionalPath& p1, const BidirectionalPath& p2) const {
-        DEBUG("max coomon size ")
-        EdgeId last_e = p1.Back();
-        vector<size_t> positions2 = p2.FindAll(last_e);
-        DEBUG("pos size " << positions2.size())
-        size_t max_common_size = 0;
-        for (size_t pos2 : positions2) {
-            size_t common_size = MaxCommonSize(p1, p1.Size() - 1, p2, pos2);
-            DEBUG("max common size from " << pos2 << " is " << common_size);
-            max_common_size = max(max_common_size, common_size);
-        }
-        return max_common_size;
-    }
-private:
-    size_t MaxCommonSize(const BidirectionalPath& p1, size_t pos1, const BidirectionalPath& p2, size_t pos2) const {
-        int i1 = (int) pos1;
-        int i2 = (int) pos2;
-        while (i1 >= 0 && i2 >= 0 &&
-                p1.At((size_t) i1) == p2.At((size_t) i2) &&
-                p1.GapAt((size_t) i1) == p2.GapAt((size_t) i2)) {
-            i1--;
-            i2--;
-        }
-        if (i1 >=0 && i2>=0 && p1.At((size_t) i1) == p2.At((size_t) i2)) {
-            i1--;
-            i2--;
-        }
-
-        VERIFY(i1 <= (int)pos1);
-        return std::max(size_t((int) pos1 - i1), (size_t)1);
-    }
-    const Graph& g_;
-    const GraphCoverageMap& cov_map_;
-    set<const BidirectionalPath*> used_paths_;
-    size_t repeat_len_;
-    BidirectionalPath* empty_;
-};
-
-class ContigsMaker {
-public:
-    ContigsMaker(const Graph & g)
-            : g_(g) { }
-
-    virtual ~ContigsMaker() { }
-
-    virtual void GrowPath(BidirectionalPath& path, PathContainer* paths_storage = nullptr) = 0;
-
-    virtual void GrowPathSimple(BidirectionalPath& path, PathContainer* paths_storage = nullptr) = 0;
-
-    virtual void GrowAll(PathContainer & paths, PathContainer& paths_storage) = 0;
-
-protected:
-    const Graph& g_;
-    DECL_LOGGER("PathExtender")
-};
-
-struct UsedUniqueStorage {
-    set<EdgeId> used_;
-
-    const ScaffoldingUniqueEdgeStorage& unique_;
-
-    UsedUniqueStorage(const ScaffoldingUniqueEdgeStorage& unique ):used_(), unique_(unique) {}
-
-    void insert(EdgeId e) {
-        if (unique_.IsUnique(e)) {
-            used_.insert(e);
-            used_.insert(e->conjugate());
-        }
+        p->PrintDEBUG();
     }
-
-    bool IsUsedAndUnique(EdgeId e) const {
-        return (unique_.IsUnique(e) && used_.find(e) != used_.end());
-    }
-
-    bool UniqueCheckEnabled() const {
-        return unique_.size() > 0;
-    }
-
-
 };
 
 class PathExtender {
 public:
-    PathExtender(const Graph & g):
-        g_(g){ }
+    explicit PathExtender(const Graph &g):
+        g_(g) { }
 
     virtual ~PathExtender() { }
 
     virtual bool MakeGrowStep(BidirectionalPath& path, PathContainer* paths_storage = nullptr) = 0;
 
-    void AddUniqueEdgeStorage(shared_ptr<UsedUniqueStorage> used_storage) {
-        used_storage_ = used_storage;
-    }
 protected:
-    const Graph& g_;
-    shared_ptr<UsedUniqueStorage> used_storage_;
+    const Graph &g_;
     DECL_LOGGER("PathExtender")
 };
 
-class CompositeExtender : public ContigsMaker {
+class CompositeExtender {
 public:
+
     CompositeExtender(const Graph &g, GraphCoverageMap& cov_map,
-                      size_t max_diff_len,
-                      size_t max_repeat_length,
-                      bool detect_repeats_online)
-            : ContigsMaker(g),
-              cover_map_(cov_map),
-              repeat_detector_(g, cover_map_, 2 * max_repeat_length),
-              extenders_(),
-              max_diff_len_(max_diff_len),
-              max_repeat_len_(max_repeat_length),
-              detect_repeats_online_(detect_repeats_online) {
-    }
-
-    CompositeExtender(const Graph & g, GraphCoverageMap& cov_map,
-                      vector<shared_ptr<PathExtender> > pes,
-                      const ScaffoldingUniqueEdgeStorage& unique,
-                      size_t max_diff_len,
-                      size_t max_repeat_length,
-                      bool detect_repeats_online)
-            : ContigsMaker(g),
+                      UsedUniqueStorage &unique,
+                      const vector<shared_ptr<PathExtender>> &pes,
+                      size_t max_diff_len)
+            : g_(g),
               cover_map_(cov_map),
-              repeat_detector_(g, cover_map_, 2 * max_repeat_length),
-              extenders_(),
-              max_diff_len_(max_diff_len),
-              max_repeat_len_(max_repeat_length),
-              detect_repeats_online_(detect_repeats_online) {
-        extenders_ = pes;
-        used_storage_ = make_shared<UsedUniqueStorage>(UsedUniqueStorage(unique));
-        for (auto ex: extenders_) {
-            ex->AddUniqueEdgeStorage(used_storage_);
-        }
+              used_storage_(unique),
+              extenders_(pes),
+              max_diff_len_(max_diff_len) {
     }
 
-    void AddExtender(shared_ptr<PathExtender> pe) {
-        extenders_.push_back(pe);
-        pe->AddUniqueEdgeStorage(used_storage_);
-    }
-
-    void GrowAll(PathContainer& paths, PathContainer& result) override {
+    void GrowAll(PathContainer& paths, PathContainer& result) {
         result.clear();
         GrowAllPaths(paths, result);
-        LengthPathFilter filter(g_, 0);
-        filter.filter(result);
+        result.FilterEmptyPaths();
     }
 
-    void GrowPath(BidirectionalPath& path, PathContainer* paths_storage) override {
+    void GrowPath(BidirectionalPath& path, PathContainer* paths_storage) {
         while (MakeGrowStep(path, paths_storage)) { }
     }
 
-    void GrowPathSimple(BidirectionalPath& path, PathContainer* paths_storage) override {
-        while (MakeGrowStep(path, paths_storage, false)) { }
-    }
 
+private:
+    const Graph &g_;
+    GraphCoverageMap &cover_map_;
+    UsedUniqueStorage &used_storage_;
+    vector<shared_ptr<PathExtender>> extenders_;
+    size_t max_diff_len_;
 
-    bool MakeGrowStep(BidirectionalPath& path, PathContainer* paths_storage,
-                      bool detect_repeats_online_local = true) {
+    bool MakeGrowStep(BidirectionalPath& path, PathContainer* paths_storage) {
         DEBUG("make grow step composite extender");
-        if (detect_repeats_online_ && detect_repeats_online_local) {
-            BidirectionalPath *repeat_path = repeat_detector_.RepeatPath(path);
-            size_t repeat_size = repeat_detector_.MaxCommonSize(path, *repeat_path);
-
-            if (repeat_size > 0) {
-                DEBUG("repeat with length " << repeat_size);
-                path.Print();
-                repeat_path->Print();
-                BidirectionalPath repeat = path.SubPath(path.Size() - repeat_size);
-                int begin_repeat = repeat_path->FindLast(repeat);
-                VERIFY(begin_repeat > -1);
-                size_t end_repeat = (size_t) begin_repeat + repeat_size;
-                DEBUG("not consistent subpaths ");
-                BidirectionalPath begin1 = path.SubPath(0, path.Size() - repeat_size);
-                begin1.Print();
-                BidirectionalPath begin2 = repeat_path->SubPath(0, begin_repeat);
-                begin2.Print();
-                int gpa_in_repeat_path = repeat_path->GapAt(begin_repeat);
-                BidirectionalPath end2 = repeat_path->SubPath(end_repeat);
-                BidirectionalPath begin1_conj = path.SubPath(0, path.Size() - repeat_size + 1).Conjugate();
-                BidirectionalPath begin2_conj = repeat_path->SubPath(0, begin_repeat + 1).Conjugate();
-                pair<size_t, size_t> last = ComparePaths(0, 0, begin1_conj, begin2_conj, max_diff_len_);
-                DEBUG("last " << last.first << " last2 " << last.second);
-                path.Clear();
-                repeat_path->Clear();
-                int gap_len = repeat.GapAt(0);
-
-                if (begin2.Size() == 0 || last.second != 0) { //TODO: incorrect: common edges, but then different ends
-                    path.PushBack(begin1);
-                    repeat_path->PushBack(begin2);
-                } else {
-                    gap_len = gpa_in_repeat_path;
-                    path.PushBack(begin2);
-                    repeat_path->PushBack(begin1);
-                }
-
-                path.PushBack(repeat.At(0), gap_len);
-                path.PushBack(repeat.SubPath(1));
-                path.PushBack(end2);
-                DEBUG("new path");
-                path.Print();
-                return false;
-            }
-        }
 
         size_t current = 0;
         while (current < extenders_.size()) {
@@ -1008,22 +860,6 @@ public:
         return false;
     }
     
-private:
-    GraphCoverageMap& cover_map_;
-    RepeatDetector repeat_detector_;
-    vector<shared_ptr<PathExtender> > extenders_;
-    size_t max_diff_len_;
-    size_t max_repeat_len_;
-    bool detect_repeats_online_;
-    shared_ptr<UsedUniqueStorage> used_storage_;
-
-    void SubscribeCoverageMap(BidirectionalPath * path) {
-        path->Subscribe(&cover_map_);
-        for (size_t i = 0; i < path->Size(); ++i) {
-            cover_map_.BackEdgeAdded(path->At(i), path, path->GapInfoAt(i));
-        }
-    }
-
     void GrowAllPaths(PathContainer& paths, PathContainer& result) {
         for (size_t i = 0; i < paths.size(); ++i) {
             VERBOSE_POWER_T2(i, 100, "Processed " << i << " paths from " << paths.size() << " (" << i * 100 / paths.size() << "%)");
@@ -1031,16 +867,17 @@ private:
                 INFO("Processed " << i << " paths from " << paths.size() << " (" << i * 100 / paths.size() << "%)");
             }
             //In 2015 modes do not use a seed already used in paths.
-            if (used_storage_->UniqueCheckEnabled()) {
+            //FIXME what is the logic here?
+            if (used_storage_.UniqueCheckEnabled()) {
                 bool was_used = false;
                 for (size_t ind =0; ind < paths.Get(i)->Size(); ind++) {
                     EdgeId eid = paths.Get(i)->At(ind);
-                    if (used_storage_->IsUsedAndUnique(eid)) {
+                    if (used_storage_.IsUsedAndUnique(eid)) {
                         DEBUG("Used edge " << g_.int_id(eid));
                         was_used = true;
                         break;
                     } else {
-                        used_storage_->insert(eid);
+                        used_storage_.insert(eid);
                     }
                 }
                 if (was_used) {
@@ -1050,11 +887,12 @@ private:
             }
 
             if (!cover_map_.IsCovered(*paths.Get(i))) {
+                AddPath(result, *paths.Get(i), cover_map_);
                 BidirectionalPath * path = new BidirectionalPath(*paths.Get(i));
                 BidirectionalPath * conjugatePath = new BidirectionalPath(*paths.GetConjugate(i));
+                SubscribeCoverageMap(path, cover_map_);
+                SubscribeCoverageMap(conjugatePath, cover_map_);
                 result.AddPair(path, conjugatePath);
-                SubscribeCoverageMap(path);
-                SubscribeCoverageMap(conjugatePath);
                 size_t count_trying = 0;
                 size_t current_path_len = 0;
                 do {
@@ -1063,9 +901,8 @@ private:
                     GrowPath(*path, &result);
                     GrowPath(*conjugatePath, &result);
                 } while (count_trying < 10 && (path->Length() != current_path_len));
-                path->CheckConjugateEnd(max_repeat_len_);
                 DEBUG("result path " << path->GetId());
-                path->Print();
+                path->PrintDEBUG();
             }
         }
     }
@@ -1074,36 +911,23 @@ private:
 
 //All Path-Extenders inherit this one
 class LoopDetectingPathExtender : public PathExtender {
-
-protected:
-    bool investigate_short_loops_;
-    bool use_short_loop_cov_resolver_;
-    CovShortLoopResolver cov_loop_resolver_;
+    const bool use_short_loop_cov_resolver_;
+    ShortLoopResolver cov_loop_resolver_;
 
     InsertSizeLoopDetector is_detector_;
-    const GraphCoverageMap& cov_map_;
+    UsedUniqueStorage &used_storage_;
 
-public:
-    LoopDetectingPathExtender(const conj_graph_pack &gp,
-                                  const GraphCoverageMap &cov_map,
-                                  bool investigate_short_loops,
-                                  bool use_short_loop_cov_resolver,
-                                  size_t is)
-            : PathExtender(gp.g),
-              investigate_short_loops_(investigate_short_loops),
-              use_short_loop_cov_resolver_(use_short_loop_cov_resolver),
-              cov_loop_resolver_(gp),
-              is_detector_(gp.g, is),
-              cov_map_(cov_map) {
-
-    }
-
-    bool isInvestigateShortLoops() const {
-        return investigate_short_loops_;
-    }
-
-    void setInvestigateShortLoops(bool investigateShortLoops) {
-        this->investigate_short_loops_ = investigateShortLoops;
+protected:
+    const bool investigate_short_loops_;
+    const GraphCoverageMap &cov_map_;
+
+    bool TryUseEdge(BidirectionalPath &path, EdgeId e, const Gap &gap) {
+        bool success = used_storage_.TryUseEdge(path, e, gap);
+        if (success) {
+            DEBUG("Adding edge. PathId: " << path.GetId() << " path length: " << path.Length() - 1 << ", fixed gap : "
+                                          << gap.gap << ", trash length: " << gap.trash_previous << "-" << gap.trash_current);
+        }
+        return success;
     }
 
     bool DetectCycle(BidirectionalPath& path) {
@@ -1120,13 +944,29 @@ public:
         return false;
     }
 
-    bool DetectCycleScaffolding(BidirectionalPath& path) {
-          return is_detector_.CheckCycledNonIS(path);
+    bool DetectCycleScaffolding(BidirectionalPath& path, EdgeId e) {
+        BidirectionalPath temp_path(path);
+        temp_path.PushBack(e);
+        return is_detector_.CheckCycledNonIS(temp_path);
     }
 
     virtual bool MakeSimpleGrowStep(BidirectionalPath& path, PathContainer* paths_storage = nullptr) = 0;
 
-    virtual bool ResolveShortLoopByCov(BidirectionalPath& path) = 0;
+    virtual bool ResolveShortLoopByCov(BidirectionalPath& path) {
+        LoopDetector loop_detector(&path, cov_map_);
+        size_t init_len = path.Length();
+        bool result = false;
+        while (path.Size() >= 1 && loop_detector.EdgeInShortLoop(path.Back())) {
+            cov_loop_resolver_.ResolveShortLoop(path);
+            if (init_len == path.Length()) {
+                return result;
+            } else {
+                result = true;
+            }
+            init_len = path.Length();
+        }
+        return true;
+    }
 
     virtual bool ResolveShortLoopByPI(BidirectionalPath& path) = 0;
 
@@ -1134,11 +974,30 @@ public:
         return false;
     }
 
+public:
+    LoopDetectingPathExtender(const conj_graph_pack &gp,
+                              const GraphCoverageMap &cov_map,
+                              UsedUniqueStorage &unique,
+                              bool investigate_short_loops,
+                              bool use_short_loop_cov_resolver,
+                              size_t is)
+            : PathExtender(gp.g),
+              use_short_loop_cov_resolver_(use_short_loop_cov_resolver),
+              cov_loop_resolver_(gp.g, make_shared<CoverageLoopEstimator>(gp.g, gp.flanking_cov)),
+              is_detector_(gp.g, is),
+              used_storage_(unique),
+              investigate_short_loops_(investigate_short_loops),
+              cov_map_(cov_map) {
+
+    }
+
+
     bool MakeGrowStep(BidirectionalPath& path, PathContainer* paths_storage) override {
         if (is_detector_.InExistingLoop(path)) {
             DEBUG("in existing loop");
             return false;
         }
+        DEBUG("un ch enabled " << used_storage_.UniqueCheckEnabled());
         bool result;
         LoopDetector loop_detector(&path, cov_map_);
         if (DetectCycle(path)) {
@@ -1187,15 +1046,16 @@ protected:
 class SimpleExtender: public LoopDetectingPathExtender {
 
 protected:
-
     shared_ptr<ExtensionChooser> extensionChooser_;
+    ShortLoopResolver loop_resolver_;
+    double weight_threshold_;
 
     void FindFollowingEdges(BidirectionalPath& path, ExtensionChooser::EdgeContainer * result) {
         DEBUG("Looking for the following edges")
         result->clear();
         vector<EdgeId> edges;
         DEBUG("Pushing back")
-        push_back_all(edges, g_.OutgoingEdges(g_.EdgeEnd(path.Back())));
+        utils::push_back_all(edges, g_.OutgoingEdges(g_.EdgeEnd(path.Back())));
         result->reserve(edges.size());
         for (auto iter = edges.begin(); iter != edges.end(); ++iter) {
             DEBUG("Adding edge w distance " << g_.int_id(*iter));
@@ -1208,14 +1068,17 @@ protected:
 public:
 
     SimpleExtender(const conj_graph_pack &gp,
-                       const GraphCoverageMap &cov_map,
-                       shared_ptr<ExtensionChooser> ec,
-                       size_t is,
-                       bool investigate_short_loops,
-                       bool use_short_loop_cov_resolver) :
-        LoopDetectingPathExtender(gp, cov_map, investigate_short_loops, use_short_loop_cov_resolver, is),
-        extensionChooser_(ec) {
-    }
+                   const GraphCoverageMap &cov_map,
+                   UsedUniqueStorage &unique,
+                   shared_ptr<ExtensionChooser> ec,
+                   size_t is,
+                   bool investigate_short_loops,
+                   bool use_short_loop_cov_resolver,
+                   double weight_threshold = 0.0):
+        LoopDetectingPathExtender(gp, cov_map, unique, investigate_short_loops, use_short_loop_cov_resolver, is),
+        extensionChooser_(ec),
+        loop_resolver_(gp.g, make_shared<CombinedLoopEstimator>(gp.g, gp.flanking_cov, extensionChooser_->wc(), weight_threshold)),
+        weight_threshold_(weight_threshold) {}
 
     std::shared_ptr<ExtensionChooser> GetExtensionChooser() const {
         return extensionChooser_;
@@ -1225,30 +1088,13 @@ public:
         return extensionChooser_->WeightCounterBased();
     }
 
-    bool ResolveShortLoopByCov(BidirectionalPath& path) override {
-        LoopDetector loop_detector(&path, cov_map_);
-        size_t init_len = path.Length();
-        bool result = false;
-        while (path.Size() >= 1 && loop_detector.EdgeInShortLoop(path.Back())) {
-            cov_loop_resolver_.ResolveShortLoop(path);
-            if (init_len == path.Length()) {
-                return result;
-            } else {
-                result = true;
-            }
-            init_len = path.Length();
-        }
-        return true;
-    }
-
     bool ResolveShortLoopByPI(BidirectionalPath& path) override {
         if (extensionChooser_->WeightCounterBased()) {
-            LoopResolver loop_resolver(g_, extensionChooser_->wc());
             LoopDetector loop_detector(&path, cov_map_);
             size_t init_len = path.Length();
             bool result = false;
             while (path.Size() >= 1 && loop_detector.EdgeInShortLoop(path.Back())) {
-                loop_resolver.ResolveShortLoop(path);
+                loop_resolver_.ResolveShortLoop(path);
                 if (init_len == path.Length()) {
                     return result;
                 } else {
@@ -1263,7 +1109,7 @@ public:
 
     bool MakeSimpleGrowStep(BidirectionalPath& path, PathContainer* paths_storage) override {
         ExtensionChooser::EdgeContainer candidates;
-        return FilterCandidates(path, candidates) and AddCandidates(path, paths_storage, candidates);
+        return FilterCandidates(path, candidates) && AddCandidates(path, paths_storage, candidates);
     }
 
 protected:
@@ -1272,7 +1118,7 @@ protected:
             return false;
         }
         DEBUG("Simple grow step");
-        path.Print();
+        path.PrintDEBUG();
         FindFollowingEdges(path, &candidates);
         DEBUG("found candidates");
         DEBUG(candidates.size())
@@ -1305,44 +1151,33 @@ protected:
         EdgeId eid = candidates.back().e_;
 //In 2015 modes when trying to use already used unique edge, it is not added and path growing stops.
 //That allows us to avoid overlap removal hacks used earlier.
-        if (used_storage_->UniqueCheckEnabled()) {
-            if (used_storage_->IsUsedAndUnique(eid)) {
-                return false;
-            } else {
-                used_storage_->insert(eid);
-            }
-        }
-        path.PushBack(eid, candidates.back().d_);
-        DEBUG("push done");
-        return true;
+        Gap gap(candidates.back().d_);
+        return TryUseEdge(path, eid, gap);
     }
 
-protected:
     DECL_LOGGER("SimpleExtender")
-
 };
 
 
 class MultiExtender: public SimpleExtender {
-
-protected:
     size_t max_candidates_;
 
 public:
-
     MultiExtender(const conj_graph_pack &gp,
-                      const GraphCoverageMap &cov_map,
-                      shared_ptr<ExtensionChooser> ec,
-                      size_t is,
-                      bool investigate_short_loops,
-                      bool use_short_loop_cov_resolver,
-                      size_t max_candidates = 0) :
-        SimpleExtender(gp, cov_map, ec, is, investigate_short_loops, use_short_loop_cov_resolver),
+                  const GraphCoverageMap &cov_map,
+                  UsedUniqueStorage &unique,
+                  shared_ptr<ExtensionChooser> ec,
+                  size_t is,
+                  bool investigate_short_loops,
+                  bool use_short_loop_cov_resolver,
+                  double weight_threshold,
+                  size_t max_candidates = 0) :
+        SimpleExtender(gp, cov_map, unique, ec, is, investigate_short_loops, use_short_loop_cov_resolver, weight_threshold),
         max_candidates_(max_candidates) {
     }
 
 protected:
-    virtual bool AddCandidates(BidirectionalPath& path, PathContainer* paths_storage, ExtensionChooser::EdgeContainer& candidates) override {
+    bool AddCandidates(BidirectionalPath& path, PathContainer* paths_storage, ExtensionChooser::EdgeContainer& candidates) override {
         if (candidates.size() == 0)
             return false;
 
@@ -1358,7 +1193,7 @@ protected:
         if (candidates.size() == 1) {
             DEBUG("push");
             EdgeId eid = candidates.back().e_;
-            path.PushBack(eid, candidates.back().d_);
+            path.PushBack(eid, Gap(candidates.back().d_));
             DEBUG("push done");
             return true;
         }
@@ -1375,13 +1210,13 @@ protected:
             for (size_t i = 1; i < candidates.size(); ++i) {
                 DEBUG("push other candidates " << i);
                 BidirectionalPath *p = new BidirectionalPath(path);
-                p->PushBack(candidates[i].e_, candidates[i].d_);
+                p->PushBack(candidates[i].e_, Gap(candidates[i].d_));
                 BidirectionalPath *cp = new BidirectionalPath(p->Conjugate());
                 paths_storage->AddPair(p, cp);
             }
 
             DEBUG("push");
-            path.PushBack(candidates.front().e_, candidates.front().d_);
+            path.PushBack(candidates.front().e_, Gap(candidates.front().d_));
             DEBUG("push done");
             res = true;
 
@@ -1400,10 +1235,9 @@ protected:
 
 
 class ScaffoldingPathExtender: public LoopDetectingPathExtender {
-private:
     std::shared_ptr<ExtensionChooser> extension_chooser_;
     ExtensionChooser::EdgeContainer sources_;
-    std::shared_ptr<GapJoiner> gap_joiner_;
+    std::shared_ptr<GapAnalyzer> gap_analyzer_;
     bool avoid_rc_connections_;
 
 //When check_sink_ set to false we can scaffold not only tips
@@ -1419,102 +1253,110 @@ private:
         }
     }
 
-    bool IsSink(EdgeId e) const    {
+    bool IsSink(EdgeId e) const {
         return g_.OutgoingEdgeCount(g_.EdgeEnd(e)) == 0;
     }
 
+    Gap ConvertGapDescription(const GapDescription &gap) const {
+        if (gap == GapDescription()) {
+            return Gap::INVALID();
+        }
+        return Gap(gap.estimated_dist() + int(g_.k())
+                   - int(gap.left_trim()) - int(gap.right_trim()),
+                   uint32_t(gap.left_trim()), uint32_t(gap.right_trim()));
+    }
+
 protected:
-    virtual bool GapSatisfies(int /*gap*/) const {
+    virtual bool CheckGap(const Gap &/*gap*/) const {
         return true;
     }
 
-    bool MakeSimpleGrowStepForChooser(BidirectionalPath& path, std::shared_ptr<ExtensionChooser> ec, bool must_overlap = false) {
+    bool ResolveShortLoopByCov(BidirectionalPath&) override {
+        return false;
+    }
+
+    bool ResolveShortLoopByPI(BidirectionalPath&) override {
+        return false;
+    }
+
+    //TODO fix awful design with virtual CheckGap and must_overlap flag!
+    bool MakeSimpleGrowStepForChooser(BidirectionalPath& path, std::shared_ptr<ExtensionChooser> ec,
+                                      bool must_overlap = false) {
         if (path.Size() < 1 || (check_sink_ && !IsSink(path.Back()))) {
             return false;
         }
-        DEBUG("scaffolding:");
+
         DEBUG("Simple grow step, growing path");
-        path.Print();
+        path.PrintDEBUG();
         ExtensionChooser::EdgeContainer candidates = ec->Filter(path, sources_);
         DEBUG("scaffolding candidates " << candidates.size() << " from sources " << sources_.size());
 
-        //DEBUG("Extension chooser threshold = " << ec->GetThreshold())
         DEBUG("Candidate size = " << candidates.size())
-        if (candidates.size() == 1) {
-            if (candidates[0].e_ == path.Back()
-                || (avoid_rc_connections_ && candidates[0].e_ == g_.conjugate(path.Back()))) {
-                return false;
-            }
-            BidirectionalPath temp_path(path);
-            temp_path.PushBack(candidates[0].e_);
-            if (this->DetectCycleScaffolding(temp_path)) {
-                return false;
-            }
+        if (candidates.size() != 1) {
+            DEBUG("scaffolding end");
+            return false;
+        }
 
-            EdgeId eid = candidates.back().e_;
-            if (check_sink_) {
-                Gap gap = gap_joiner_->FixGap(path.Back(), candidates.back().e_, candidates.back().d_);
-                DEBUG("Gap after fixing " << gap.gap_ << " (was " << candidates.back().d_ << ")");
-                if (gap.gap_ != GapJoiner::INVALID_GAP) {
-                    DEBUG("Scaffolding. PathId: " << path.GetId() << " path length: " << path.Length() <<
-                        ", fixed gap length: " << gap.gap_ << ", trash length: " << gap.trash_previous_ << "-" <<
-                        gap.trash_current_);
-
-                    if (used_storage_->UniqueCheckEnabled()) {
-                        if (used_storage_->IsUsedAndUnique(eid)) {
-                            return false;
-                        } else {
-                            used_storage_->insert(eid);
-                        }
-                    }
+        EdgeId e = candidates.back().e_;
+        if (e == path.Back()
+            || (avoid_rc_connections_ && e == g_.conjugate(path.Back()))) {
+            return false;
+        }
 
-                    if (must_overlap && GapSatisfies(gap.gap_)) {
-                        DEBUG("Overlap is not large enogh")
-                        return false;
-                    }
-                    DEBUG("Overlap is good, success")
-                    path.PushBack(eid, gap);
-                    return true;
-                }
-                else {
-                    DEBUG("Looks like wrong scaffolding. PathId: " << path.GetId() << " path length: " <<
-                        path.Length() << ", fixed gap length: " << candidates.back().d_ << ", fixed = " << gap.gap_);
-                    return false;
-                }
+        if (this->DetectCycleScaffolding(path, e)) {
+            return false;
+        }
+
+        Gap gap;
+        //TODO is it ok that we either force joining or ignore its possibility
+        if (check_sink_) {
+            gap = ConvertGapDescription(gap_analyzer_->FixGap(GapDescription(path.Back(), e,
+                                                                             candidates.back().d_ -
+                                                                             int(g_.k()))));
+
+            if (gap == Gap::INVALID()) {
+                DEBUG("Looks like wrong scaffolding. PathId: "
+                              << path.GetId() << " path length: " << path.Length()
+                              << ", estimated gap length: " << candidates.back().d_);
+                return false;
             }
-            else {
-                DEBUG("Gap joiners off");
-                DEBUG("Scaffolding. PathId: " << path.GetId() << " path length: " << path.Length()
-                          << ", fixed gap length: " << candidates.back().d_);
 
-                if (used_storage_->UniqueCheckEnabled()) {
-                    if (used_storage_->IsUsedAndUnique(eid)) {
-                        return false;
-                    } else {
-                        used_storage_->insert(eid);
-                    }
-                }
-                path.PushBack(candidates.back().e_, candidates.back().d_);
-                return true;
+            DEBUG("Gap after fixing " << gap.gap << " (was " << candidates.back().d_ << ")");
+
+            if (must_overlap && !CheckGap(gap)) {
+                DEBUG("Overlap is not large enough")
+                return false;
             }
+        } else {
+            DEBUG("Gap joiners off");
+            VERIFY(candidates.back().d_ > int(g_.k()));
+            gap = Gap(candidates.back().d_);
         }
-        DEBUG("scaffolding end");
-        return false;
+
+        return TryUseEdge(path, e, NormalizeGap(gap));
+    }
+
+    Gap NormalizeGap(Gap gap) const {
+        VERIFY(gap != Gap::INVALID());
+        if (gap.overlap_after_trim(g_.k()) > 0)
+            gap.trash_current += gap.overlap_after_trim(g_.k());
+        return gap;
     }
 
 public:
 
     ScaffoldingPathExtender(const conj_graph_pack &gp,
                             const GraphCoverageMap &cov_map,
+                            UsedUniqueStorage &unique,
                             std::shared_ptr<ExtensionChooser> extension_chooser,
-                            std::shared_ptr<GapJoiner> gap_joiner,
+                            std::shared_ptr<GapAnalyzer> gap_analyzer,
                             size_t is,
                             bool investigate_short_loops,
                             bool avoid_rc_connections,
                             bool check_sink = true):
-        LoopDetectingPathExtender(gp, cov_map, investigate_short_loops, false, is),
+        LoopDetectingPathExtender(gp, cov_map, unique, investigate_short_loops, false, is),
         extension_chooser_(extension_chooser),
-        gap_joiner_(gap_joiner),
+        gap_analyzer_(gap_analyzer),
         avoid_rc_connections_(avoid_rc_connections),
         check_sink_(check_sink)
     {
@@ -1525,14 +1367,6 @@ public:
         return MakeSimpleGrowStepForChooser(path, extension_chooser_);
     }
 
-    bool ResolveShortLoopByCov(BidirectionalPath&) override {
-        return false;
-    }
-
-    bool ResolveShortLoopByPI(BidirectionalPath&) override {
-        return false;
-    }
-
     std::shared_ptr<ExtensionChooser> GetExtensionChooser() const {
         return extension_chooser_;
     }
@@ -1548,21 +1382,22 @@ class RNAScaffoldingPathExtender: public ScaffoldingPathExtender {
     int min_overlap_;
 
 protected:
-    bool GapSatisfies(int gap) const override {
-        return gap > (int) g_.k() - min_overlap_;
+    bool CheckGap(const Gap &gap) const override {
+        return gap.overlap_after_trim(g_.k()) >= min_overlap_;
     }
 
 public:
 
     RNAScaffoldingPathExtender(const conj_graph_pack &gp,
                                const GraphCoverageMap &cov_map,
+                               UsedUniqueStorage &unique,
                                std::shared_ptr<ExtensionChooser> extension_chooser,
                                std::shared_ptr<ExtensionChooser> strict_extension_chooser,
-                               std::shared_ptr<GapJoiner> gap_joiner,
+                               std::shared_ptr<GapAnalyzer> gap_joiner,
                                size_t is,
                                bool investigate_short_loops,
                                int min_overlap = 0):
-        ScaffoldingPathExtender(gp, cov_map, extension_chooser, gap_joiner, is, investigate_short_loops, true),
+        ScaffoldingPathExtender(gp, cov_map, unique, extension_chooser, gap_joiner, is, investigate_short_loops, true),
         strict_extension_chooser_(strict_extension_chooser), min_overlap_(min_overlap) {}
 
 
diff --git a/src/common/modules/path_extend/path_filter.hpp b/src/common/modules/path_extend/path_filter.hpp
index b012dd3..2f23fd9 100644
--- a/src/common/modules/path_extend/path_filter.hpp
+++ b/src/common/modules/path_extend/path_filter.hpp
@@ -19,163 +19,58 @@
 
 namespace path_extend {
 
-class CopyOnWritePathFilter {
-
-protected:
-    const Graph& g;
-
-public:
-    CopyOnWritePathFilter(const Graph& g_): g(g_) {
-    }
-
-    virtual bool predicate(BidirectionalPath& path) = 0;
-
-    PathContainer filter(PathContainer& paths) {
-        PathContainer result;
-
-        for (size_t i = 0; i < paths.size(); ++i) {
-            if (predicate(*paths.Get(i)) || predicate(*paths.GetConjugate(i))) {
-                result.AddPair(paths.Get(i), paths.GetConjugate(i));
-            }
-        }
-
-        return result;
-    }
-
-};
-
-
-class IdFilter: public CopyOnWritePathFilter {
-
-protected:
-    std::set<size_t> ids;
-
-public:
-
-    IdFilter(const Graph& g_, std::set<size_t> ids_): CopyOnWritePathFilter(g_), ids(ids_) {
-    }
-
-    virtual bool predicate(BidirectionalPath& path) {
-        return ids.count(path.GetId()) > 0;
-    }
-};
-
-
-class DuplicateFilter {
-
-protected:
-    const Graph& g;
+typedef func::AbstractPredicate<const BidirectionalPath&> AbstractPathCondition;
 
+class EmptyPathCondition: public AbstractPathCondition {
 public:
-    DuplicateFilter(const Graph& g_): g(g_) {
-    }
-
-    PathContainer filter(PathContainer& paths) {
-        PathContainer result;
-
-        for (size_t i = 0; i < paths.size(); ++i) {
-            bool duplicate = false;
-            for (size_t j = 0; j < result.size(); ++j) {
-                if (result[j] == paths[j])
-                    duplicate = true;
-            }
-            if (!duplicate) {
-                result.AddPair(paths.Get(i), paths.GetConjugate(i));
-            }
-        }
+    EmptyPathCondition() {}
 
-        return result;
+    bool Check(checked_type p) const override {
+        return p.Empty();
     }
-
 };
 
-class ErasingPathFilter {
-
-protected:
-    const Graph& g;
-
+class LengthPathCondition: public AbstractPathCondition {
+    size_t min_length_;
 public:
-    ErasingPathFilter(const Graph& g_): g(g_) {
-    }
-
-    virtual bool predicate(BidirectionalPath& path) = 0;
+    LengthPathCondition(size_t min_length): min_length_(min_length) {}
 
-    void filter(PathContainer& paths) {
-        for (PathContainer::Iterator iter = paths.begin(); iter != paths.end(); ) {
-            if (predicate(*iter.get()) || predicate(*iter.getConjugate())) {
-                iter = paths.erase(iter);
-            }
-            else {
-                ++iter;
-            }
-        }
+    bool Check(checked_type p) const override {
+        return p.Length() <= min_length_;
     }
-
 };
 
-
-class CoveragePathFilter: public ErasingPathFilter {
-
-protected:
-    double minCoverage;
+class CoveragePathCondition: public AbstractPathCondition {
+    const Graph& g_;
+    double cov_;
 
 public:
-    CoveragePathFilter(Graph& g_, double cov): ErasingPathFilter(g_), minCoverage(cov) {
+    CoveragePathCondition(const Graph& g, double cov): g_(g), cov_(cov) {}
 
-    }
-
-    virtual bool predicate(BidirectionalPath& path) {
-        for (size_t i = 0; i < path.Size(); ++i) {
-            if (math::ls(g.coverage(path[i]), minCoverage)) {
-                return true;
-            }
+    bool Check(checked_type p) const override {
+        for (size_t i = 0; i < p.Size(); ++i) {
+            if (math::gr(g_.coverage(p[i]), cov_))
+                return false;
         }
-        return false;
-    }
-};
-
-
-class LengthPathFilter: public ErasingPathFilter {
-
-protected:
-    size_t minLength;
-
-public:
-    LengthPathFilter(const Graph& g_, size_t len): ErasingPathFilter(g_), minLength(len) {
-    }
-
-    virtual bool predicate(BidirectionalPath& path) {
-        return path.Length() <= minLength;
+        return true;
     }
 };
 
-
-class IsolatedPathFilter: public ErasingPathFilter {
-
-protected:
-    size_t min_length_;
-
-    double min_cov_;
-
+class IsolatedPathCondition: public AbstractPathCondition {
+    const Graph& g_;
 public:
-    IsolatedPathFilter(const Graph& g_, size_t min_length, double min_cov = 10000000.0):
-        ErasingPathFilter(g_),
-        min_length_(min_length),
-        min_cov_(min_cov) {
-    }
+    IsolatedPathCondition(const Graph& g): g_(g) {}
 
-    virtual bool predicate(BidirectionalPath& path) {
-        if (path.Empty())
+    bool Check(checked_type p) const override {
+        if (p.Empty())
             return true;
 
-        if (path.Size() <= 2) {
-            auto v1 = g.EdgeStart(path.Front());
-            auto v2 = g.EdgeEnd(path.Back());
+        if (p.Size() <= 2) {
+            auto v1 = g_.EdgeStart(p.Front());
+            auto v2 = g_.EdgeEnd(p.Back());
 
-            return g.IncomingEdgeCount(v1) == 0 &&
-                g.OutgoingEdgeCount(v2) == 0 &&
-                path.Length() < min_length_ &&
-                math::ls(path.Coverage(), min_cov_);
+            return g_.IncomingEdgeCount(v1) == 0 &&
+                   g_.OutgoingEdgeCount(v2) == 0;
         }
         return false;
     }
diff --git a/src/common/modules/path_extend/path_visualizer.hpp b/src/common/modules/path_extend/path_visualizer.hpp
index b11d4c2..2951de5 100644
--- a/src/common/modules/path_extend/path_visualizer.hpp
+++ b/src/common/modules/path_extend/path_visualizer.hpp
@@ -38,7 +38,7 @@ public:
                 if (labels_.count(path->At(j)) > 0) {
                     labels_[path->At(j)] += ", ";
                 }
-                labels_[path->At(j)] += "(" + ToString(path->GetId()) + " : " + ToString(j) + ")";
+                labels_[path->At(j)] += "(" + std::to_string(path->GetId()) + " : " + std::to_string(j) + ")";
             }
 
             path = paths.GetConjugate(i);
@@ -46,7 +46,7 @@ public:
                 if (labels_.count(path->At(j)) > 0) {
                     labels_[path->At(j)] += ", ";
                 }
-                labels_[path->At(j)] += "(" + ToString(path->GetId()) + " : " + ToString(j) + ")";
+                labels_[path->At(j)] += "(" + std::to_string(path->GetId()) + " : " + std::to_string(j) + ")";
             }
         }
     }
diff --git a/src/common/modules/path_extend/pe_config_struct.cpp b/src/common/modules/path_extend/pe_config_struct.cpp
index cccb95e..9bf01c6 100644
--- a/src/common/modules/path_extend/pe_config_struct.cpp
+++ b/src/common/modules/path_extend/pe_config_struct.cpp
@@ -10,6 +10,28 @@
 
 namespace path_extend {
 
+//convert string to vector of words separated by space
+std::vector<string> StringToVector(const std::string& s) {
+    std::string word =
+        "";
+    std::vector<string> res;
+    for (size_t i = 0; i < s.length(); ++i) {
+        if (s[i] == ' ') {
+            if (word != "") {
+                res.push_back(word);
+                word = "";
+            }
+        }
+        else {
+            word += s[i];
+
+        }
+    }
+    if (word != "") {
+        res.push_back(word);
+    }
+    return res;
+}
 
 void load(scaffolding_mode &sm, boost::property_tree::ptree const& pt, std::string const& key, bool complete) {
     if (complete || pt.find(key) != pt.not_found()) {
@@ -76,14 +98,12 @@ void load(pe_config::ParamSetT::ScaffolderOptionsT& so,
 
     load(so.use_la_gap_joiner      , pt, "use_la_gap_joiner", complete);
     load(so.min_gap_score      , pt, "min_gap_score", complete);
-    load(so.max_must_overlap      , pt, "max_must_overlap", complete);
     load(so.max_can_overlap      , pt, "max_can_overlap", complete);
     load(so.short_overlap      , pt, "short_overlap", complete);
     load(so.artificial_gap      , pt, "artificial_gap", complete);
-    load(so.use_old_score      , pt, "use_old_score", complete);
     load(so.min_overlap_length, pt, "min_overlap_length", complete);
-    load(so.flank_addition_coefficient, pt, "flank_addition_coefficient", complete);
     load(so.flank_multiplication_coefficient, pt, "flank_multiplication_coefficient", complete);
+    load(so.flank_addition_coefficient, pt, "flank_addition_coefficient", complete);
 
     load(so.var_coeff          , pt, "var_coeff", complete);
     load(so.basic_overlap_coeff, pt, "basic_overlap_coeff", complete);
@@ -104,7 +124,11 @@ void load(pe_config::ParamSetT::PathFiltrationT& pf,
     if (pf.enabled) {
         load(pf.min_length      , pt, "min_length"      , complete);
         load(pf.isolated_min_length      , pt, "isolated_min_length"      , complete);
+        load(pf.isolated_min_cov      , pt, "isolated_min_cov"      , complete);
         load(pf.min_length_for_low_covered      , pt, "min_length_for_low_covered"      , complete);
+        load(pf.rel_cutoff      , pt, "rel_cutoff"      , complete);
+        load(pf.rel_isolated_cutoff      , pt, "rel_isolated_cutoff"      , complete);
+        load(pf.rel_low_covered_cutoff      , pt, "rel_low_covered_cutoff"      , complete);
         load(pf.min_coverage      , pt, "min_coverage"      , complete);
     }
 }
@@ -115,14 +139,34 @@ void load(pe_config::ParamSetT::GenomeConsistencyCheckerParamsT& gcc,
     using config_common::load;
     load(gcc.max_gap      , pt, "max_gap"      , complete);
     load(gcc.relative_max_gap      , pt, "relative_max_gap"      , complete);
+    load(gcc.use_main_storage      , pt, "use_main_storage"      , complete);
+    load(gcc.unresolvable_jump      , pt, "unresolvable_jump"      , complete);
+    load(gcc.unique_length      , pt, "unique_length"      , complete);
+
+}
+
+void load(pe_config::ParamSetT::OverlapRemovalOptionsT& ors,
+          boost::property_tree::ptree const& pt, bool complete) {
+    using config_common::load;
+    load(ors.enabled, pt, "enabled"      , complete);
+    load(ors.end_start_only, pt, "end_start_only"      , complete);
+    load(ors.cut_all, pt, "cut_all"      , complete);
+}
+
+void load(pe_config::ParamSetT::SimpleCoverageResolver& scr,
+          boost::property_tree::ptree const& pt, bool complete)
+{
+    using config_common::load;
+    load(scr.enabled      , pt, "enabled"      , complete);
+    load(scr.coverage_delta      , pt, "coverage_delta"      , complete);
+    load(scr.min_upper_coverage      , pt, "min_upper_coverage"      , complete);
 }
 
 void load(pe_config::ParamSetT& p, boost::property_tree::ptree const& pt, bool complete) {
     using config_common::load;
     load(p.sm, pt, "scaffolding_mode", complete);
     load(p.normalize_weight, pt,  "normalize_weight", complete);
-    load(p.cut_all_overlaps, pt, "cut_all_overlaps", complete);
-    load(p.remove_overlaps, pt, "remove_overlaps", complete);
+    load(p.overlap_removal, pt, "overlap_removal", complete);
     load(p.multi_path_extend, pt, "multi_path_extend", complete);
     load(p.split_edge_length, pt, "split_edge_length", complete);
     load(p.extension_options, pt, "extension_options", complete);
@@ -132,10 +176,20 @@ void load(pe_config::ParamSetT& p, boost::property_tree::ptree const& pt, bool c
     load(p.use_coordinated_coverage, pt, "use_coordinated_coverage", complete);
     load(p.scaffolding2015, pt, "scaffolding2015", complete);
     load(p.scaffold_graph_params, pt, "scaffold_graph", complete);
-    load(p.path_filtration, pt, "path_cleaning", complete);
+
+    string path_cleaning_presets;
+    load(path_cleaning_presets, pt, "path_cleaning_presets", complete);
+    auto presets = StringToVector(path_cleaning_presets);
+    for (auto &key : presets) {
+        pe_config::ParamSetT::PathFiltrationT path_filtration;
+        std::string config_key = key == "default" ? "path_cleaning" : key + "_path_cleaning";
+        load(path_filtration, pt, config_key, complete);
+        p.path_filtration[key] = path_filtration;
+    }
     load(p.genome_consistency_checker, pt, "genome_consistency_checker", complete);
     load(p.uniqueness_analyser, pt, "uniqueness_analyser", complete);
     load(p.loop_traversal, pt, "loop_traversal", complete);
+    load(p.simple_coverage_resolver, pt, "simple_coverage_resolver", complete);
 }
 
 void load(pe_config::LongReads& p, boost::property_tree::ptree const& pt,
diff --git a/src/common/modules/path_extend/pe_config_struct.hpp b/src/common/modules/path_extend/pe_config_struct.hpp
index a5b161f..9d88bb0 100644
--- a/src/common/modules/path_extend/pe_config_struct.hpp
+++ b/src/common/modules/path_extend/pe_config_struct.hpp
@@ -108,8 +108,18 @@ struct pe_config {
         size_t split_edge_length;
 
         bool multi_path_extend;
-        bool remove_overlaps;
-        bool cut_all_overlaps;
+
+        struct OverlapRemovalOptionsT {
+            bool enabled;
+            bool end_start_only;
+            bool cut_all;
+        } overlap_removal;
+
+        struct SimpleCoverageResolver {
+            bool enabled;
+            double coverage_delta;
+            double min_upper_coverage;
+        } simple_coverage_resolver;
 
         struct ExtensionOptionsT {
             bool use_default_single_threshold;
@@ -134,19 +144,16 @@ struct pe_config {
 
             bool use_la_gap_joiner;
             double min_gap_score;
-            double max_must_overlap;
             double max_can_overlap;
             int short_overlap;
             size_t artificial_gap;
 
-            bool use_old_score;
-
             double var_coeff;
             double basic_overlap_coeff;
 
             size_t min_overlap_length;
-            double flank_addition_coefficient;
             double flank_multiplication_coefficient;
+            int flank_addition_coefficient;
 
             boost::optional<int> min_overlap_for_rna_scaffolding;
         } scaffolder_options;
@@ -154,11 +161,16 @@ struct pe_config {
         struct PathFiltrationT {
             bool enabled;
             size_t min_length;
+            double rel_cutoff;
             size_t isolated_min_length;
+            double isolated_min_cov;
+            double rel_isolated_cutoff;
             size_t min_length_for_low_covered;
+            double rel_low_covered_cutoff;
             double min_coverage;
-        } path_filtration;
+        };
 
+        std::map<std::string, PathFiltrationT> path_filtration;
 
         bool use_coordinated_coverage;
 
@@ -191,6 +203,9 @@ struct pe_config {
         struct GenomeConsistencyCheckerParamsT {
             size_t max_gap;
             double relative_max_gap;
+            bool use_main_storage;
+            size_t unresolvable_jump;
+            size_t unique_length;
         } genome_consistency_checker;
 
         struct LoopTraversalParamsT {
diff --git a/src/common/modules/path_extend/pe_resolver.hpp b/src/common/modules/path_extend/pe_resolver.hpp
index dfbd4f3..14cfc41 100644
--- a/src/common/modules/path_extend/pe_resolver.hpp
+++ b/src/common/modules/path_extend/pe_resolver.hpp
@@ -12,455 +12,261 @@
  *      Author: andrey
  */
 
-#ifndef PE_RESOLVER_HPP_
-#define PE_RESOLVER_HPP_
+#pragma once
 
 #include "path_extender.hpp"
 
 namespace path_extend {
 
+typedef const BidirectionalPath * PathPtr;
+typedef unordered_map<PathPtr, set<size_t>> SplitsStorage;
 
-class SimpleOverlapRemover {
+inline void PopFront(BidirectionalPath * const path, size_t cnt) {
+    path->GetConjPath()->PopBack(cnt);
+}
 
-public:
-    SimpleOverlapRemover(const Graph& g, GraphCoverageMap& cm)
-            : g_(g), coverage_map_(cm) {
-    }
+class OverlapRemover {
+    const Graph &g_;
+    const PathContainer &paths_;
+    const OverlapFindingHelper helper_;
+    SplitsStorage splits_;
 
-    void RemoveOverlaps(PathContainer& paths) const {
-        for (size_t i = 0; i < paths.size(); i++) {
-            FindAndRemovePathOverlap(paths, paths.Get(i));
-            FindAndRemovePathOverlap(paths, paths.GetConjugate(i));
-        }
+    bool AlreadyAdded(PathPtr ptr, size_t pos) const {
+        auto it = splits_.find(ptr);
+        return it != splits_.end() && it->second.count(pos);
     }
 
-    size_t NonUniqueCommon(BidirectionalPath * path, int pos1, int pos2) {
-        size_t answer = 0;
-        while (pos1 >= 0) {
-            if (path->At(pos1) == path->At(pos2)) {
-                pos1--;
-                pos2--;
-                answer++;
-            } else {
-                break;
-            }
-        }
-        return answer;
+    // TODO: if the situation start == 0 && end == p.Size() is not interesting, the code can be simplified
+    bool AlreadyAdded(const BidirectionalPath &p, size_t start, size_t end) const {
+        if (start == 0 && AlreadyAdded(&p, end))
+            return true;
+        if (end == p.Size() && AlreadyAdded(p.GetConjPath(), p.Size() - start))
+            return true;
+        return false;
     }
 
-    size_t MaximumNonUniqueSuffix(BidirectionalPath * path) {
-        if (path->Size() == 0) {
+    //NB! This can only be launched over paths taken from path container!
+    size_t AnalyzeOverlaps(const BidirectionalPath &path, const BidirectionalPath &other,
+                           bool end_start_only, bool retain_one_copy) const {
+        VERIFY(!retain_one_copy || !end_start_only);
+        auto range_pair = helper_.FindOverlap(path, other, end_start_only);
+        size_t overlap = range_pair.first.size();
+        auto other_range = range_pair.second;
+
+        if (overlap == 0) {
             return 0;
         }
 
-        size_t answer = 0;
-        EdgeId back = path->Back();
-        vector<size_t> all_pos = path->FindAll(back);
-        for (size_t i = 0; i < all_pos.size() - 1; ++i) {
-            answer = std::max(answer, NonUniqueCommon(path, (int) all_pos[i], (int) path->Size() - 1));
+        // Check whether the region on the other path has already been added
+        //TODO discuss if the logic is needed/correct. It complicates the procedure and prevents trivial parallelism.
+        if (retain_one_copy &&
+                AlreadyAdded(other,
+                             other_range.start_pos,
+                             other_range.end_pos) &&
+                /*forcing "cut_all" behavior on conjugate paths*/
+                &other != path.GetConjPath() &&
+                /*certain overkill*/
+                &other != &path) {
+            return 0;
         }
-        return answer;
-    }
 
-    void CutNonUniqueSuffix(PathContainer& paths) {
-        vector<pair<BidirectionalPath *, BidirectionalPath *>> tmp_paths(paths.begin(), paths.end());
-        for (auto it = tmp_paths.begin(); it != tmp_paths.end(); ++it) {
-            BidirectionalPath * path1 = it->first;
-            BidirectionalPath * path2 = it->second;
-            size_t longest_suffix1 = MaximumNonUniqueSuffix(path1);
-            path1->PopBack(longest_suffix1);
-            size_t longest_suffix2 = MaximumNonUniqueSuffix(path2);
-            path2->PopBack(longest_suffix2);
+        if (&other == &path) {
+            if (overlap == path.Size())
+                return 0;
+            overlap = std::min(overlap, other_range.start_pos);
         }
-    }
 
-    void CutPseudoSelfConjugatePaths(PathContainer& paths) {
-        vector<pair<BidirectionalPath *, BidirectionalPath *>> tmp_paths(paths.begin(), paths.end());
-        for (auto it = tmp_paths.begin(); it != tmp_paths.end(); ++it) {
-            BidirectionalPath * path1 = it->first;
-            BidirectionalPath * path2 = it->second;
-            bool ups = false;
-            if(path1 != path2) {
-                size_t last = 0;
-                while(last < path1->Size() && path1->operator [](last) == path2->operator [](last)) {
-                    last++;
-                }
-                if(last > 0) {
-                    AddOverlap(paths, path1, 0, last - 1);
-                    path1->PopBack(last);
-                    path2->PopBack(last);
-                }
-            }
-            if(ups) path1->Print();
+        if (&other == path.GetConjPath()) {
+            overlap = std::min(overlap, other.Size() - other_range.end_pos);
         }
+
+        DEBUG("First " << overlap << " edges of the path will be removed");
+        DEBUG(path.str());
+        DEBUG("Due to overlap with path");
+        DEBUG(other.str());
+        DEBUG("Range " << other_range);
+
+        return overlap;
     }
 
-    void RemoveSimilarPaths(PathContainer& paths, size_t min_edge_len, size_t max_path_diff, bool del_only_equal, bool del_subpaths, bool del_begins, bool del_all, bool add_overlap_begins) const {
-        DEBUG("== Removing similar paths ==");
-        DEBUG("Min edge len " << min_edge_len << ", max path diff " << max_path_diff)
-        DEBUG("Only equal " << del_only_equal << ", subpaths " << del_subpaths << ", starts " << del_begins << ", all " << del_all << ", add starts " << add_overlap_begins);
-        std::vector<EdgeId> edges = GetSortedEdges();
-        for (size_t edgeIndex = 0; edgeIndex < edges.size(); ++edgeIndex) {
-            EdgeId edge = edges.at(edgeIndex);
-            BidirectionalPathSet cov_paths = coverage_map_.GetCoveringPaths(edge);
-            std::vector<BidirectionalPath*> cov_vect(cov_paths.begin(), cov_paths.end());
-            std::sort(cov_vect.begin(), cov_vect.end(), PathIdCompare);
-            for (size_t vect_i = 0; vect_i < cov_vect.size(); ++vect_i) {
-                BidirectionalPath* path1 = cov_vect.at(vect_i);
-                if (cov_paths.find(path1) == cov_paths.end()) {
-                    continue;
-                }
-                for (size_t vect_i1 = vect_i + 1; vect_i1 < cov_vect.size(); ++vect_i1) {
-                    BidirectionalPath* path2 = cov_vect.at(vect_i1);
-                    if (path1 == path2 || path1 == path2->GetConjPath()) {
-                        continue;
-                    }
-                    if (cov_paths.find(path2) == cov_paths.end())
-                        continue;
-                    if ((*path1) == (*path2)) {
-                        if (path2->IsOverlap()) {
-                            path1->SetOverlap(true);
-                        }
-                        DEBUG("Removing path " << path2->GetId() << " because of path " << path1->GetId());
-                        path2->Print();
-                        path1->Print();
-                        path2->Clear();
-                        cov_paths = coverage_map_.GetCoveringPaths(edge);
-                        continue;
-                    }
-                    if (g_.length(edge) <= min_edge_len || path1->IsOverlap() || path2->IsOverlap() || del_only_equal) {
-                        continue;
-                    }
-                    CompareAndCut(paths, edge, path1, path2, max_path_diff,
-                                  del_subpaths, del_begins, del_all, add_overlap_begins);
-                    cov_paths = coverage_map_.GetCoveringPaths(edge);
-                }
+    void MarkStartOverlaps(const BidirectionalPath &path, bool end_start_only, bool retain_one_copy) {
+        set<size_t> overlap_poss;
+        for (PathPtr candidate : helper_.FindCandidatePaths(path)) {
+            size_t overlap = AnalyzeOverlaps(path, *candidate,
+                                             end_start_only, retain_one_copy);
+            if (overlap > 0) {
+                overlap_poss.insert(overlap);
             }
         }
-        DEBUG("== Emd removing similar paths ==");
-    }
-
-private:
-    
-    void SubscribeCoverageMap(BidirectionalPath* path) const {
-        path->Subscribe(&coverage_map_);
-        for (size_t i = 0; i < path->Size(); ++i) {
-            coverage_map_.BackEdgeAdded(path->At(i), path, path->GapAt(i));
+        if (!overlap_poss.empty()) {
+            utils::insert_all(splits_[&path], overlap_poss);
         }
     }
 
-    void CompareAndCut(PathContainer& paths, EdgeId edge, BidirectionalPath* path1, BidirectionalPath* path2,
-                       size_t max_path_diff,
-                       bool del_subpaths, bool del_begins,
-                       bool del_all, bool add_overlap_begins) const {
-        vector<size_t> positions1 = path1->FindAll(edge);
-        vector<size_t> positions2 = path2->FindAll(edge);
-        size_t i1 = 0;
-        size_t i2 = 0;
-        bool renewed = false;
-        while (i1 < positions1.size()) {
-            while (i2 < positions2.size()) {
-                DEBUG("CompareAndCutFromPos paths " << g_.int_id(edge));
-                CompareAndCutFromPos(paths, path1, (int) positions1[i1], path2,
-                                     (int) positions2[i2], max_path_diff,
-                                     del_subpaths, del_begins, del_all, add_overlap_begins);
-
-                if (positions1[i1] >= path1->Size() || path1->At(positions1[i1]) != edge || positions2[i2] >= path2->Size() || path2->At(positions2[i2]) != edge) {
-                    vector<size_t> new_positions1 = path1->FindAll(edge);
-                    vector<size_t> new_positions2 = path2->FindAll(edge);
-
-                    if (new_positions1.size() == positions1.size() && new_positions2.size() == positions2.size()) {
-                        return;
-                    }
-                    else {
-                        positions1 = new_positions1;
-                        positions2 = new_positions2;
-                        i1 = 0;
-                        i2 = 0;
-                        renewed = true;
-                        break;
-                    }
-                    ++i2;
-                }
-                ++i2;
-            }
-
-            if (renewed) {
-                renewed = false;
+    void InnerMarkOverlaps(bool end_start_only, bool retain_one_copy) {
+        for (auto path_pair: paths_) {
+            //TODO think if this "optimization" is necessary
+            if (path_pair.first->Size() == 0)
                 continue;
-            }
-            ++i1;
+            MarkStartOverlaps(*path_pair.first, end_start_only, retain_one_copy);
+            MarkStartOverlaps(*path_pair.second, end_start_only, retain_one_copy);
         }
     }
 
-    void CompareAndCutFromPos(PathContainer& paths, BidirectionalPath* path1, int pos1,
-                              BidirectionalPath* path2, int pos2,
-                              size_t max_path_diff,
-                              bool delete_subpaths, bool delete_begins,
-                              bool delete_all, bool add_overlap_begins) const {
-        int last2 = pos2;
-        int last1 = pos1;
-        if (last1 >= (int) path1->Size() || last2 >= (int) path2->Size()) {
-            return;
-        }
-        vector<int> other_path_end;
-        pair<int, int> posRes = ComparePaths(last1, last2, *path1, *path2, max_path_diff);
-        last1 = posRes.first;
-        last2 = posRes.second;
-        BidirectionalPath* conj1 = path1->GetConjPath();
-        BidirectionalPath* conj2 = path2->GetConjPath();
-        size_t first1 = conj1->Size() - pos1 - 1;
-        size_t first2 = conj2->Size() - pos2 - 1;
-        posRes = ComparePaths(first1, first2, *conj1, *conj2, max_path_diff);
-        first2 = conj2->Size() - posRes.second - 1;
-        first1 = conj1->Size() - posRes.first - 1;
-        if ((int)path2->LengthAt(last2) - (int)g_.length(path2->At(last2)) < (int) max_path_diff) {
-            last2 = (int)path2->Size() - 1;
-        }
-        if ((int)path2->Length() - (int)path2->LengthAt(first2) < (int) max_path_diff) {
-            first2 = 0;
-        }
-        if ((int)path1->LengthAt(last1) - (int)g_.length(path1->At(last1)) < (int) max_path_diff) {
-            last1 = (int)path1->Size() - 1;
-        }
-        if ((int)path1->Length() - (int)path1->LengthAt(first1) < (int) max_path_diff) {
-            first1 = 0;
-        }
+public:
+    OverlapRemover(const Graph &g,
+                         const PathContainer &paths,
+                         GraphCoverageMap &coverage_map,
+                         size_t min_edge_len,// = 0,
+                         size_t max_diff) :// = 0) :
+            g_(g),
+            paths_(paths),
+            helper_(g, coverage_map,
+                    min_edge_len, max_diff) {
+    }
 
-        CutOverlaps(paths, path1, first1, last1, path1->Size(), path2,
-                         first2, last2, path2->Size(), delete_subpaths,
-                         delete_begins, delete_all, add_overlap_begins);
+    // Note that during start/end removal all repeat instances have to be cut
+//    void MarkOverlaps(bool end_start_only = false, bool retain_one_copy = true) {
+    void MarkOverlaps(bool end_start_only, bool retain_one_copy) {
+        VERIFY(!end_start_only || !retain_one_copy);
+        INFO("Marking start/end overlaps");
+        InnerMarkOverlaps(/*end/start overlaps only*/ true, /*retain one copy*/ false);
+        if (!end_start_only) {
+            INFO("Marking remaining overlaps");
+            InnerMarkOverlaps(/*end/start overlaps only*/ false, retain_one_copy);
+        }
     }
 
-    void AddOverlap(PathContainer& paths, BidirectionalPath* path1, size_t first1, size_t last1) const {
-        BidirectionalPath* overlap = new BidirectionalPath(path1->SubPath(first1, last1 + 1));
-        BidirectionalPath* conj_overlap = new BidirectionalPath(overlap->Conjugate());
-        SubscribeCoverageMap(overlap);
-        SubscribeCoverageMap(conj_overlap);
-        paths.AddPair(overlap, conj_overlap);
+    const SplitsStorage& overlaps() const {
+        return splits_;
     }
 
-    bool CutOverlaps(PathContainer& paths, BidirectionalPath* path1, size_t first1, size_t last1, size_t size1, BidirectionalPath* path2, size_t first2,
-                     size_t last2, size_t size2, bool del_subpaths, bool del_begins, bool del_all, bool add_overlap_begins) const {
-        if (first1 == 0 && last1 == size1 - 1 && del_subpaths) {
-            DEBUG("Removing path " << path1->GetId() << " because of path " << path2->GetId());
-            path1->Print();
-            path2->Print();
-            path1->Clear();
-        } else if (first2 == 0 && last2 == size2 - 1 && del_subpaths) {
-            DEBUG("Removing path " << path2->GetId() << " because of path " << path1->GetId());
-            path2->Print();
-            path1->Print();
-            path2->Clear();
-        } else if (first2 == 0 && first1 == 0 && del_begins) {
-            DEBUG("Path " << path1->GetId() << ", len " << path1->Length() << " and path " << path2->GetId() << ", len " << path2->Length() <<  " have similar starts");
-            DEBUG("Path 1: " << last1 << " edges of length " << path1->Length() - path1->LengthAt(min(last1 + 1, path1->Size() - 1)));
-            DEBUG("Path 2: " << last2 << " edges of length " << path2->Length() - path2->LengthAt(min(last2 + 1, path2->Size() - 1)));
-            DEBUG("Path 1 has overlap start " << path1->HasOverlapedBegin() << ", path 2 has overlap start " <<  path2->HasOverlapedBegin());
-
-            if (add_overlap_begins) {
-                AddOverlap(paths, path1, first1, last1);
-                DEBUG("Detaching overlap " << path2->GetId() << " and " << path1->GetId());
-                path2->Print();
-                path1->Print();
-                path1->GetConjPath()->PopBack(last1 + 1);
-                path2->GetConjPath()->PopBack(last2 + 1);
-            } else if (path1->Length() < path2->Length()) {
-                DEBUG("Detaching overlap from " << path1->GetId() << " because of "<< path2->GetId());
-                path1->Print();
-                path2->Print();
-                path1->GetConjPath()->PopBack(last1 + 1);
-            } else {
-                DEBUG("Detaching overlap from " << path2->GetId() << " because of "<< path1->GetId());
-                path2->Print();
-                path1->Print();
-                path2->GetConjPath()->PopBack(last2 + 1);
-            }
-        } else if ((last1 == size1 - 1 && last2 == size2 - 1) && del_begins) {
-            DEBUG("Path " << path1->GetId() << ", len " << path1->Length() << " and path " << path2->GetId() << ", len " << path2->Length() << " have similar ends");
-            DEBUG("Path 1: " << path1->Size() - first1 << " edges of length " << path1->LengthAt(first1));
-            DEBUG("Path 2: " << path2->Size() - first2 << " edges of length " << path2->LengthAt(first2));
-            DEBUG("Path 1 has overlap end " << path1->HasOverlapedEnd() << ", path 2 has overlap end " <<  path2->HasOverlapedEnd());
-
-            if (add_overlap_begins){
-                AddOverlap(paths, path1, first1, last1);
-                DEBUG("Detaching overlap " << path2->GetId() << " and " << path1->GetId());
-                path2->Print();
-                path1->Print();
-                path1->PopBack(last1 + 1 - first1);
-                path2->PopBack(last2 + 1 - first2);
-            }
-            if (path1->Length() < path2->Length()) {
-                DEBUG("Detaching overlap from " << path1->GetId() << " because of "<< path2->GetId());
-                path1->Print();
-                path2->Print();
-                path1->PopBack(last1 + 1 - first1);
-            } else {
-                DEBUG("Detaching overlap from " << path2->GetId() << " because of "<< path1->GetId());
-                path2->Print();
-                path1->Print();
-                path2->PopBack(last2 + 1 - first2);
+private:
+    DECL_LOGGER("OverlapRemover");
+};
+
+class PathSplitter {
+    const SplitsStorage splits_;
+    PathContainer &paths_;
+    GraphCoverageMap &coverage_map_;
+
+    set<size_t> TransformConjSplits(PathPtr p) const {
+        set<size_t> path_splits;
+        size_t path_len = p->Size();
+        auto it = splits_.find(p);
+        if (it != splits_.end()) {
+//                std::transform(it->second.begin(), it->second.end(),
+//                               std::inserter(path_splits, path_splits.end()),
+//                               [=] (size_t pos) {return path_len - pos;});
+            for (size_t pos : it->second) {
+                path_splits.insert(path_len - pos);
             }
-        } else if (first2 == 0 && del_all) {
-            DEBUG("Detaching overlap from " << path2->GetConjPath()->GetId() << " because of "<< path1->GetId());
-            DEBUG("Does it have overlap in the beginning: " << path2->HasOverlapedBegin());
-            path2->Print();
-            DEBUG(" >>>> ")
-            path1->Print();
-            DEBUG(" ==== ");
-            path2->GetConjPath()->PopBack(last2 + 1);
-        } else if (last2 == size2 - 1 && del_all) {
-            DEBUG("Detaching overlap from " << path2->GetId() << " because of "<< path1->GetId());
-            DEBUG("Does it have overlap in the end: " << path2->HasOverlapedEnd());
-            path2->Print();
-            DEBUG(" >>>> ")
-            path1->Print();
-            DEBUG(" ==== ");
-            path2->PopBack(last1 + 1 - first1);
-        } else if (first1 == 0 && del_all) {
-            DEBUG("Detaching overlap from " << path1->GetConjPath()->GetId() << " because of "<< path2->GetId());
-            DEBUG("Does it have overlap in the end: " << path1->HasOverlapedBegin());
-            path1->Print();
-            DEBUG(" >>>> ")
-            path2->Print();
-            DEBUG(" ==== ");
-            path1->GetConjPath()->PopBack(last1 + 1);
-        } else if (last1 == size1 - 1 && del_all) {
-            DEBUG("Detaching overlap from " << path1->GetId() << " because of "<< path2->GetId());
-            DEBUG("Does it have overlap in the end: " << path1->HasOverlapedBegin());
-            path1->Print();
-            DEBUG(" >>>> ")
-            path2->Print();
-            DEBUG(" ==== ");
-            path1->PopBack(last1 + 1 - first1);
-        } else {
-            return false;
         }
-        return true;
+        return path_splits;
     }
 
-    std::vector<EdgeId> GetSortedEdges() const {
-        std::set<EdgeId> edges_set;
-        for (auto iter = g_.ConstEdgeBegin(); !iter.IsEnd(); ++iter) {
-            edges_set.insert(*iter);
-            edges_set.insert(g_.conjugate(*iter));
+    set<size_t> GatherAllSplits(const PathPair &pp) const {
+        VERIFY(pp.first->Size() == pp.second->Size());
+        set<size_t> path_splits = TransformConjSplits(pp.second);
+        auto it = splits_.find(pp.first);
+        if (it != splits_.end()) {
+            utils::insert_all(path_splits, it->second);
         }
-        std::vector<EdgeId> edges(edges_set.begin(), edges_set.end());
-        std::sort(edges.begin(), edges.end(), EdgeLengthAndIdComparator(g_));
-        return edges;
-    }
-
-    bool HasAlreadyOverlapedEnd(BidirectionalPath * path) const {
-        return !path->IsOverlap() and path->HasOverlapedEnd();
+        return path_splits;
     }
 
-    bool HasAlreadyOverlapedBegin(BidirectionalPath * path) const {
-        return !path->IsOverlap() and path->HasOverlapedBegin();
+    void SplitPath(BidirectionalPath * const p, const set<size_t> &path_splits) {
+        size_t start_pos = 0;
+        for (size_t split_pos : path_splits) {
+            if (split_pos == 0)
+                continue;
+            if (split_pos == p->Size())
+                break;
+            AddPath(paths_, p->SubPath(start_pos, split_pos), coverage_map_);
+            start_pos = split_pos;
+        }
+        PopFront(p, start_pos);
     }
 
-    bool IsSamePath(BidirectionalPath * path1,
-                    BidirectionalPath * path2) const {
-        return *path2 == *path1 or *path2 == *path1->GetConjPath();
-    }
+public:
+    PathSplitter(const SplitsStorage &splits,
+                 PathContainer &paths,
+                 GraphCoverageMap &coverage_map) :
+            splits_(splits),
+            paths_(paths),
+            coverage_map_(coverage_map) {}
+
+     void Split() {
+         vector<PathPair> tmp_paths(paths_.begin(), paths_.end());
+         for (auto path_pair: tmp_paths) {
+             SplitPath(path_pair.first, GatherAllSplits(path_pair));
+         }
+     }
 
-    void RemoveOverlap(PathContainer& paths, BidirectionalPath* path1,
-                       BidirectionalPath* path2, size_t overlap_size) const {
-        BidirectionalPath* conj2 = path2->GetConjPath();
-        if (path1->IsOverlap() && overlap_size == path1->Size()) {
-            DEBUG("Detaching overlap from " << path2->GetConjPath()->GetId() << " because of "<< path1->GetId());
-            path2->Print();
-            path1->Print();
-            conj2->PopBack(overlap_size);
-            path2->SetOverlapedBeginTo(path1);
-        } else if (path2->IsOverlap() && path2->Size() == overlap_size) {
-            DEBUG("Detaching overlap from " << path1->GetId() << " because of "<< path2->GetId());
-            path1->Print();
-            path2->Print();
-            path1->PopBack(overlap_size);
-            path1->SetOverlapedEndTo(path2);
-        } else if (overlap_size < path2->Size()
-                && overlap_size < path1->Size()) {
-            BidirectionalPath *overlap = new BidirectionalPath(g_, path1->Back());
-            BidirectionalPath *conj_overlap = new BidirectionalPath(g_, g_.conjugate(path1->Back()));
-            SubscribeCoverageMap(overlap);
-            SubscribeCoverageMap(conj_overlap);
-            paths.AddPair(overlap, conj_overlap);
-            DEBUG("Detaching overlap " << path1->GetId() << " and " << conj2->GetId());
-            path1->Print();
-            conj2->Print();
-            path1->PopBack();
-            conj2->PopBack();
-
-            for (size_t i = 1; i < overlap_size; ++i) {
-                conj_overlap->PushBack(g_.conjugate(path1->Back()));
-                path1->PopBack();
-                conj2->PopBack();
-            }
-            overlap->SetOverlap(true);
-            path1->SetOverlapedEndTo(overlap);
-            path2->SetOverlapedBeginTo(overlap);
-        }
-    }
+private:
+    DECL_LOGGER("PathSplitter");
+};
 
-    void FindAndRemovePathOverlap(PathContainer& all_paths,
-                                  BidirectionalPath* path1) const {
-        int last = (int) path1->Size() - 1;
-        if (last <= 0 or coverage_map_.GetCoverage(path1->At(last)) <= 1) {
-            return;
-        }
-        BidirectionalPathSet paths =
-                coverage_map_.GetCoveringPaths(path1->At(last));
-        BidirectionalPath* overlap_path = NULL;
-        size_t overlap_size = 0;
-        for (auto path_iter = paths.begin(); path_iter != paths.end();
-                ++path_iter) {
-            if (IsSamePath(*path_iter, path1)) {
+class PathDeduplicator {
+    const Graph& g_;
+    PathContainer &paths_;
+    const bool equal_only_;
+    const OverlapFindingHelper helper_;
+
+    bool IsRedundant(PathPtr path) const {
+        TRACE("Checking if path redundant " << path->GetId());
+        for (auto candidate : helper_.FindCandidatePaths(*path)) {
+            TRACE("Considering candidate " << candidate->GetId());
+//                VERIFY(candidate != path && candidate != path->GetConjPath());
+            if (candidate == path || candidate == path->GetConjPath())
                 continue;
+            if (equal_only_ ? helper_.IsEqual(*path, *candidate) : helper_.IsSubpath(*path, *candidate)) {
+                return true;
             }
-            size_t over_size = path1->OverlapEndSize(*path_iter);
-            if (over_size > overlap_size) {
-                overlap_size = over_size;
-                overlap_path = *path_iter;
-            } else if (over_size == overlap_size &&
-                    (overlap_path == NULL || (*path_iter)->GetId() < overlap_path->GetId())) {
-                overlap_path = *path_iter;
-            }
-        }
-        if (overlap_path == NULL) {
-            return;
-        }
-        if (overlap_size > 0) {
-            RemoveOverlap(all_paths, path1, overlap_path, overlap_size);
         }
+        return false;
     }
 
-    class EdgeLengthAndIdComparator {
-    public:
-        EdgeLengthAndIdComparator(const Graph& g)
-                : g_(g) {
-        }
-        bool operator()(const EdgeId& e1, const EdgeId& e2) const {
-            if (g_.length(e1) > g_.length(e2)) {
-                return true;
-            }
-            if (g_.length(e2) > g_.length(e1)) {
-                return false;
+public:
+
+    PathDeduplicator(const Graph &g,
+                     PathContainer &paths,
+                     GraphCoverageMap &coverage_map,
+                     size_t min_edge_len,
+                     size_t max_diff,
+                     bool equal_only) :
+            g_(g),
+            paths_(paths),
+            equal_only_(equal_only),
+            helper_(g, coverage_map, min_edge_len, max_diff) {}
+
+    //TODO use path container filtering?
+    void Deduplicate() {
+        for (auto path_pair : paths_) {
+            auto path = path_pair.first;
+            if (IsRedundant(path)) {
+                TRACE("Clearing path " << path->str());
+                path->Clear();
             }
-            return e1.int_id() < e2.int_id();
         }
-    private:
-        const Graph& g_;
-    };
+    }
 
-    const Graph& g_;
-    GraphCoverageMap& coverage_map_;
-protected:
-    DECL_LOGGER("PEResolver")
+private:
+    DECL_LOGGER("PathDeduplicator");
 };
 
+inline void Deduplicate(const Graph &g, PathContainer &paths, GraphCoverageMap &coverage_map,
+                 size_t min_edge_len, size_t max_path_diff,
+                 bool equal_only = false) {
+    //add sorting to guarantee survival of the longest paths if max_path_diff is used
+    //paths.SortByLength(false);
+    PathDeduplicator deduplicator(g, paths, coverage_map, min_edge_len, max_path_diff, equal_only);
+    deduplicator.Deduplicate();
+    paths.FilterEmptyPaths();
+}
+
 class PathExtendResolver {
 
-protected:
     const Graph& g_;
     size_t k_;
 
@@ -471,107 +277,59 @@ public:
     PathContainer MakeSimpleSeeds() const {
         std::set<EdgeId> included;
         PathContainer edges;
-        for (auto iter = g_.ConstEdgeBegin(); !iter.IsEnd(); ++iter) {
-            if (g_.int_id(*iter) <= 0 or InTwoEdgeCycle(*iter, g_))
+        for (auto iter = g_.ConstEdgeBegin(/*canonical only*/true); !iter.IsEnd(); ++iter) {
+            EdgeId e = *iter;
+            if (g_.int_id(e) <= 0 || InTwoEdgeCycle(e, g_))
                 continue;
-            if (included.count(*iter) == 0) {
-                BidirectionalPath * first = new BidirectionalPath(g_, *iter);
-                BidirectionalPath * second = new BidirectionalPath(g_, g_.conjugate(*iter));
-                edges.AddPair(first,second);
-                included.insert(*iter);
-                included.insert(g_.conjugate(*iter));
-            }
+            edges.AddPair(new BidirectionalPath(g_, e), new BidirectionalPath(g_, g_.conjugate(e)));
         }
         return edges;
     }
 
-    PathContainer ExtendSeeds(PathContainer &seeds, ContigsMaker &pathExtender) const {
+    PathContainer ExtendSeeds(PathContainer &seeds, CompositeExtender &composite_extender) const {
         PathContainer paths;
-        pathExtender.GrowAll(seeds, paths);
+        composite_extender.GrowAll(seeds, paths);
         return paths;
     }
 
-    void RemoveEqualPaths(PathContainer &paths, GraphCoverageMap &coverage_map,
-                          size_t min_edge_len) const  {
-
-        SimpleOverlapRemover remover(g_, coverage_map);
-        remover.RemoveSimilarPaths(paths, min_edge_len, min_edge_len, true, false, false, false, false);
-    }
-
-    void RemoveRNAOverlaps(PathContainer& paths, GraphCoverageMap& coverage_map,
-                          size_t min_edge_len, size_t max_path_diff) const  {
-
-        SimpleOverlapRemover remover(g_, coverage_map);
-        remover.RemoveSimilarPaths(paths, min_edge_len, max_path_diff, true, false, false, false, false);
-
-        remover.RemoveSimilarPaths(paths, min_edge_len, max_path_diff, false, true, false, false, false);
-
-        remover.RemoveOverlaps(paths);
-
-        remover.RemoveSimilarPaths(paths, min_edge_len, max_path_diff, true, false, false, false, false);
-    }
-
+    //Paths should be deduplicated first!
     void RemoveOverlaps(PathContainer &paths, GraphCoverageMap &coverage_map,
                         size_t min_edge_len, size_t max_path_diff,
-                        bool add_overlaps_begin,
-                        bool cut_preudo_self_conjugate) const {
-        SimpleOverlapRemover remover(g_, coverage_map);
-        if (cut_preudo_self_conjugate)
-            remover.CutPseudoSelfConjugatePaths(paths);
-
-        remover.CutNonUniqueSuffix(paths);
-        //writer.WritePathsToFASTA(paths, output_dir + "/before.fasta");
-        //DEBUG("Removing subpaths");
-        //delete not only eq,
-        remover.RemoveSimilarPaths(paths, min_edge_len, max_path_diff, false, true, false, false, add_overlaps_begin);
-        //writer.WritePathsToFASTA(paths, output_dir + "/remove_similar.fasta");
-        //DEBUG("Remove overlaps")
-        remover.RemoveOverlaps(paths);
-        //writer.WritePathsToFASTA(paths, output_dir + "/after_remove_overlaps.fasta");
-        remover.RemoveSimilarPaths(paths, min_edge_len, max_path_diff, true, false, false, false, add_overlaps_begin);
-        //writer.WritePathsToFASTA(paths, output_dir + "/remove_equal.fasta");
-        //DEBUG("remove similar path. Max difference " << max_overlap);
-        remover.RemoveSimilarPaths(paths, min_edge_len, max_path_diff, false, true, true, true, add_overlaps_begin);
-        DEBUG("end removing");
-    }
-
-    void RemoveMatePairEnds(PathContainer& paths, size_t min_edge_len) const {
-        DEBUG("remove mp ends");
-        for (size_t i = 0; i < paths.size(); ++i) {
-            RemoveMatePairEnd(*paths.Get(i), min_edge_len);
-            RemoveMatePairEnd(*paths.GetConjugate(i), min_edge_len);
+                        bool end_start_only, bool cut_all) const {
+        INFO("Removing overlaps");
+        //VERIFY(min_edge_len == 0 && max_path_diff == 0);
+        if (!cut_all) {
+            INFO("Sorting paths");
+            //sorting is currently needed to retain overlap instances in the longest paths
+            paths.SortByLength(false);
         }
+
+        OverlapRemover overlap_remover(g_, paths, coverage_map,
+                                             min_edge_len, max_path_diff);
+        INFO("Marking overlaps");
+        overlap_remover.MarkOverlaps(end_start_only, !cut_all);
+
+        INFO("Splitting paths");
+        PathSplitter splitter(overlap_remover.overlaps(), paths, coverage_map);
+        splitter.Split();
+        //splits are invalidated after this point
+
+        INFO("Deduplicating paths");
+        Deduplicate(g_, paths, coverage_map, min_edge_len, max_path_diff);
+        INFO("Overlaps removed");
     }
 
     void AddUncoveredEdges(PathContainer &paths, GraphCoverageMap &coverageMap) const {
-        std::set<EdgeId> included;
-        for (auto iter = g_.ConstEdgeBegin(); !iter.IsEnd(); ++iter) {
-            if (included.count(*iter) == 0 && !coverageMap.IsCovered(*iter)) {
-                BidirectionalPath* path = new BidirectionalPath(g_, *iter);
-                BidirectionalPath* conj = new BidirectionalPath(g_, g_.conjugate(*iter));
-                path->Subscribe(&coverageMap);
-                conj->Subscribe(&coverageMap);
-                coverageMap.BackEdgeAdded(path->At(0), path, path->GapAt(0));
-                coverageMap.BackEdgeAdded(conj->At(0), conj, conj->GapAt(0));
-                paths.AddPair(path, conj);
-                included.insert(*iter);
-                included.insert(g_.conjugate(*iter));
+        for (auto iter = g_.ConstEdgeBegin(true); !iter.IsEnd(); ++iter) {
+            EdgeId e = *iter;
+            if (!coverageMap.IsCovered(e)) {
+                AddPath(paths, BidirectionalPath(g_, e), coverageMap);
             }
         }
     }
 
-private:
-    void RemoveMatePairEnd(BidirectionalPath& path, size_t min_edge_len) const {
-        int pos = int(path.Size()) - 1;
-        while (pos > 0 and g_.length(path.At(pos)) < min_edge_len) {
-            path.PopBack();
-            pos--;
-        }
-    }
 protected:
     DECL_LOGGER("PEResolver")
 };
 
-} /* PE_RESOLVER_HPP_ */
-
-#endif
+}
diff --git a/src/common/modules/path_extend/pe_utils.hpp b/src/common/modules/path_extend/pe_utils.hpp
index 8df0968..6aee126 100644
--- a/src/common/modules/path_extend/pe_utils.hpp
+++ b/src/common/modules/path_extend/pe_utils.hpp
@@ -35,34 +35,19 @@ inline bool InTwoEdgeCycle(EdgeId e, const Graph &g) {
     return false;
 }
 
-inline bool InBuble(EdgeId e, const Graph& g) {
-    auto edges = g.OutgoingEdges(g.EdgeStart(e));
-    auto endVertex = g.EdgeEnd(e);
-    for (auto it = edges.begin(); it != edges.end(); ++it) {
-        if ((g.EdgeEnd(*it) == endVertex) and (*it != e)) {
-            return true;
-        }
-    }
-    return false;
-}
-
-
 // Handles all paths in PathContainer.
 // For each edge output all paths  that _traverse_ this path. If path contains multiple instances - count them. Position of the edge is not reported.
 class GraphCoverageMap: public PathListener {
-
 public:
     typedef BidirectionalPathMultiset MapDataT;
 
-
 private:
     const Graph& g_;
 
-    std::unordered_map <EdgeId, MapDataT * > edge_coverage_;
+    std::unordered_map<EdgeId, MapDataT * > edge_coverage_;
+    const MapDataT empty_;
 
-    MapDataT * empty_;
-
-    virtual void EdgeAdded(EdgeId e, BidirectionalPath * path, Gap /*gap*/) {
+    void EdgeAdded(EdgeId e, BidirectionalPath * path) {
         auto iter = edge_coverage_.find(e);
         if (iter == edge_coverage_.end()) {
             edge_coverage_.insert(std::make_pair(e, new MapDataT()));
@@ -70,7 +55,7 @@ private:
         edge_coverage_[e]->insert(path);
     }
 
-    virtual void EdgeRemoved(EdgeId e, BidirectionalPath * path) {
+    void EdgeRemoved(EdgeId e, BidirectionalPath * path) {
         auto iter = edge_coverage_.find(e);
         if (iter != edge_coverage_.end()) {
             if (iter->second->count(path) == 0) {
@@ -82,6 +67,14 @@ private:
         }
     }
 
+    void ProcessPath(BidirectionalPath * path, bool subscribe) {
+        if (subscribe)
+            path->Subscribe(this);
+        for (size_t i = 0; i < path->Size(); ++i) {
+            EdgeAdded(path->At(i), path);
+        }
+    }
+
     size_t EdgeCount() const {
         size_t result = 0;
         for (auto e = g_.ConstEdgeBegin(); !e.IsEnd(); ++e) {
@@ -91,54 +84,47 @@ private:
     }
 
 public:
-    GraphCoverageMap(const Graph& g) : g_(g), edge_coverage_() {
-        empty_ = new MapDataT();
+    GraphCoverageMap(const GraphCoverageMap&) = delete;
+    GraphCoverageMap& operator=(const GraphCoverageMap&) = delete;
+
+    GraphCoverageMap(GraphCoverageMap&&) = default;
+    GraphCoverageMap& operator=(GraphCoverageMap&&) = default;
+
+    explicit GraphCoverageMap(const Graph& g) : g_(g) {
+        //FIXME heavy constructor
         edge_coverage_.reserve(EdgeCount());
     }
 
-    GraphCoverageMap(const Graph& g, const PathContainer& paths, bool subscribe = false) : g_(g), edge_coverage_() {
-        empty_ = new MapDataT();
-        edge_coverage_.reserve(EdgeCount());
+    GraphCoverageMap(const Graph& g, const PathContainer& paths, bool subscribe = false) :
+            GraphCoverageMap(g) {
         AddPaths(paths, subscribe);
     }
 
-    virtual ~GraphCoverageMap() {
-        delete empty_;
+    ~GraphCoverageMap() {
         for (auto iter = edge_coverage_.begin(); iter != edge_coverage_.end(); ++iter) {
             delete iter->second;
         }
     }
 
     void AddPaths(const PathContainer& paths, bool subscribe = false) {
-        for (size_t i = 0; i < paths.size(); ++i) {
-            if (subscribe)
-                paths.Get(i)->Subscribe(this);
-            for (size_t j = 0; j < paths.Get(i)->Size(); ++j) {
-                EdgeAdded(paths.Get(i)->At(j), paths.Get(i), paths.Get(i)->GapAt(j));
-            }
-            if (subscribe)
-                paths.GetConjugate(i)->Subscribe(this);
-            for (size_t j = 0; j < paths.GetConjugate(i)->Size(); ++j) {
-                EdgeAdded(paths.GetConjugate(i)->At(j), paths.GetConjugate(i), paths.GetConjugate(i)->GapAt(j));
-            }
+        for (auto path_pair : paths) {
+            ProcessPath(path_pair.first, subscribe);
+            ProcessPath(path_pair.second, subscribe);
         }
     }
 
     void Subscribe(BidirectionalPath * path) {
-        path->Subscribe(this);
-        for (size_t i = 0; i < path->Size(); ++i) {
-            BackEdgeAdded(path->At(i), path, path->GapAt(i));
-        }
+        ProcessPath(path, true);
     }
 
     //Inherited from PathListener
-    void FrontEdgeAdded(EdgeId e, BidirectionalPath * path, Gap gap) override {
-        EdgeAdded(e, path, gap);
+    void FrontEdgeAdded(EdgeId e, BidirectionalPath * path, const Gap&) override {
+        EdgeAdded(e, path);
     }
 
     //Inherited from PathListener
-    void BackEdgeAdded(EdgeId e, BidirectionalPath * path, Gap gap) override {
-        EdgeAdded(e, path, gap);
+    void BackEdgeAdded(EdgeId e, BidirectionalPath * path, const Gap&) override {
+        EdgeAdded(e, path);
     }
 
     //Inherited from PathListener
@@ -151,12 +137,12 @@ public:
         EdgeRemoved(e, path);
     }
 
-    MapDataT * GetEdgePaths(EdgeId e) const {
+    const MapDataT *  GetEdgePaths(EdgeId e) const {
         auto iter = edge_coverage_.find(e);
         if (iter != edge_coverage_.end()) {
             return iter->second;
         }
-        return empty_;
+        return &empty_;
     }
 
     int GetCoverage(EdgeId e) const {
@@ -189,33 +175,6 @@ public:
         return edge_coverage_.end();
     }
 
-    // DEBUG output
-    void PrintUncovered() const {
-        DEBUG("Uncovered edges");
-        int s = 0;
-        for (auto iter = g_.ConstEdgeBegin(); !iter.IsEnd(); ++iter) {
-            if (!IsCovered(*iter)) {
-                DEBUG(g_.int_id(*iter) << " (" << g_.length(*iter) << ") ~ " << g_.int_id(g_.conjugate(*iter)) << " (" << g_.length(g_.conjugate(*iter)) << ")");
-                s += 1;
-            }
-        }
-        DEBUG("Uncovered edges " << s / 2);
-    }
-
-    void PrintMulticovered() const {
-        DEBUG("Multicovered edges");
-        for (auto iter = g_.ConstEdgeBegin(); !iter.IsEnd(); ++iter) {
-            auto paths = GetCoveringPaths(*iter);
-            if (paths.size() > 1 && g_.length(*iter) > 1000) {
-                DEBUG(g_.int_id(*iter) << " (" << g_.length(*iter) << "). " << " Covered: " << paths.size());
-                for (auto path = paths.begin(); path != paths.end(); ++path) {
-                    (*path)->Print();
-                }
-                DEBUG("=====");
-            }
-        }
-    }
-
     size_t size() const {
         return edge_coverage_.size();
     }
@@ -224,31 +183,59 @@ public:
         return g_;
     }
 
-private:
-    GraphCoverageMap(const GraphCoverageMap& t) : g_(t.g_), empty_(t.empty_) {}
 };
 
-inline bool GetLoopAndExit(const Graph& g, EdgeId e, pair<EdgeId, EdgeId>& result) {
+
+class PathContainerCoverageSwitcher {
+    const Graph& g_;
+
+    const SSCoverageStorage& coverage_storage_;
+
+    bool antisense_;
+
+    double CalculateCoverage(const BidirectionalPath& p, bool reverse) const {
+        double res = 0.0;
+        double len = 0;
+        for(auto e : p) {
+            res += coverage_storage_.GetCoverage(e, reverse) * double(g_.length(e));
+            len += (double) g_.length(e);
+        }
+        return res / len;
+    }
+
+public:
+    PathContainerCoverageSwitcher(const Graph& g, const SSCoverageStorage& coverage_storage, bool antisense):
+        g_(g), coverage_storage_(coverage_storage), antisense_(antisense) {}
+
+
+    void Apply(PathContainer& paths) const {
+        for (size_t i = 0; i < paths.size(); ++i) {
+            if (math::ls(CalculateCoverage(*paths.Get(i), antisense_), CalculateCoverage(*paths.GetConjugate(i), antisense_))) {
+                paths.Swap(i);
+            }
+        }
+    }
+};
+
+//result -- first edge is loop's back edge, second is loop exit edge
+inline bool GetLoopAndExit(const Graph& g, EdgeId e, EdgeId& back_cycle_edge, EdgeId& loop_exit) {
     VertexId v = g.EdgeEnd(e);
     VertexId start = g.EdgeStart(e);
     if (g.OutgoingEdgeCount(v) != 2 || g.IncomingEdgeCount(v) != 1 || g.OutgoingEdgeCount(start) != 1 || g.IncomingEdgeCount(start) != 2) {
         return false;
     }
-    EdgeId loop;
-    EdgeId exit;
     bool loop_found = false;
     bool exit_found = false;
     auto edges = g.OutgoingEdges(v);
     for (auto edge = edges.begin(); edge != edges.end(); ++edge) {
         if (g.EdgeEnd(*edge) == g.EdgeStart(e) && *edge != e) {
-            loop = *edge;
+            back_cycle_edge = *edge;
             loop_found = true;
         } else if (*edge != e) {
-            exit = *edge;
+            loop_exit = *edge;
             exit_found = true;
         }
     }
-    result = make_pair(loop, exit);
     return exit_found && loop_found;
 }
 
@@ -373,8 +360,9 @@ inline void LoopDetector::RemoveLoop(size_t skip_identical_edges, bool fullRemov
 }
 
 inline bool LoopDetector::EdgeInShortLoop(EdgeId e) const {
-    pair<EdgeId, EdgeId> temp;
-    return GetLoopAndExit(path_->graph(), e, temp);
+    EdgeId back_cycle_edge;
+    EdgeId loop_exit;
+    return GetLoopAndExit(path_->graph(), e, back_cycle_edge, loop_exit);
 }
 
 inline bool LoopDetector::PrevEdgeInShortLoop() const {
diff --git a/src/common/modules/path_extend/pipeline/extenders_logic.cpp b/src/common/modules/path_extend/pipeline/extenders_logic.cpp
index 7b26fed..65aee4b 100644
--- a/src/common/modules/path_extend/pipeline/extenders_logic.cpp
+++ b/src/common/modules/path_extend/pipeline/extenders_logic.cpp
@@ -31,13 +31,14 @@ shared_ptr<SimpleExtender> ExtendersGenerator::MakeLongReadsExtender(size_t lib_
         resolvable_repeat_length_bound = std::max(resolvable_repeat_length_bound, lib.data().read_length);
     }
     INFO("resolvable_repeat_length_bound set to " << resolvable_repeat_length_bound);
-
+    bool investigate_short_loop = lib.is_contig_lib() || lib.is_long_read_lib() || support_.UseCoverageResolverForSingleReads(lib.type());
 
     auto long_read_ec = MakeLongReadsExtensionChooser(lib_index, read_paths_cov_map);
     return make_shared<SimpleExtender>(gp_, cover_map_,
+                                       used_unique_storage_,
                                        long_read_ec,
                                        resolvable_repeat_length_bound,
-                                       true, /* investigate short loops */
+                                       investigate_short_loop, /* investigate short loops */
                                        support_.UseCoverageResolverForSingleReads(lib.type()));
 }
 
@@ -58,35 +59,36 @@ shared_ptr<SimpleExtender> ExtendersGenerator::MakeLongEdgePEExtender(size_t lib
                                               opts.priority_coeff);
 
     return make_shared<SimpleExtender>(gp_, cover_map_,
+                                       used_unique_storage_,
                                        extension,
                                        paired_lib->GetISMax(),
                                        investigate_loops,
-                                       false /*use short loop coverage resolver*/);
+                                       false /*use short loop coverage resolver*/,
+                                       opts.weight_threshold);
 }
 
-shared_ptr<GapJoiner> ExtendersGenerator::MakeGapJoiners(double is_variation) const {
+shared_ptr<GapAnalyzer> ExtendersGenerator::MakeGapAnalyzer(double is_variation) const {
     const auto &pset = params_.pset;
 
-    vector<shared_ptr<GapJoiner>> joiners;
+    vector<shared_ptr<GapAnalyzer>> joiners;
     if (params_.pset.scaffolder_options.use_la_gap_joiner)
-        joiners.push_back(std::make_shared<LAGapJoiner>(gp_.g, pset.scaffolder_options.min_overlap_length,
+        joiners.push_back(std::make_shared<LAGapAnalyzer>(gp_.g, pset.scaffolder_options.min_overlap_length,
                                                         pset.scaffolder_options.flank_multiplication_coefficient,
                                                         pset.scaffolder_options.flank_addition_coefficient));
 
 
-    joiners.push_back(std::make_shared<HammingGapJoiner>(gp_.g,
+    joiners.push_back(std::make_shared<HammingGapAnalyzer>(gp_.g,
                                                          pset.scaffolder_options.min_gap_score,
                                                          pset.scaffolder_options.short_overlap,
                                                          (int) pset.scaffolder_options.basic_overlap_coeff
                                                              * dataset_info_.RL()));
 
-    return std::make_shared<CompositeGapJoiner>(gp_.g,
+    //todo introduce explicit must_overlap_coeff and rename max_can_overlap -> can_overlap_coeff
+    return std::make_shared<CompositeGapAnalyzer>(gp_.g,
                                                 joiners,
-                                                size_t(pset.scaffolder_options.max_can_overlap
-                                                           * (double) gp_.g.k()), /* may overlap threshold */
-                                                int(math::round(double(gp_.g.k())
-                                                                    - pset.scaffolder_options.var_coeff
-                                                                        * is_variation)),  /* must overlap threshold */
+                                                size_t(math::round(pset.scaffolder_options.max_can_overlap
+                                                           * is_variation)), /* may overlap threshold */
+                                                int(math::round(-pset.scaffolder_options.var_coeff * is_variation)), /* must overlap threshold */
                                                 pset.scaffolder_options.artificial_gap);
 
 }
@@ -103,8 +105,9 @@ shared_ptr<PathExtender> ExtendersGenerator::MakeScaffoldingExtender(size_t lib_
                                                                        pset.scaffolder_options.cl_threshold,
                                                                        pset.scaffolder_options.var_coeff);
 
-    return make_shared<ScaffoldingPathExtender>(gp_, cover_map_, scaff_chooser,
-                                                MakeGapJoiners(paired_lib->GetIsVar()),
+    return make_shared<ScaffoldingPathExtender>(gp_, cover_map_,
+                                                used_unique_storage_, scaff_chooser,
+                                                MakeGapAnalyzer(paired_lib->GetIsVar()),
                                                 paired_lib->GetISMax(),
                                                 false, /* investigate short loops */
                                                 params_.avoid_rc_connections);
@@ -130,17 +133,17 @@ shared_ptr<PathExtender> ExtendersGenerator::MakeRNAScaffoldingExtender(size_t l
 
     VERIFY(pset.scaffolder_options.min_overlap_for_rna_scaffolding.is_initialized());
     return make_shared<RNAScaffoldingPathExtender>(gp_, cover_map_,
+                                                   used_unique_storage_,
                                                    scaff_chooser,
                                                    scaff_chooser2,
-                                                   MakeGapJoiners(paired_lib->GetIsVar()),
+                                                   MakeGapAnalyzer(paired_lib->GetIsVar()),
                                                    paired_lib->GetISMax(),
                                                    false  /* investigate short loops */,
                                                    *pset.scaffolder_options.min_overlap_for_rna_scaffolding);
 }
 
-shared_ptr<PathExtender> ExtendersGenerator::MakeMatePairScaffoldingExtender(
-    size_t lib_index,
-    const ScaffoldingUniqueEdgeStorage &storage) const {
+shared_ptr<PathExtender> ExtendersGenerator::MakeMatePairScaffoldingExtender(size_t lib_index,
+                                                                             const ScaffoldingUniqueEdgeStorage &storage) const {
 
     const auto &lib = dataset_info_.reads[lib_index];
     const auto &pset = params_.pset;
@@ -173,8 +176,9 @@ shared_ptr<PathExtender> ExtendersGenerator::MakeMatePairScaffoldingExtender(
                                                                     <= params_.pset.scaffolding2015.graph_connectivity_max_edges);
 
     return make_shared<ScaffoldingPathExtender>(gp_, cover_map_,
+                                                used_unique_storage_,
                                                 scaff_chooser,
-                                                MakeGapJoiners(paired_lib->GetIsVar()),
+                                                MakeGapAnalyzer(paired_lib->GetIsVar()),
                                                 paired_lib->GetISMax(),
                                                 false, /* investigate short loops */
                                                 params_.avoid_rc_connections,
@@ -204,10 +208,11 @@ shared_ptr<SimpleExtender> ExtendersGenerator::MakeCoordCoverageExtender(size_t
 
     auto chooser = make_shared<JointExtensionChooser>(gp_.g, permissive_pi_chooser, coord_cov_chooser);
 
-    return make_shared<SimpleExtender>(gp_, cover_map_, chooser,
+    return make_shared<SimpleExtender>(gp_, cover_map_, used_unique_storage_, chooser,
                                        -1ul /* insert size is needed only for loop detection, which is not needed in this case */,
                                        false, /* investigate short loops */
-                                       false /*use short loop coverage resolver*/);
+                                       false /*use short loop coverage resolver*/,
+                                       params_.pset.extension_options.weight_threshold);
 }
 
 shared_ptr<SimpleExtender> ExtendersGenerator::MakeRNAExtender(size_t lib_index, bool investigate_loops) const {
@@ -229,12 +234,31 @@ shared_ptr<SimpleExtender> ExtendersGenerator::MakeRNAExtender(size_t lib_index,
                                          opts.priority_coeff);
 
     return make_shared<MultiExtender>(gp_, cover_map_,
+                                      used_unique_storage_,
                                       extension,
                                       paired_lib->GetISMax(),
                                       investigate_loops,
-                                      false /*use short loop coverage resolver*/);
+                                      false /*use short loop coverage resolver*/,
+                                      opts.weight_threshold);
 }
 
+
+shared_ptr<SimpleExtender> ExtendersGenerator::MakeSimpleCoverageExtender(size_t lib_index) const {
+
+    auto extension =
+        make_shared<SimpleCoverageExtensionChooser>(gp_.ss_coverage[lib_index], gp_.g,
+                                                    params_.pset.simple_coverage_resolver.coverage_delta,
+                                                    params_.pset.simple_coverage_resolver.min_upper_coverage);
+
+    return make_shared<SimpleExtender>(gp_, cover_map_,
+                                       used_unique_storage_,
+                                       extension,
+                                       1000, /*insert size for cycle detection*/
+                                       false /*investigate short loops*/,
+                                       false /*use short loop coverage resolver*/);
+}
+
+
 shared_ptr<SimpleExtender> ExtendersGenerator::MakePEExtender(size_t lib_index, bool investigate_loops) const {
     const auto &lib = dataset_info_.reads[lib_index];
     shared_ptr<PairedInfoLibrary> paired_lib = MakeNewLib(gp_.g, lib, gp_.clustered_indices[lib_index]);
@@ -261,12 +285,26 @@ shared_ptr<SimpleExtender> ExtendersGenerator::MakePEExtender(size_t lib_index,
                                                          opts.priority_coeff);
 
     return make_shared<SimpleExtender>(gp_, cover_map_,
+                                       used_unique_storage_,
                                        extension_chooser,
                                        paired_lib->GetISMax(),
                                        investigate_loops,
-                                       false /*use short loop coverage resolver*/);
+                                       false /*use short loop coverage resolver*/,
+                                       opts.weight_threshold);
 }
 
+//FIXME do we need ExtenderTriplets story here?
+//FIXME integrate with MakeBasicExtenders
+Extenders ExtendersGenerator::MakePEExtenders() const {
+    Extenders result;
+    for (size_t lib_index = 0; lib_index < dataset_info_.reads.lib_count(); ++lib_index) {
+        const auto &lib = dataset_info_.reads[lib_index];
+        if (support_.IsForPEExtender(lib)) {
+            result.push_back(MakePEExtender(lib_index, false));
+        }
+    }
+    return result;
+}
 
 void ExtendersGenerator::PrintExtenders(const Extenders &extenders) const {
     DEBUG("Extenders in vector:");
@@ -274,12 +312,12 @@ void ExtendersGenerator::PrintExtenders(const Extenders &extenders) const {
         //TODO: use polymorphism instead of RTTI
         auto ext_ptr = extender.get();
         DEBUG("Extender #i" << typeid(*ext_ptr).name());
-        if (instanceof<SimpleExtender>(ext_ptr)) {
+        if (utils::instanceof<SimpleExtender>(ext_ptr)) {
             auto ec = ((SimpleExtender *) ext_ptr)->GetExtensionChooser();
             auto ec_ptr = ec.get();
             DEBUG("    Extender #i" << typeid(*ec_ptr).name());
         }
-        else if (instanceof<ScaffoldingPathExtender>(ext_ptr)) {
+        else if (utils::instanceof<ScaffoldingPathExtender>(ext_ptr)) {
             auto ec = ((ScaffoldingPathExtender *) ext_ptr)->GetExtensionChooser();
             auto ec_ptr = ec.get();
             DEBUG("    Extender #i" << typeid(*ec_ptr).name());
@@ -287,6 +325,16 @@ void ExtendersGenerator::PrintExtenders(const Extenders &extenders) const {
     }
 }
 
+//Creates mate-pair extenders: first over the main unique-edge storage, then
+//over every additional storage accumulated in unique_data_.unique_storages_
+//(one per extra unique-length threshold; see AddScaffUniqueStorage in launcher).
+Extenders ExtendersGenerator::MakeMPExtenders() const {
+    Extenders extenders = MakeMPExtenders(unique_data_.main_unique_storage_);
+    INFO("Using " << extenders.size() << " mate-pair " << support_.LibStr(extenders.size()));
+
+    for (const auto& unique_storage : unique_data_.unique_storages_) {
+        utils::push_back_all(extenders, MakeMPExtenders(unique_storage));
+    }
+    return extenders;
+}
+
 Extenders ExtendersGenerator::MakeMPExtenders(const ScaffoldingUniqueEdgeStorage &storage) const {
     ExtenderTriplets result;
 
@@ -294,7 +342,8 @@ Extenders ExtendersGenerator::MakeMPExtenders(const ScaffoldingUniqueEdgeStorage
         const auto &lib = dataset_info_.reads[lib_index];
 
         if (lib.is_mate_pair()) {
-            result.emplace_back(lib.type(), lib_index, MakeMatePairScaffoldingExtender(lib_index, storage));
+            result.emplace_back(lib.type(), lib_index,
+                                MakeMatePairScaffoldingExtender(lib_index, storage));
         }
     }
     std::stable_sort(result.begin(), result.end());
@@ -302,8 +351,7 @@ Extenders ExtendersGenerator::MakeMPExtenders(const ScaffoldingUniqueEdgeStorage
     return ExtractExtenders(result);
 }
 
-Extenders ExtendersGenerator::MakePBScaffoldingExtenders(const ScaffoldingUniqueEdgeStorage &unique_storage_pb,
-                                                         const vector<shared_ptr<GraphCoverageMap>> &long_reads_cov_map) const {
+Extenders ExtendersGenerator::MakePBScaffoldingExtenders() const {
     const auto &pset = params_.pset;
     ExtenderTriplets result;
 
@@ -312,20 +360,22 @@ Extenders ExtendersGenerator::MakePBScaffoldingExtenders(const ScaffoldingUnique
             INFO("Creating scaffolding extender for lib " << lib_index);
             shared_ptr<ConnectionCondition> condition = make_shared<LongReadsLibConnectionCondition>(gp_.g,
                                                                                                      lib_index, 2,
-                                                                                                     *long_reads_cov_map[lib_index]);
+                                                                                                     unique_data_.long_reads_cov_map_[lib_index]);
             auto scaff_chooser = std::make_shared<ExtensionChooser2015>(gp_.g,
                                                                         nullptr,
                                                                         condition,
-                                                                        unique_storage_pb,
+                                                                        unique_data_.unique_pb_storage_,
                                                                         pset.scaffolder_options.cl_threshold,
                                                                         pset.scaffolder_options.var_coeff,
                                                                         pset.scaffolding2015.relative_weight_cutoff);
 
             result.emplace_back(dataset_info_.reads[lib_index].type(),
                                 lib_index,
+                                //FIXME are utilized constants reasonable?
                                 make_shared<ScaffoldingPathExtender>(gp_, cover_map_,
+                                                                     used_unique_storage_,
                                                                      scaff_chooser,
-                                                                     MakeGapJoiners(1000), /* "IS vatiation" */
+                                                                     MakeGapAnalyzer(1000), /* "IS variation" */
                                                                      10000, /* insert size */
                                                                      false, /* investigate short loops */
                                                                      params_.avoid_rc_connections,
@@ -349,8 +399,7 @@ Extenders ExtendersGenerator::MakeCoverageExtenders() const {
     return result;
 }
 
-Extenders ExtendersGenerator::MakeBasicExtenders(const ScaffoldingUniqueEdgeStorage &storage,
-                                                 const vector<shared_ptr<GraphCoverageMap>> &long_reads_cov_map) const {
+Extenders ExtendersGenerator::MakeBasicExtenders() const {
     ExtenderTriplets basic_extenders;
     ExtenderTriplets loop_resolving_extenders;
     ExtenderTriplets scaffolding_extenders;
@@ -366,7 +415,9 @@ Extenders ExtendersGenerator::MakeBasicExtenders(const ScaffoldingUniqueEdgeStor
 
         //TODO: scaff2015 does not need any single read libs?
         if (support_.IsForSingleReadExtender(lib)) {
-            basic_extenders.emplace_back(lib.type(), lib_index, MakeLongReadsExtender(lib_index, *long_reads_cov_map[lib_index]));
+            basic_extenders.emplace_back(lib.type(), lib_index,
+                                         MakeLongReadsExtender(lib_index,
+                                                               unique_data_.long_reads_cov_map_[lib_index]));
             ++single_read_libs;
         }
         if (support_.IsForPEExtender(lib)) {
@@ -376,12 +427,16 @@ Extenders ExtendersGenerator::MakeBasicExtenders(const ScaffoldingUniqueEdgeStor
                     basic_extenders.emplace_back(lib.type(), lib_index, MakeLongEdgePEExtender(lib_index, false));
                 } else if (pset.multi_path_extend) {
                     basic_extenders.emplace_back(lib.type(), lib_index, MakePEExtender(lib_index, false));
+                    if (pset.simple_coverage_resolver.enabled)
+                        basic_extenders.emplace_back(lib.type(), lib_index, MakeSimpleCoverageExtender(lib_index));
                     basic_extenders.emplace_back(lib.type(), lib_index, MakeRNAExtender(lib_index, false));
                 } else {
                     basic_extenders.emplace_back(lib.type(), lib_index, MakePEExtender(lib_index, false));
                 }
             } else if (pset.sm == sm_2015) {
-                basic_extenders.emplace_back(lib.type(), lib_index, MakeMatePairScaffoldingExtender(lib_index, storage));
+                basic_extenders.emplace_back(lib.type(), lib_index,
+                                             MakeMatePairScaffoldingExtender(lib_index,
+                                                                             unique_data_.main_unique_storage_));
             }
         }
         //TODO logic is very cryptic!
@@ -397,7 +452,9 @@ Extenders ExtendersGenerator::MakeBasicExtenders(const ScaffoldingUniqueEdgeStor
             } else {
                 scaffolding_extenders.emplace_back(lib.type(), lib_index, MakeScaffoldingExtender(lib_index));
                 if (pset.sm == sm_combined) {
-                    scaffolding_extenders.emplace_back(lib.type(), lib_index, MakeMatePairScaffoldingExtender(lib_index, storage));
+                    scaffolding_extenders.emplace_back(lib.type(), lib_index,
+                                                       MakeMatePairScaffoldingExtender(lib_index,
+                                                                                       unique_data_.main_unique_storage_));
                 }
             }
         }
@@ -408,9 +465,9 @@ Extenders ExtendersGenerator::MakeBasicExtenders(const ScaffoldingUniqueEdgeStor
     std::stable_sort(loop_resolving_extenders.begin(), loop_resolving_extenders.end());
 
     Extenders result;
-    push_back_all(result, ExtractExtenders(basic_extenders));
-    push_back_all(result, ExtractExtenders(scaffolding_extenders));
-    push_back_all(result, ExtractExtenders(loop_resolving_extenders));
+    utils::push_back_all(result, ExtractExtenders(basic_extenders));
+    utils::push_back_all(result, ExtractExtenders(scaffolding_extenders));
+    utils::push_back_all(result, ExtractExtenders(loop_resolving_extenders));
 
     INFO("Using " << pe_libs << " paired-end " << support_.LibStr(pe_libs));
     INFO("Using " << scf_pe_libs << " paired-end scaffolding " << support_.LibStr(scf_pe_libs));
diff --git a/src/common/modules/path_extend/pipeline/extenders_logic.hpp b/src/common/modules/path_extend/pipeline/extenders_logic.hpp
index 2f6c190..1eca53a 100644
--- a/src/common/modules/path_extend/pipeline/extenders_logic.hpp
+++ b/src/common/modules/path_extend/pipeline/extenders_logic.hpp
@@ -46,6 +46,8 @@ class ExtendersGenerator {
     const conj_graph_pack &gp_;
 
     const GraphCoverageMap &cover_map_;
+    const UniqueData &unique_data_;
+    UsedUniqueStorage &used_unique_storage_;
 
     const PELaunchSupport &support_;
 
@@ -54,25 +56,33 @@ public:
                        const PathExtendParamsContainer &params,
                        const conj_graph_pack &gp,
                        const GraphCoverageMap &cover_map,
+                       const UniqueData &unique_data,
+                       UsedUniqueStorage &used_unique_storage,
                        const PELaunchSupport& support) :
         dataset_info_(dataset_info),
         params_(params),
         gp_(gp),
         cover_map_(cover_map),
+        unique_data_(unique_data),
+        used_unique_storage_(used_unique_storage),
         support_(support) { }
 
-    Extenders MakePBScaffoldingExtenders(const ScaffoldingUniqueEdgeStorage &unique_storage_pb,
-                                         const vector<shared_ptr<GraphCoverageMap>> &long_reads_cov_map) const;
+    Extenders MakePBScaffoldingExtenders() const;
 
-    Extenders MakeBasicExtenders(const ScaffoldingUniqueEdgeStorage &storage,
-                                 const vector<shared_ptr<GraphCoverageMap>> &long_reads_cov_map) const;
+    Extenders MakeBasicExtenders() const;
 
-    Extenders MakeMPExtenders(const ScaffoldingUniqueEdgeStorage &storage) const;
+    Extenders MakeMPExtenders() const;
 
     Extenders MakeCoverageExtenders() const;
 
+    Extenders MakePEExtenders() const;
+
 private:
 
+    shared_ptr<SimpleExtender> MakePEExtender(size_t lib_index, bool investigate_loops) const;
+
+    Extenders MakeMPExtenders(const ScaffoldingUniqueEdgeStorage &storage) const;
+
     shared_ptr<ExtensionChooser> MakeLongReadsExtensionChooser(size_t lib_index, const GraphCoverageMap& read_paths_cov_map) const;
 
     shared_ptr<SimpleExtender> MakeLongReadsExtender(size_t lib_index, const GraphCoverageMap& read_paths_cov_map) const;
@@ -80,36 +90,20 @@ private:
     shared_ptr<SimpleExtender> MakeLongEdgePEExtender(size_t lib_index,
                                                       bool investigate_loops) const;
 
-    shared_ptr<WeightCounter> MakeMetaWeightCounter(shared_ptr<PairedInfoLibrary> lib,
-                                                    size_t read_length) const;
-
-    shared_ptr<SimpleExtensionChooser> MakeMetaExtensionChooser(shared_ptr<PairedInfoLibrary> lib,
-                                                                size_t read_length) const;
-
-    shared_ptr<SimpleExtender> MakeMetaExtender(size_t lib_index, bool investigate_loops) const;
-
-
-    shared_ptr<SimpleExtender> MakePEExtender(size_t lib_index, bool investigate_loops) const;
-
-
-    shared_ptr<GapJoiner> MakeGapJoiners(double is_variation) const;
-
+    shared_ptr<GapAnalyzer> MakeGapAnalyzer(double is_variation) const;
 
     shared_ptr<PathExtender> MakeScaffoldingExtender(size_t lib_index) const;
 
-
     shared_ptr<PathExtender> MakeRNAScaffoldingExtender(size_t lib_index) const;
 
-
     shared_ptr<PathExtender> MakeMatePairScaffoldingExtender
         (size_t lib_index, const ScaffoldingUniqueEdgeStorage &storage) const;
 
-
     shared_ptr<SimpleExtender> MakeCoordCoverageExtender(size_t lib_index) const;
 
-
     shared_ptr<SimpleExtender> MakeRNAExtender(size_t lib_index, bool investigate_loops) const;
 
+    shared_ptr<SimpleExtender> MakeSimpleCoverageExtender(size_t lib_index) const;
 
     void PrintExtenders(const vector<shared_ptr<PathExtender>> &extenders) const;
 
diff --git a/src/common/modules/path_extend/pipeline/launch_support.cpp b/src/common/modules/path_extend/pipeline/launch_support.cpp
index 3be9ce5..9a4c9a4 100644
--- a/src/common/modules/path_extend/pipeline/launch_support.cpp
+++ b/src/common/modules/path_extend/pipeline/launch_support.cpp
@@ -17,6 +17,15 @@ bool PELaunchSupport::HasOnlyMPLibs() const {
     return true;
 }
 
+//Returns true iff every library in the dataset is a single-read library whose
+//reads were mapped (data().single_reads_mapped set); false on the first
+//library violating either condition.
+bool PELaunchSupport::HasOnlySingleReads() const {
+    for (const auto &lib : dataset_info_.reads) {
+        if (lib.type() != io::LibraryType::SingleReads || !lib.data().single_reads_mapped) {
+            return false;
+        }
+    }
+    return true;
+}
+
 pe_config::ParamSetT::ExtensionOptionsT PELaunchSupport::GetExtensionOpts(shared_ptr<PairedInfoLibrary> lib,
                                                                           const pe_config::ParamSetT &pset) const {
     return lib->IsMp() ? pset.mate_pair_options : pset.extension_options;
@@ -49,7 +58,7 @@ bool PELaunchSupport::IsForScaffoldingExtender(const io::SequencingLibrary<confi
 
 //TODO: review usage
 bool PELaunchSupport::UseCoverageResolverForSingleReads(const io::LibraryType &type) const {
-    return HasOnlyMPLibs() && (type == io::LibraryType::HQMatePairs);
+    return (HasOnlyMPLibs() && type == io::LibraryType::HQMatePairs) || HasOnlySingleReads();
 }
 
 std::string PELaunchSupport::LibStr(size_t count) const {
diff --git a/src/common/modules/path_extend/pipeline/launch_support.hpp b/src/common/modules/path_extend/pipeline/launch_support.hpp
index 53870af..610e740 100644
--- a/src/common/modules/path_extend/pipeline/launch_support.hpp
+++ b/src/common/modules/path_extend/pipeline/launch_support.hpp
@@ -38,6 +38,7 @@ struct PathExtendParamsContainer {
 
     PathExtendParamsContainer(const config::dataset& dataset_info,
                               const pe_config::MainPEParamsT& pe_cfg_,
+                              const config::debruijn_config::strand_specificity& strand_specificity,
                               const std::string& output_dir_,
                               config::pipeline_type mode_,
                               bool uneven_depth_,
@@ -45,14 +46,14 @@ struct PathExtendParamsContainer {
                               bool use_scaffolder_):
         pe_cfg(pe_cfg_),
         pset(pe_cfg_.param_set),
+        ss(strand_specificity),
         output_dir(output_dir_),
         etc_dir(output_dir + pe_cfg_.etc_dir + "/"),
         mode(mode_),
         uneven_depth(uneven_depth_),
         avoid_rc_connections(avoid_rc_connections_),
         use_scaffolder(use_scaffolder_),
-        traverse_loops(true),
-        detect_repeats_online(mode_ != config::pipeline_type::meta && mode_ != config::pipeline_type::rna)
+        traverse_loops(true)
     {
         if (!(use_scaffolder && pset.scaffolder_options.enabled)) {
             traverse_loops = false;
@@ -66,13 +67,18 @@ struct PathExtendParamsContainer {
         if (HasLongReads(dataset_info))
             max_polisher_gap = max(max_polisher_gap, size_t(10000));
 
-        min_edge_len = 100;
-        max_path_diff = mode == config::pipeline_type::rna ? 1 : FindMaxISRightQuantile(dataset_info);
+        min_edge_len = 0;
+
+        max_path_diff = FindMaxISRightQuantile(dataset_info);
+        if (mode == config::pipeline_type::rna || mode == config::pipeline_type::meta)
+            max_path_diff = 0;
     }
 
     const pe_config::MainPEParamsT& pe_cfg;
     const pe_config::ParamSetT& pset;
 
+    const config::debruijn_config::strand_specificity& ss;
+
     std::string output_dir;
     std::string etc_dir;
 
@@ -82,8 +88,8 @@ struct PathExtendParamsContainer {
     bool avoid_rc_connections;
     bool use_scaffolder;
     bool traverse_loops;
-    bool detect_repeats_online;
 
+    //todo move to config
     size_t min_edge_len;
     size_t max_path_diff;
     size_t max_polisher_gap;
@@ -108,6 +114,8 @@ public:
 
     bool HasOnlyMPLibs() const;
 
+    bool HasOnlySingleReads() const;
+
     bool IsForSingleReadExtender(const io::SequencingLibrary<config::DataSetData> &lib) const;
 
     bool IsForSingleReadScaffolder(const io::SequencingLibrary<config::DataSetData> &lib) const;
diff --git a/src/common/modules/path_extend/pipeline/launcher.cpp b/src/common/modules/path_extend/pipeline/launcher.cpp
index 98540b6..d43ea02 100644
--- a/src/common/modules/path_extend/pipeline/launcher.cpp
+++ b/src/common/modules/path_extend/pipeline/launcher.cpp
@@ -6,6 +6,7 @@
 
 #include "modules/path_extend/path_visualizer.hpp"
 #include "modules/path_extend/loop_traverser.hpp"
+#include "modules/path_extend/path_extender.hpp"
 #include "modules/alignment/long_read_storage.hpp"
 #include "modules/path_extend/scaffolder2015/extension_chooser2015.hpp"
 #include "modules/path_extend/scaffolder2015/scaffold_graph_visualizer.hpp"
@@ -20,7 +21,6 @@ namespace path_extend {
 using namespace debruijn_graph;
 using namespace std;
 
-
 vector<shared_ptr<ConnectionCondition>>
     PathExtendLauncher::ConstructPairedConnectionConditions(const ScaffoldingUniqueEdgeStorage& edge_storage) const {
 
@@ -39,7 +39,7 @@ vector<shared_ptr<ConnectionCondition>>
                 INFO("Unusable for scaffold graph paired lib #" << lib_index);
                 continue;
             }
-            conditions.push_back(make_shared<ScaffoldGraphPairedConnectionCondition>(gp_.g, edge_storage.GetSet(),
+            conditions.push_back(make_shared<ScaffoldGraphPairedConnectionCondition>(gp_.g, edge_storage.unique_edges(),
                                                                                      paired_lib, lib_index,
                                                                                      params.always_add,
                                                                                      params.never_add,
@@ -55,7 +55,7 @@ shared_ptr<scaffold_graph::ScaffoldGraph> PathExtendLauncher::ConstructScaffoldG
     const pe_config::ParamSetT::ScaffoldGraphParamsT &params = params_.pset.scaffold_graph_params;
 
     INFO("Constructing connections");
-    LengthLowerBound edge_condition(gp_.g, edge_storage.GetMinLength());
+    LengthLowerBound edge_condition(gp_.g, edge_storage.min_length());
 
     vector<shared_ptr<ConnectionCondition>> conditions =
         ConstructPairedConnectionConditions(edge_storage);
@@ -68,9 +68,9 @@ shared_ptr<scaffold_graph::ScaffoldGraph> PathExtendLauncher::ConstructScaffoldG
 
     INFO("Total conditions " << conditions.size());
 
-    INFO("Constructing scaffold graph from set of size " << edge_storage.GetSet().size());
+    INFO("Constructing scaffold graph from set of size " << edge_storage.unique_edges().size());
 
-    DefaultScaffoldGraphConstructor constructor(gp_.g, edge_storage.GetSet(), conditions, edge_condition);
+    DefaultScaffoldGraphConstructor constructor(gp_.g, edge_storage.unique_edges(), conditions, edge_condition);
     auto scaffold_graph = constructor.Construct();
 
     INFO("Scaffold graph contains " << scaffold_graph->VertexCount() << " vertices and " << scaffold_graph->EdgeCount()
@@ -84,27 +84,12 @@ void PathExtendLauncher::PrintScaffoldGraph(const scaffold_graph::ScaffoldGraph
                                             const string &filename) const {
     using namespace scaffold_graph;
 
-    INFO("Constructing reference labels");
-    map<debruijn_graph::EdgeId, string> edge_labels;
-    size_t count = 0;
-    for (const auto &edge_coord_pair: genome_checker.ConstructEdgeOrder()) {
-        if (edge_labels.find(edge_coord_pair.first) == edge_labels.end()) {
-            edge_labels[edge_coord_pair.first] = "";
-        }
-        edge_labels[edge_coord_pair.first] += "order: " + ToString(count) +
-            "\n mapped range: " + ToString(edge_coord_pair.second.mapped_range.start_pos) + " : "
-            + ToString(edge_coord_pair.second.mapped_range.end_pos) +
-            "\n init range: " + ToString(edge_coord_pair.second.initial_range.start_pos) + " : "
-            + ToString(edge_coord_pair.second.initial_range.end_pos) + "\n";
-        ++count;
-    }
-
     auto vertex_colorer = make_shared<ScaffoldVertexSetColorer>(main_edge_set);
     auto edge_colorer = make_shared<ScaffoldEdgeColorer>();
     graph_colorer::CompositeGraphColorer<ScaffoldGraph> colorer(vertex_colorer, edge_colorer);
 
     INFO("Visualizing scaffold graph");
-    ScaffoldGraphVisualizer singleVisualizer(scaffold_graph, edge_labels);
+    ScaffoldGraphVisualizer singleVisualizer(scaffold_graph, genome_checker.EdgeLabels());
     std::ofstream single_dot;
     single_dot.open((filename + "_single.dot").c_str());
     singleVisualizer.Visualize(single_dot, colorer);
@@ -122,13 +107,17 @@ void PathExtendLauncher::MakeAndOutputScaffoldGraph() const {
     //Scaffold graph
     shared_ptr<scaffold_graph::ScaffoldGraph> scaffold_graph;
     if (params_.pset.scaffold_graph_params.construct) {
-        debruijn_graph::GenomeConsistenceChecker genome_checker(gp_, unique_data_.main_unique_storage_,
+        debruijn_graph::GenomeConsistenceChecker genome_checker(gp_,
                                                                 params_.pset.genome_consistency_checker.max_gap,
-                                                                params_.pset.genome_consistency_checker.relative_max_gap);
+                                                                params_.pset.genome_consistency_checker.relative_max_gap,
+                                                                unique_data_.main_unique_storage_.min_length(),
+                                                                unique_data_.main_unique_storage_,
+                                                                unique_data_.long_reads_cov_map_,
+                                                                dataset_info_.reads);
         scaffold_graph = ConstructScaffoldGraph(unique_data_.main_unique_storage_);
         if (params_.pset.scaffold_graph_params.output) {
             PrintScaffoldGraph(*scaffold_graph,
-                               unique_data_.main_unique_storage_.GetSet(),
+                               unique_data_.main_unique_storage_.unique_edges(),
                                genome_checker,
                                params_.etc_dir + "scaffold_graph");
         }
@@ -138,26 +127,38 @@ void PathExtendLauncher::MakeAndOutputScaffoldGraph() const {
 void PathExtendLauncher::CountMisassembliesWithReference(const PathContainer &paths) const {
     if (gp_.genome.size() == 0)
         return;
-
-    debruijn_graph::GenomeConsistenceChecker genome_checker(gp_, unique_data_.main_unique_storage_,
+    bool use_main_storage = params_.pset.genome_consistency_checker.use_main_storage;
+    size_t unresolvable_gap = unique_data_.main_unique_storage_.min_length();
+    ScaffoldingUniqueEdgeStorage tmp_storage;
+    if (!use_main_storage) {
+        unresolvable_gap = params_.pset.genome_consistency_checker.unresolvable_jump;
+        ScaffoldingUniqueEdgeAnalyzer tmp_analyzer(gp_, params_.pset.genome_consistency_checker.unique_length, unique_data_.unique_variation_);
+        tmp_analyzer.FillUniqueEdgeStorage(tmp_storage);
+    }
+    debruijn_graph::GenomeConsistenceChecker genome_checker(gp_,
                                                             params_.pset.genome_consistency_checker.max_gap,
-                                                            params_.pset.genome_consistency_checker.relative_max_gap);
+                                                            params_.pset.genome_consistency_checker.relative_max_gap,
+                                                            unresolvable_gap,
+                                                            use_main_storage ? unique_data_.main_unique_storage_ : tmp_storage,
+                                                            unique_data_.long_reads_cov_map_,
+                                                            dataset_info_.reads);
 
     size_t total_mis = 0, gap_mis = 0;
-    genome_checker.SpellGenome();
     for (auto iter = paths.begin(); iter != paths.end(); ++iter) {
         BidirectionalPath *path = iter.get();
         auto map_res = genome_checker.CountMisassemblies(*path);
         if (map_res.misassemblies > 0) {
             INFO ("there are " << map_res.misassemblies << " misassemblies in path: ");
-            path->PrintInfo();
+            path->PrintINFO();
             total_mis += map_res.misassemblies;
         }
         if (map_res.wrong_gap_size > 0) {
-            INFO ("there are " << map_res.wrong_gap_size << " wrong gaps in path: ");
-            path->PrintInfo();
+            INFO ("there are " << map_res.wrong_gap_size << " wrong gaps in path. ");
+            path->PrintDEBUG();
             gap_mis += map_res.wrong_gap_size;
         }
+        genome_checker.CheckPathEnd(*path);
+        genome_checker.CheckPathEnd(path->Conjugate());
     }
     INFO ("In total found " << total_mis << " misassemblies " << " and " << gap_mis << " gaps.");
 }
@@ -199,49 +200,87 @@ void PathExtendLauncher::DebugOutputPaths(const PathContainer &paths, const stri
     if (!params_.pe_cfg.debug_output) {
         return;
     }
-    PathInfoWriter path_writer;
     PathVisualizer visualizer;
 
-    writer_.OutputPaths(paths, params_.etc_dir + name);
+    writer_.OutputPaths(paths, params_.etc_dir + name + ".fasta");
     if (params_.pe_cfg.output.write_paths) {
-        path_writer.WritePaths(paths, params_.etc_dir + name + ".dat");
+        std::ofstream oss(params_.etc_dir + name + ".dat");
+        for (auto iter = paths.begin(); iter != paths.end(); ++iter) {
+            iter.get()->Print(oss);
+        }
+        oss.close();
     }
     if (params_.pe_cfg.viz.print_paths) {
         visualizer.writeGraphWithPathsSimple(gp_, params_.etc_dir + name + ".dot", name, paths);
     }
 }
 
-void PathExtendLauncher::FinalizePaths(PathContainer &paths,
-                                       GraphCoverageMap &cover_map,
-                                       const PathExtendResolver &resolver) const {
+//Trims the last edge from any path (and, independently, from its conjugate)
+//that ends with an interstrand bulge, as detected by EndsWithInterstrandBulge.
+//NOTE(review): "Interstand" in the name looks like a typo for "interstrand";
+//kept as-is since callers use this spelling.
+void FilterInterstandBulges(PathContainer &paths) {
+    DEBUG ("Try to delete paths with interstand bulges");
+    for (auto iter = paths.begin(); iter != paths.end(); ++iter) {
+        if (EndsWithInterstrandBulge(*iter.get())) {
+            iter.get()->PopBack();
+        }
+        if (EndsWithInterstrandBulge(*iter.getConjugate())) {
+            iter.getConjugate()->PopBack();
+        }
+    }
+    DEBUG("deleted paths with interstand bulges");
+}
+
+void PathExtendLauncher::RemoveOverlapsAndArtifacts(PathContainer &paths,
+                                                    GraphCoverageMap &cover_map,
+                                                    const PathExtendResolver &resolver) const {
+    INFO("Finalizing paths");
+
+    INFO("Deduplicating paths");
+    Deduplicate(gp_.g, paths, cover_map, params_.min_edge_len,
+                         params_.max_path_diff);
 
-    if (params_.pset.remove_overlaps) {
+    INFO("Paths deduplicated");
+
+    if (params_.pset.overlap_removal.enabled) {
         resolver.RemoveOverlaps(paths, cover_map, params_.min_edge_len, params_.max_path_diff,
-                                 params_.pset.cut_all_overlaps,
-                                 (params_.mode == config::pipeline_type::moleculo));
-    } else if (params_.mode == config::pipeline_type::rna) {
-        resolver.RemoveRNAOverlaps(paths, cover_map, params_.min_edge_len, params_.max_path_diff);
+                                params_.pset.overlap_removal.end_start_only,
+                                params_.pset.overlap_removal.cut_all);
     } else {
-        resolver.RemoveEqualPaths(paths, cover_map, params_.min_edge_len);
+        INFO("Overlaps will not be removed");
     }
 
+    //TODO do we still need it?
     if (params_.avoid_rc_connections) {
-        paths.FilterInterstandBulges();
+        FilterInterstandBulges(paths);
     }
-    paths.FilterEmptyPaths();
     resolver.AddUncoveredEdges(paths, cover_map);
 
-    if (params_.pset.path_filtration.enabled) {
-        LengthPathFilter(gp_.g, params_.pset.path_filtration.min_length).filter(paths);;
-        IsolatedPathFilter(gp_.g,
-                           params_.pset.path_filtration.min_length_for_low_covered,
-                           params_.pset.path_filtration.min_coverage).filter(paths);
-        IsolatedPathFilter(gp_.g, params_.pset.path_filtration.isolated_min_length).filter(paths);
-    }
     paths.SortByLength();
-    for (auto &path : paths) {
-        path.first->ResetOverlaps();
+    INFO("Paths finalized");
+}
+
+
+//Filters paths according to the given path_filtration settings (when enabled):
+//1) drops paths shorter than the combined absolute/relative min-length cutoff;
+//2) drops low-covered paths below a (typically longer) length cutoff;
+//3) drops isolated paths that are both short and low-covered.
+//Each cutoff merges an absolute and a read-length-relative value via
+//GetLengthCutoff. Finally re-sorts surviving paths by length.
+void PathExtendLauncher::CleanPaths(PathContainer &paths, const pe_config::ParamSetT::PathFiltrationT &path_filtration) const {
+    if (path_filtration.enabled) {
+        paths.FilterPaths(LengthPathCondition(GetLengthCutoff(path_filtration.min_length, path_filtration.rel_cutoff)));
+        paths.FilterPaths(func::And(CoveragePathCondition(gp_.g, path_filtration.min_coverage),
+                                    LengthPathCondition(GetLengthCutoff(path_filtration.min_length_for_low_covered, path_filtration.rel_low_covered_cutoff))));
+        paths.FilterPaths(func::And(IsolatedPathCondition(gp_.g),
+                                    func::And(LengthPathCondition(GetLengthCutoff(path_filtration.isolated_min_length, path_filtration.rel_isolated_cutoff)),
+                                              CoveragePathCondition(gp_.g, path_filtration.isolated_min_cov))));
+    }
+
+    paths.SortByLength();
+}
+
+
+//Combines an absolute length cutoff (bp) with a read-length-relative one into
+//a single cutoff in K-adjusted coordinates: max(0, abs - K, rel*RL - K),
+//where RL is the dataset read length and K the assembly k-mer size.
+//Intermediate math is done in signed ints so the "- K" underflow clamps to 0.
+size_t PathExtendLauncher::GetLengthCutoff(size_t abs_cutoff, double rel_cutoff) const {
+    int rel_len = int(rel_cutoff * double(cfg::get().ds.RL())) - int(cfg::get().K);
+    int abs_len = int(abs_cutoff) - int(cfg::get().K);
+    size_t result = (size_t) max(0, max(rel_len, abs_len));
+
+    INFO("Read length relative cutoff " << rel_cutoff << " converted to " << rel_len);
+    INFO("Read length absolute cutoff " << abs_cutoff << " bp converted to " << result);
+    INFO("Length cutoff: " << result);
+    return result;
+}
 
 void PathExtendLauncher::TraverseLoops(PathContainer &paths, GraphCoverageMap &cover_map) const {
@@ -258,34 +297,30 @@ void PathExtendLauncher::TraverseLoops(PathContainer &paths, GraphCoverageMap &c
     INFO("Traversed " << res << " loops");
 }
 
-Extenders PathExtendLauncher::ConstructMPExtender(const ExtendersGenerator &generator, size_t uniqe_edge_len) {
-    ScaffoldingUniqueEdgeAnalyzer additional_edge_analyzer(gp_, (size_t) uniqe_edge_len, unique_data_.unique_variation_);
-    unique_data_.unique_storages_.push_back(make_shared<ScaffoldingUniqueEdgeStorage>());
-    additional_edge_analyzer.FillUniqueEdgeStorage(*unique_data_.unique_storages_.back());
-
-    return generator.MakeMPExtenders(*unique_data_.unique_storages_.back());
+void PathExtendLauncher::AddScaffUniqueStorage(size_t uniqe_edge_len) {
+    ScaffoldingUniqueEdgeAnalyzer additional_edge_analyzer(gp_, (size_t) uniqe_edge_len,
+                                                           unique_data_.unique_variation_);
+    unique_data_.unique_storages_.push_back(ScaffoldingUniqueEdgeStorage());
+    additional_edge_analyzer.FillUniqueEdgeStorage(unique_data_.unique_storages_.back());
 }
 
 Extenders PathExtendLauncher::ConstructMPExtenders(const ExtendersGenerator &generator) {
     const pe_config::ParamSetT &pset = params_.pset;
 
-    Extenders extenders =  generator.MakeMPExtenders(unique_data_.main_unique_storage_);
-    INFO("Using " << extenders.size() << " mate-pair " << support_.LibStr(extenders.size()));
-
     size_t cur_length = unique_data_.min_unique_length_ - pset.scaffolding2015.unique_length_step;
     size_t lower_bound = max(pset.scaffolding2015.unique_length_lower_bound, pset.scaffolding2015.unique_length_step);
 
     while (cur_length > lower_bound) {
-        INFO("Adding extender with length " << cur_length);
-        push_back_all(extenders, ConstructMPExtender(generator, cur_length));
+        INFO("Will add extenders for length " << cur_length);
+        AddScaffUniqueStorage(cur_length);
         cur_length -= pset.scaffolding2015.unique_length_step;
     }
     if (unique_data_.min_unique_length_ > lower_bound) {
-        INFO("Adding final extender with length " << lower_bound);
-        push_back_all(extenders, ConstructMPExtender(generator, lower_bound));
+        INFO("Will add final extenders for length " << lower_bound);
+        AddScaffUniqueStorage(lower_bound);
     }
 
-    return extenders;
+    return generator.MakeMPExtenders();
 }
 
 void PathExtendLauncher::FillPathContainer(size_t lib_index, size_t size_threshold) {
@@ -300,17 +335,17 @@ void PathExtendLauncher::FillPathContainer(size_t lib_index, size_t size_thresho
         BidirectionalPath *conj_path = new BidirectionalPath(new_path->Conjugate());
         new_path->SetWeight((float) path.getWeight());
         conj_path->SetWeight((float) path.getWeight());
-        unique_data_.long_reads_paths_[lib_index]->AddPair(new_path, conj_path);
+        unique_data_.long_reads_paths_[lib_index].AddPair(new_path, conj_path);
     }
-    DEBUG("Long reads paths " << unique_data_.long_reads_paths_[lib_index]->size());
-    unique_data_.long_reads_cov_map_[lib_index]->AddPaths(*unique_data_.long_reads_paths_[lib_index]);
+    DEBUG("Long reads paths " << unique_data_.long_reads_paths_[lib_index].size());
+    unique_data_.long_reads_cov_map_[lib_index].AddPaths(unique_data_.long_reads_paths_[lib_index]);
 }
 
 
 void PathExtendLauncher::FillLongReadsCoverageMaps() {
     for (size_t lib_index = 0; lib_index < dataset_info_.reads.lib_count(); lib_index++) {
-        unique_data_.long_reads_paths_.push_back(make_shared<PathContainer>());
-        unique_data_.long_reads_cov_map_.push_back(make_shared<GraphCoverageMap>(gp_.g));
+        unique_data_.long_reads_paths_.push_back(PathContainer());
+        unique_data_.long_reads_cov_map_.push_back(GraphCoverageMap(gp_.g));
         if (support_.IsForSingleReadExtender(dataset_info_.reads[lib_index])) {
             FillPathContainer(lib_index);
         }
@@ -319,6 +354,7 @@ void PathExtendLauncher::FillLongReadsCoverageMaps() {
 
 void  PathExtendLauncher::FillPBUniqueEdgeStorages() {
     //FIXME magic constants
+    //FIXME need to change for correct usage of preliminary contigs in loops
     ScaffoldingUniqueEdgeAnalyzer unique_edge_analyzer_pb(gp_, 500, 0.5);
 
     INFO("Filling backbone edges for long reads scaffolding...");
@@ -348,26 +384,26 @@ void  PathExtendLauncher::FillPBUniqueEdgeStorages() {
 
 Extenders PathExtendLauncher::ConstructPBExtenders(const ExtendersGenerator &generator) {
     FillPBUniqueEdgeStorages();
-    return generator.MakePBScaffoldingExtenders(unique_data_.unique_pb_storage_,
-                                                unique_data_.long_reads_cov_map_);
+    return generator.MakePBScaffoldingExtenders();
 }
 
 
-Extenders PathExtendLauncher::ConstructExtenders(const GraphCoverageMap& cover_map) {
+Extenders PathExtendLauncher::ConstructExtenders(const GraphCoverageMap &cover_map,
+                                                 UsedUniqueStorage &used_unique_storage) {
     INFO("Creating main extenders, unique edge length = " << unique_data_.min_unique_length_);
     if (support_.SingleReadsMapped() || support_.HasLongReads())
         FillLongReadsCoverageMaps();
 
-    ExtendersGenerator generator(dataset_info_, params_, gp_, cover_map, support_);
-    Extenders extenders = generator.MakeBasicExtenders(unique_data_.main_unique_storage_,
-                                                       unique_data_.long_reads_cov_map_);
+    ExtendersGenerator generator(dataset_info_, params_, gp_, cover_map,
+                                 unique_data_, used_unique_storage, support_);
+    Extenders extenders = generator.MakeBasicExtenders();
 
     //long reads scaffolding extenders.
     if (support_.HasLongReads()) {
         if (params_.pset.sm == sm_old) {
             INFO("Will not use new long read scaffolding algorithm in this mode");
         } else {
-            push_back_all(extenders, ConstructPBExtenders(generator));
+            utils::push_back_all(extenders, ConstructPBExtenders(generator));
         }
     }
 
@@ -375,26 +411,69 @@ Extenders PathExtendLauncher::ConstructExtenders(const GraphCoverageMap& cover_m
         if (params_.pset.sm == sm_old) {
             INFO("Will not use mate-pairs is this mode");
         } else {
-            push_back_all(extenders, ConstructMPExtenders(generator));
+            utils::push_back_all(extenders, ConstructMPExtenders(generator));
         }
     }
 
     if (params_.pset.use_coordinated_coverage)
-        push_back_all(extenders, generator.MakeCoverageExtenders());
+        utils::push_back_all(extenders, generator.MakeCoverageExtenders());
 
     INFO("Total number of extenders is " << extenders.size());
     return extenders;
 }
 
-void PathExtendLauncher::PolishPaths(const PathContainer &paths, PathContainer &result) const {
+void PathExtendLauncher::PolishPaths(const PathContainer &paths, PathContainer &result,
+                                     const GraphCoverageMap& /* cover_map */) const {
     //Fixes distances for paths gaps and tries to fill them in
     INFO("Closing gaps in paths");
-    PathPolisher polisher(gp_, dataset_info_, unique_data_.main_unique_storage_, params_.max_polisher_gap);
-    polisher.PolishPaths(paths, result);
+
+    vector<shared_ptr<PathGapCloser>> gap_closers;
+
+    gap_closers.push_back(make_shared<DijkstraGapCloser>(gp_.g, params_.max_polisher_gap));
+    for (size_t i = 0; i < dataset_info_.reads.lib_count(); i++) {
+        auto lib = dataset_info_.reads[i];
+        if (lib.type() == io::LibraryType::HQMatePairs || lib.type() == io::LibraryType::MatePairs) {
+            shared_ptr<PairedInfoLibrary> paired_lib = MakeNewLib(gp_.g, lib, gp_.paired_indices[i]);
+            gap_closers.push_back(make_shared<MatePairGapCloser> (gp_.g, params_.max_polisher_gap, paired_lib,
+                                                                   unique_data_.main_unique_storage_));
+        }
+    }
+
+//// TODO: is it really empty?
+//    UniqueData unique_data;
+//    UsedUniqueStorage used_unique_storage(unique_data.main_unique_storage_);
+//    ExtendersGenerator generator(dataset_info_, params_, gp_, cover_map,
+//                                 unique_data, used_unique_storage, support_);
+//    auto polisher_storage = ScaffoldingUniqueEdgeStorage();
+//    for  (const auto& extender: generator.MakePEExtenders()) {
+//        gap_closers.push_back(make_shared<PathExtenderGapCloser>(gp_.g, params_.max_polisher_gap, extender));
+//    }
+//FIXME: uncomment cover_map 
+
+    PathPolisher polisher(gp_, gap_closers);
+    result = polisher.PolishPaths(paths);
     result.SortByLength();
     INFO("Gap closing completed")
 }
 
+void PathExtendLauncher::FilterPaths() {
+    PathContainer contig_paths_copy(gp_.contig_paths.begin(), gp_.contig_paths.end());
+    for (const auto& it: params_.pset.path_filtration) {
+        if (it.first == "default" && it.second.enabled) {
+            INFO("Finalizing main paths");
+            CleanPaths(gp_.contig_paths, it.second);
+            DebugOutputPaths(gp_.contig_paths, "final_paths");
+        }
+        else if (it.second.enabled) {
+            INFO("Finalizing paths - " + it.first);
+            PathContainer to_clean(contig_paths_copy.begin(), contig_paths_copy.end());
+            CleanPaths(to_clean, it.second);
+            DebugOutputPaths(to_clean, it.first + "_final_paths");
+            writer_.OutputPaths(to_clean, params_.output_dir + it.first + "_filtered_final_paths" + ".fasta");
+        }
+    }
+}
+
 void PathExtendLauncher::Launch() {
     INFO("ExSPAnder repeat resolving tool started");
     make_dir(params_.output_dir);
@@ -415,30 +494,37 @@ void PathExtendLauncher::Launch() {
     DebugOutputPaths(seeds, "init_paths");
 
     GraphCoverageMap cover_map(gp_.g);
-    Extenders extenders = ConstructExtenders(cover_map);
-    shared_ptr<CompositeExtender> composite_extender = make_shared<CompositeExtender>(gp_.g, cover_map, extenders,
-                                                                                      unique_data_.main_unique_storage_,
-                                                                                      params_.max_path_diff,
-                                                                                      params_.pset.extension_options.max_repeat_length,
-                                                                                      params_.detect_repeats_online);
-
-    auto paths = resolver.ExtendSeeds(seeds, *composite_extender);
-    paths.FilterEmptyPaths();
-    paths.SortByLength();
+    UsedUniqueStorage used_unique_storage(unique_data_.main_unique_storage_);
+    Extenders extenders = ConstructExtenders(cover_map, used_unique_storage);
+    CompositeExtender composite_extender(gp_.g, cover_map,
+                                         used_unique_storage,
+                                         extenders,
+                                         params_.max_path_diff);
+
+    auto paths = resolver.ExtendSeeds(seeds, composite_extender);
     DebugOutputPaths(paths, "raw_paths");
 
-    FinalizePaths(paths, cover_map, resolver);
+    RemoveOverlapsAndArtifacts(paths, cover_map, resolver);
     DebugOutputPaths(paths, "before_loop_traversal");
-
+    //TODO think about ordering of path polisher vs loop traversal
     TraverseLoops(paths, cover_map);
     DebugOutputPaths(paths, "loop_traveresed");
 
-    PolishPaths(paths, gp_.contig_paths);
+    //TODO does path polishing correctly work with coverage map
+    PolishPaths(paths, gp_.contig_paths, cover_map);
     DebugOutputPaths(gp_.contig_paths, "polished_paths");
-    
+
+    //TODO use move assignment to original map here
     GraphCoverageMap polished_map(gp_.g, gp_.contig_paths, true);
-    FinalizePaths(gp_.contig_paths, polished_map, resolver);
-    DebugOutputPaths(gp_.contig_paths, "final_paths");
+    RemoveOverlapsAndArtifacts(gp_.contig_paths, polished_map, resolver);
+    DebugOutputPaths(gp_.contig_paths, "overlap_removed");
+
+    if (params_.ss.ss_enabled) {
+        PathContainerCoverageSwitcher switcher(gp_.g, gp_.ss_coverage.front(), params_.ss.antisense);
+        switcher.Apply(gp_.contig_paths);
+    }
+
+    FilterPaths();
 
     CountMisassembliesWithReference(gp_.contig_paths);
 
diff --git a/src/common/modules/path_extend/pipeline/launcher.hpp b/src/common/modules/path_extend/pipeline/launcher.hpp
index e936f58..1b46b30 100644
--- a/src/common/modules/path_extend/pipeline/launcher.hpp
+++ b/src/common/modules/path_extend/pipeline/launcher.hpp
@@ -13,34 +13,22 @@
 #include "modules/path_extend/scaffolder2015/scaffold_graph.hpp"
 #include "assembly_graph/paths/bidirectional_path_io/bidirectional_path_output.hpp"
 
+#include "modules/alignment/rna/ss_coverage.hpp"
+
 namespace path_extend {
 
 using namespace debruijn_graph;
 
 class PathExtendLauncher {
-
-private:
     const config::dataset& dataset_info_;
     const PathExtendParamsContainer& params_;
     conj_graph_pack& gp_;
     PELaunchSupport support_;
 
-    DefaultContigCorrector<ConjugateDeBruijnGraph> corrector_;
-    DefaultContigConstructor<ConjugateDeBruijnGraph> constructor_;
     shared_ptr<ContigNameGenerator> contig_name_generator_;
     ContigWriter writer_;
 
-    struct {
-        size_t min_unique_length_;
-        double unique_variation_;
-
-        ScaffoldingUniqueEdgeStorage main_unique_storage_;
-        vector<shared_ptr<ScaffoldingUniqueEdgeStorage>> unique_storages_;
-
-        ScaffoldingUniqueEdgeStorage unique_pb_storage_;
-        vector<shared_ptr<PathContainer>> long_reads_paths_;
-        vector<shared_ptr<GraphCoverageMap>> long_reads_cov_map_;
-    } unique_data_;
+    UniqueData unique_data_;
 
     vector<shared_ptr<ConnectionCondition>>
         ConstructPairedConnectionConditions(const ScaffoldingUniqueEdgeStorage& edge_storage) const;
@@ -69,20 +57,25 @@ private:
 
     void DebugOutputPaths(const PathContainer& paths, const string& name) const;
 
-    void FinalizePaths(PathContainer& paths, GraphCoverageMap &cover_map, const PathExtendResolver&resolver) const;
+    void RemoveOverlapsAndArtifacts(PathContainer &paths, GraphCoverageMap &cover_map, const PathExtendResolver &resolver) const;
 
-    void TraverseLoops(PathContainer& paths, GraphCoverageMap& cover_map) const;
+    void CleanPaths(PathContainer &paths, const pe_config::ParamSetT::PathFiltrationT &path_filtration) const;
 
-    void PolishPaths(const PathContainer &paths, PathContainer &result) const;
+    size_t GetLengthCutoff(size_t abs_cutoff, double rel_cutoff) const;
 
-    Extenders ConstructExtenders(const GraphCoverageMap& cover_map);
+    void TraverseLoops(PathContainer &paths, GraphCoverageMap &cover_map) const;
+
+    void PolishPaths(const PathContainer &paths, PathContainer &result, const GraphCoverageMap &cover_map) const;
+
+    Extenders ConstructExtenders(const GraphCoverageMap &cover_map, UsedUniqueStorage &used_unique_storage);
 
     Extenders ConstructMPExtenders(const ExtendersGenerator &generator);
 
-    Extenders ConstructMPExtender(const ExtendersGenerator &generator, size_t uniqe_edge_len);
+    void AddScaffUniqueStorage(size_t uniqe_edge_len);
 
     Extenders ConstructPBExtenders(const ExtendersGenerator &generator);
 
+    void FilterPaths();
 
 public:
 
@@ -93,19 +86,14 @@ public:
         params_(params),
         gp_(gp),
         support_(dataset_info, params),
-        corrector_(gp.g),
-        constructor_(gp.g, corrector_),
         contig_name_generator_(MakeContigNameGenerator(params_.mode, gp)),
-        writer_(gp.g, constructor_, gp_.components, contig_name_generator_),
+        writer_(gp.g, contig_name_generator_),
         unique_data_()
     {
         unique_data_.min_unique_length_ = params.pset.scaffolding2015.unique_length_upper_bound;
         unique_data_.unique_variation_ = params.pset.uniqueness_analyser.unique_coverage_variation;
     }
 
-    ~PathExtendLauncher() {
-    }
-
     void Launch();
 
 };
diff --git a/src/common/modules/path_extend/scaffolder2015/connection_condition2015.cpp b/src/common/modules/path_extend/scaffolder2015/connection_condition2015.cpp
index 9149f3c..f5eea7b 100644
--- a/src/common/modules/path_extend/scaffolder2015/connection_condition2015.cpp
+++ b/src/common/modules/path_extend/scaffolder2015/connection_condition2015.cpp
@@ -66,7 +66,7 @@ bool LongReadsLibConnectionCondition::CheckPath(BidirectionalPath *path, EdgeId
     if (pos2.size() != 1) {
         if (pos2.size() >= 2) {
             DEBUG("Something went wrong:: Edge " << graph_.int_id(e2) << "is called unique but presents in path twice! first edge " << graph_.int_id(e1) << " path ");
-            path->Print();
+            path->PrintDEBUG();
         }
         return false;
     }
@@ -216,7 +216,7 @@ int PairedLibConnectionCondition::GetMedianGap(debruijn_graph::EdgeId e1, debrui
 
 AssemblyGraphConnectionCondition::AssemblyGraphConnectionCondition(const debruijn_graph::Graph &g,
                     size_t max_connection_length, const ScaffoldingUniqueEdgeStorage & unique_edges) :
-        g_(g), max_connection_length_(max_connection_length), interesting_edge_set_(unique_edges.GetSet()), stored_distances_() {
+        g_(g), max_connection_length_(max_connection_length), interesting_edge_set_(unique_edges.unique_edges()), stored_distances_() {
 }
 
 map <debruijn_graph::EdgeId, double> AssemblyGraphConnectionCondition::ConnectedWith(debruijn_graph::EdgeId e) const {
diff --git a/src/common/modules/path_extend/scaffolder2015/connection_condition2015.hpp b/src/common/modules/path_extend/scaffolder2015/connection_condition2015.hpp
index be1f51c..160f3f8 100644
--- a/src/common/modules/path_extend/scaffolder2015/connection_condition2015.hpp
+++ b/src/common/modules/path_extend/scaffolder2015/connection_condition2015.hpp
@@ -5,7 +5,7 @@
 #include "assembly_graph/graph_support/scaff_supplementary.hpp"
 #include "modules/alignment/long_read_storage.hpp"
 #include "modules/path_extend/pe_utils.hpp"
-#include "common/assembly_graph/graph_support/basic_edge_conditions.hpp"
+#include "assembly_graph/graph_support/basic_edge_conditions.hpp"
 #include <map>
 #include <set>
 
diff --git a/src/common/modules/path_extend/scaffolder2015/extension_chooser2015.hpp b/src/common/modules/path_extend/scaffolder2015/extension_chooser2015.hpp
index 18b5721..c6b91f7 100644
--- a/src/common/modules/path_extend/scaffolder2015/extension_chooser2015.hpp
+++ b/src/common/modules/path_extend/scaffolder2015/extension_chooser2015.hpp
@@ -43,7 +43,7 @@ public:
             lib_connection_condition_(condition),
             unique_edges_(unique_edges),
             relative_weight_threshold_(relative_threshold),
-            graph_connection_condition_(g, 2 * unique_edges_.GetMinLength(), unique_edges),
+            graph_connection_condition_(g, 2 * unique_edges_.min_length(), unique_edges),
             //TODO to config!
             absolute_weight_threshold_(2),
             graph_connection_bonus_(2),
diff --git a/src/common/modules/path_extend/scaffolder2015/path_polisher.cpp b/src/common/modules/path_extend/scaffolder2015/path_polisher.cpp
index 77f636e..08da8ba 100644
--- a/src/common/modules/path_extend/scaffolder2015/path_polisher.cpp
+++ b/src/common/modules/path_extend/scaffolder2015/path_polisher.cpp
@@ -6,169 +6,192 @@ namespace path_extend {
 void PathPolisher::InfoAboutGaps(const PathContainer & result){
     for (const auto& p_iter: result) {
         for (size_t i = 1; i < p_iter.first->Size(); ++i) {
-            if (p_iter.first->GapAt(i) > 0) {
-                DEBUG("Gap "<< p_iter.first->GapAt(i) << " left between " << gp_.g.int_id(p_iter.first->At(i-1)) << " and " << gp_.g.int_id(p_iter.first->At(i)));
+            if (p_iter.first->GapAt(i).gap > 0) {
+                DEBUG("Gap "<< p_iter.first->GapAt(i).gap
+                            << " left between " << gp_.g.int_id(p_iter.first->At(i-1))
+                            << " and " << gp_.g.int_id(p_iter.first->At(i)));
             }
         }
     }
 }
 
-PathPolisher::PathPolisher(const conj_graph_pack& gp, const config::dataset& dataset_info, const ScaffoldingUniqueEdgeStorage& storage, size_t max_resolvable_len ): gp_(gp) {
-    gap_closers.push_back(make_shared<DijkstraGapCloser>(gp.g, max_resolvable_len));
-    for (size_t i = 0; i <  dataset_info.reads.lib_count(); i++) {
-        auto lib = dataset_info.reads[i];
-        if (lib.type() == io::LibraryType::HQMatePairs || lib.type() == io::LibraryType::MatePairs) {
-            shared_ptr<PairedInfoLibrary> paired_lib = MakeNewLib(gp.g, lib, gp.paired_indices[i]);
-            gap_closers.push_back(make_shared<MatePairGapCloser> (gp.g, max_resolvable_len, paired_lib, storage));
-        }
-    }
-}
-
-void PathPolisher::PolishPaths(const PathContainer &paths, PathContainer &result) {
-    result.clear();
-
-    for (auto iter = paths.begin(); iter != paths.end(); ++iter) {
-
-        BidirectionalPath *path = new BidirectionalPath(Polish(*iter.get()));
-        BidirectionalPath *conjugatePath = new BidirectionalPath(Polish(path->Conjugate()));
-        BidirectionalPath *re_path = new BidirectionalPath(conjugatePath->Conjugate());
-        result.AddPair(re_path, conjugatePath);
+PathContainer PathPolisher::PolishPaths(const PathContainer &paths) {
+    PathContainer result;
+    for (const auto& path_pair : paths) {
+        BidirectionalPath path = Polish(*path_pair.first);
+        BidirectionalPath *conjugate_path = new BidirectionalPath(Polish(path.Conjugate()));
+        BidirectionalPath *re_path = new BidirectionalPath(conjugate_path->Conjugate());
+        result.AddPair(re_path, conjugate_path);
     }
     InfoAboutGaps(result);
+    return result;
 }
 
-size_t DijkstraGapCloser::MinPathLength(const omnigraph::PathStorageCallback<Graph>& path_storage) const {
-    size_t shortest_len = omnigraph::CumulativeLength(g_, path_storage.paths().front());
-    for (size_t j = 1; j < path_storage.paths().size(); ++j) {
-        size_t cur_len = omnigraph::CumulativeLength(g_, path_storage.paths()[j]);
+size_t DijkstraGapCloser::MinPathLength(const PathsT& paths) const {
+    size_t shortest_len = omnigraph::CumulativeLength(g_, paths.front());
+    for (size_t j = 1; j < paths.size(); ++j) {
+        size_t cur_len = omnigraph::CumulativeLength(g_, paths[j]);
         shortest_len = min(shortest_len, cur_len);
     }
     return shortest_len;
 }
 
-BidirectionalPath PathPolisher::Polish(const BidirectionalPath &path) {
-    if (path.Empty())
-        return path;
-    shared_ptr<BidirectionalPath> current;
-    shared_ptr<BidirectionalPath> prev_step = std::make_shared<BidirectionalPath>(path);
+BidirectionalPath PathPolisher::Polish(const BidirectionalPath &init_path) {
+    if (init_path.Empty())
+        return init_path;
+
+    auto path = make_shared<BidirectionalPath>(init_path);
+    size_t prev_len = path->Size();
+
     bool changed = true;
     size_t count = 0;
     while (changed) {
         changed = false;
-        for (size_t i = 0; i < gap_closers.size(); i++) {
-            current = std::make_shared<BidirectionalPath>(gap_closers[i]->Polish(*prev_step));
-            if (current->Size() != prev_step->Size()){
+        for (const auto& gap_closer : gap_closers_) {
+            path = make_shared<BidirectionalPath>(gap_closer->CloseGaps(*path));
+            if (path->Size() != prev_len){
                 changed = true;
-                std::swap(current, prev_step);
+                prev_len = path->Size();
                 break;
             }
         }
         count++;
-        if (count > 5) {
+        if (count > MAX_POLISH_ATTEMPTS) {
             INFO("Unexpected cycle while polishing path, stopping polishing " );
-            path.Print();
+            path->PrintDEBUG();
             break;
         }
     }
-    return *prev_step;
+    return *path;
 }
 
-BidirectionalPath DijkstraGapCloser::Polish(const BidirectionalPath &path) {
+BidirectionalPath PathGapCloser::CloseGaps(const BidirectionalPath &path) const {
     BidirectionalPath result(g_);
     if (path.Empty())
         return result;
-    result.PushBack(path[0], path.GapInfoAt(0));
+
+    VERIFY(path.GapAt(0) == Gap());
+    result.PushBack(path[0]);
     for (size_t i = 1; i < path.Size(); ++i) {
         if (g_.EdgeEnd(path[i - 1]) == g_.EdgeStart(path[i])) {
-            result.PushBack(path[i], path.GapInfoAt(i));
+            result.PushBack(path[i], path.GapAt(i));
         } else {
-            //Connect edges using Dijkstra
-            omnigraph::PathStorageCallback<Graph> path_storage(g_);
-            omnigraph::ProcessPaths(g_, 0,
-                                    max_path_len_,
-                                    g_.EdgeEnd(path[i - 1]),
-                                    g_.EdgeStart(path[i]),
-                                    path_storage);
-
-            if (path_storage.size() == 0) {
-                //No paths found, keeping the gap
-                result.PushBack(path[i], path.GapInfoAt(i));
-            } else if (path_storage.size() > 1) {
-                //More than one path, using shortest path for gap length estimation
-                //We cannot use both common paths and bridges in one attempt;
-                if (!FillWithMultiplePaths(path, i, path_storage, result))
-                    FillWithBridge(path, i, path_storage, result);
-            } else {
-                //Closing the gap with the unique shortest path
-                for (size_t j = 0; j < path_storage.paths().front().size(); ++j) {
-                    result.PushBack(path_storage.paths().front()[j]);
-                }
-                result.PushBack(path[i]);
-            }
+            DEBUG("Gap between " << path[i - 1].int_id() << " and " << path[i].int_id() << " " << path.GapAt(i));
+            auto new_gap = CloseGap(path, i, result);
+            DEBUG("gap after " << new_gap);
+            result.PushBack(path[i], new_gap);
         }
     }
     return result;
 }
 
+Gap DijkstraGapCloser::CloseGap(EdgeId target_edge, const Gap &orig_gap, BidirectionalPath &result) const {
+    VertexId target_vertex = g_.EdgeStart(target_edge);
+//TODO:: actually we do not need paths, only edges..
+    omnigraph::PathStorageCallback<Graph> path_storage(g_);
+    int process_res = omnigraph::ProcessPaths(g_, 0,
+                            max_path_len_,
+                            g_.EdgeEnd(result.Back()),
+                            target_vertex,
+                            path_storage);
+    if (path_storage.size() == 0 || process_res != 0) {
+//No paths found or path_processor error (in particular too many vertices in Dijkstra), keeping the gap
+        DEBUG("PathProcessor nonzero exit code, gap left unchanged");
+        return orig_gap;
+    } else if (path_storage.size() > 1) {
+//More than one path found, using the shortest path for gap length estimation
+//We cannot use both common paths and bridges in one attempt;
+        Gap gap = FillWithMultiplePaths(path_storage.paths(), result);
+        if (gap == Gap::INVALID())
+            gap = FillWithBridge(orig_gap, path_storage.paths(), result);
+        return gap;
+    } else {
+//Closing the gap with the unique shortest result
+        DEBUG("Unique path gap closing:");
+        for (EdgeId e : path_storage.paths().front()) {
+            DEBUG(e.int_id());
+            result.PushBack(e);
+        }
+        return Gap(0);
+    }
+}
+
+Gap PathExtenderGapCloser::CloseGap(EdgeId target_edge, const Gap &orig_gap, BidirectionalPath &result) const {
+    size_t added = 0;
+    VertexId target_vertex = g_.EdgeStart(target_edge);
+    while (g_.EdgeEnd(result.Back()) != target_vertex) {
+        bool has_grown = extender_->MakeGrowStep(result);
+        if (!has_grown)
+            break;
+        added += g_.length(result.Back());
+    }
+    //FIXME think of checking for 0 in advance
+    return Gap(orig_gap.gap - (int) added, 0, orig_gap.trash_current);
+}
 
-bool DijkstraGapCloser::FillWithBridge(const BidirectionalPath& path, size_t index,
-                                                          const omnigraph::PathStorageCallback<Graph>& path_storage,
-                                                          BidirectionalPath& result) const {
-//TODO:: constant;
-    auto counts = CountEdgesQuantity(path_storage, 300);
-    size_t path_quantity = path_storage.paths().size();
+Gap DijkstraGapCloser::FillWithBridge(const Gap &orig_gap,
+                                      const PathsT& paths,
+                                      BidirectionalPath& result) const {
+    //TODO:: constant;
+    auto counts = CountEdgesQuantity(paths, 300);
+    DEBUG("filing gap with bridges");
+    size_t path_quantity = paths.size();
     vector<EdgeId> bridges;
     for (const auto& pair: counts)
         if (pair.second == path_quantity)
             bridges.push_back(pair.first);
-    if (bridges.size() > 0) {
+
+    if (bridges.empty()) {
+        return orig_gap;
+    } else {
         std::sort(bridges.begin(), bridges.end(), [&] (EdgeId e1, EdgeId e2) {
-            return g_.length(e1) > g_.length(e2); });
+            return g_.length(e1) > g_.length(e2);});
         EdgeId bridge = bridges[0];
-        int min_gap_before = path.GapAt(index);
-        int min_gap_after = path.GapAt(index);
-        for (const auto& path:path_storage.paths()) {
-            int current_before = 0;
-            for(size_t i = 0; i< path.size(); i++) {
-                if (path[i] != bridge)
-                    current_before += (int)g_.length(path[i]);
-                else
+
+        VERIFY(orig_gap.gap >= 0 && orig_gap.NoTrash());
+        int min_gap_before = orig_gap.gap;
+        int min_gap_after = orig_gap.gap;
+        for (const auto& path : paths) {
+            size_t current_before = 0;
+            for (EdgeId e : path) {
+                if (e == bridge)
                     break;
+                current_before += g_.length(e);
             }
-            int current_after = (int)CumulativeLength(g_, path) - current_before - int(g_.length(bridge));
-            min_gap_after = std::min(current_after, min_gap_after);
-            min_gap_before = std::min(current_before, min_gap_before);
+            size_t current_after = CumulativeLength(g_, path) - current_before - g_.length(bridge);
+            min_gap_after = std::min(int(current_after), min_gap_after);
+            min_gap_before = std::min(int(current_before), min_gap_before);
         }
+
         min_gap_after = std::max(min_gap_after, min_gap_);
         min_gap_before = std::max(min_gap_before, min_gap_);
-        result.PushBack(bridge, min_gap_before);
-        result.PushBack(path[index], min_gap_after);
-        return true;
-    } else {
-        result.PushBack(path[index], path.GapAt(index));
-        return false;
+        DEBUG(bridge.int_id() << " " << min_gap_before);
+        result.PushBack(bridge, Gap(min_gap_before));
+        return Gap(min_gap_after);
     }
 }
 
-bool DijkstraGapCloser::FillWithMultiplePaths(const BidirectionalPath& path, size_t index,
-                                              const omnigraph::PathStorageCallback<Graph>& path_storage,
+Gap DijkstraGapCloser::FillWithMultiplePaths(const PathsT& paths,
                                               BidirectionalPath& result) const {
     bool changed = false;
-    auto left = LCP(path_storage);
+    auto left = LCP(paths);
+    DEBUG("Filling gap with prefix")
     for (auto e : left) {
+        DEBUG(e.int_id());
         result.PushBack(e);
         changed = true;
     }
-    int middle_gap = (int) max(size_t(min_gap_), MinPathLength(path_storage) -
-            omnigraph::CumulativeLength(g_, left));
-    if (changed)
-        result.PushBack(path[index], middle_gap);
-    return changed;
+    if (changed) {
+        int gap = max(min_gap_,
+                      int(MinPathLength(paths) - omnigraph::CumulativeLength(g_, left)));
+        return Gap(gap);
+    } else
+        return Gap::INVALID();
 }
 
-std::map<EdgeId, size_t> DijkstraGapCloser::CountEdgesQuantity(const omnigraph::PathStorageCallback<Graph>& path_storage, size_t length_limit ) const{
+std::map<EdgeId, size_t> DijkstraGapCloser::CountEdgesQuantity(const PathsT &paths, size_t length_limit) const {
     map<EdgeId, size_t> res;
-    for (const auto& path: path_storage.paths()) {
+    for (const auto& path: paths) {
         set<EdgeId> edge_set(path.begin(), path.end());
         for (const auto& e: edge_set) {
             if (g_.length(e) >= length_limit) {
@@ -179,23 +202,23 @@ std::map<EdgeId, size_t> DijkstraGapCloser::CountEdgesQuantity(const omnigraph::
     return res;
 };
 
-size_t DijkstraGapCloser::MinPathSize(const omnigraph::PathStorageCallback<Graph>& path_storage) const {
-    size_t size = path_storage.paths().front().size();
-    for (size_t i = 1; i < path_storage.size(); ++i) {
-        size = min(size, path_storage.paths()[i].size());
+size_t DijkstraGapCloser::MinPathSize(const PathsT& paths) const {
+    size_t size = paths.front().size();
+    for (size_t i = 1; i < paths.size(); ++i) {
+        size = min(size, paths[i].size());
     }
     return size;
 }
 
-vector<EdgeId> DijkstraGapCloser::LCP(const omnigraph::PathStorageCallback<Graph>& path_storage) const {
+vector<EdgeId> DijkstraGapCloser::LCP(const PathsT& paths) const {
     bool all_equal = true;
     size_t index = 0;
-    size_t min_size = MinPathSize(path_storage);
+    size_t min_size = MinPathSize(paths);
 
     while (index < min_size && all_equal) {
-        for (size_t i = 1; i < path_storage.size(); ++i) {
-            auto e = path_storage.paths().front()[index];
-            if (e != path_storage.paths()[i][index]) {
+        for (size_t i = 1; i < paths.size(); ++i) {
+            auto e = paths.front()[index];
+            if (e != paths[i][index]) {
                 all_equal = false;
                 break;
             }
@@ -206,26 +229,28 @@ vector<EdgeId> DijkstraGapCloser::LCP(const omnigraph::PathStorageCallback<Graph
 
     vector<EdgeId> result;
     for (size_t i = 0; i < index; ++i) {
-        result.push_back(path_storage.paths().front()[i]);
+        result.push_back(paths.front()[i]);
     }
     return result;
 }
 
-
-EdgeId MatePairGapCloser::FindNext(const BidirectionalPath& path, size_t index,
-                    const set<EdgeId>& present_in_paths, VertexId v) const {
-    auto next_edges = g_.OutgoingEdges(v);
+EdgeId MatePairGapCloser::FindNext(const BidirectionalPath& path,
+                                   const set<EdgeId>& present_in_paths,
+                                   VertexId last_v, EdgeId target_edge) const {
+    auto next_edges = g_.OutgoingEdges(last_v);
     map<EdgeId, double> candidates;
+
     for (const auto edge: next_edges)
         if (present_in_paths.find(edge) != present_in_paths.end())
             candidates.insert(make_pair(edge, 0));
-    if (candidates.size() <= 1 ) {
-        if (candidates.size() == 0 || candidates.begin()->first == path[index])
+
+    if (candidates.size() <= 1) {
+        if (candidates.size() == 0 || candidates.begin()->first == target_edge)
             return EdgeId(0);
         else 
             return (candidates.begin()->first);
     } else {
-        int i = (int) index - 1;
+        int i = (int) path.Size() - 1;
         for (; i >= 0; i--) {
             if (storage_.IsUnique(path[i]))
                 break;
@@ -248,79 +273,73 @@ EdgeId MatePairGapCloser::FindNext(const BidirectionalPath& path, size_t index,
             sort(to_sort.begin(), to_sort.end(), [&] (std::pair<EdgeId, double> a, std::pair<EdgeId, double> b ) {
                 return a.second > b.second;
             });
-            if (to_sort[0].second > to_sort[1].second * weight_priority && to_sort[0].first != path[index])
+            if (to_sort[0].second > to_sort[1].second * weight_priority && to_sort[0].first != target_edge)
                 return to_sort[0].first;
             else
                 return EdgeId(0);
         }
     }
 }
+//FIXME review logic
+Gap MatePairGapCloser::CloseGap(EdgeId target_edge, const Gap &orig_gap, BidirectionalPath &path) const {
+    VertexId target_vertex = g_.EdgeStart(target_edge);
+//TODO: is the trash_previous condition needed globally, or only here?
+    if (orig_gap.gap <= min_gap_ || orig_gap.trash_previous > 0) {
+        return orig_gap;
+    } else {
+        vector<EdgeId> addition;
+        EdgeId last_e = path.Back();
+        VertexId last_v = g_.EdgeEnd(last_e);
+        DEBUG("Closing gap with mate pairs between edge " << g_.int_id(last_e)
+                  << " and edge " << g_.int_id(target_edge) << " was " << orig_gap);
+        omnigraph::PathStorageCallback<Graph> path_storage(g_);
+        int process_res = omnigraph::ProcessPaths(g_, 0,
+                                max_path_len_,
+                                last_v,
+                                target_vertex,
+                                path_storage);
+        if (process_res != 0) {
+            DEBUG("PathProcessor nonzero exit code, gap left unchanged");
+            return orig_gap;
+        }
+        set<EdgeId> present_in_paths;
+        for (const auto &p: path_storage.paths())
+            for (EdgeId e : p)
+                present_in_paths.insert(e);
 
-//TODO: make shorter functions
-BidirectionalPath MatePairGapCloser::Polish(const BidirectionalPath& path) {
-    BidirectionalPath result(g_);
-    DEBUG("Path " << path.GetId() << " len "<< path.Length() << " size " << path.Size());
-    result.PushBack(path[0], path.GapInfoAt(0));
-    for (size_t i = 1; i < path.Size(); ++i) {
-        if (g_.EdgeEnd(path[i - 1]) == g_.EdgeStart(path[i]) || path.GapAt(i) <= min_gap_) {
-            result.PushBack(path[i], path.GapInfoAt(i));
-        } else {
-            DEBUG("position "<< i <<" gap between edges " << g_.int_id(path[i-1]) << " and " << g_.int_id(path[i]) << " was " << path.GapAt(i));
-
-            vector<EdgeId> addition;
-            VertexId v = g_.EdgeEnd(path[i - 1]);
-            EdgeId last = path[i - 1];
-            omnigraph::PathStorageCallback<Graph> path_storage(g_);
-            omnigraph::ProcessPaths(g_, 0,
-                                    max_path_len_,
-                                    g_.EdgeEnd(path[i - 1]),
-                                    g_.EdgeStart(path[i]),
-                                    path_storage);
-            set<EdgeId> present_in_paths;
-            for(const auto &p: path_storage.paths())
-                for(size_t j = 0; j < p.size(); j ++)
-                    present_in_paths.insert(p[j]);
-            size_t total = 0;
-            while (last != EdgeId(0)){
-                last = FindNext(path, i, present_in_paths, v);
-                if (last != EdgeId(0)){
-                    v = g_.EdgeEnd(last);
-                    addition.push_back(last);
-                    total += g_.length(last);
-                }
-                if (total > max_path_len_){
-                    DEBUG("gap between edges " << g_.int_id(path[i-1]) << " and " << g_.int_id(path[i]) << " was: " << path.GapAt(i) << ", closing path length too long: " << total);
-                    break;
-                }
+        size_t total = 0;
+        while (last_e != EdgeId(0)) {
+            last_e = FindNext(path, present_in_paths, last_v, target_edge);
+            if (last_e != EdgeId(0)) {
+                last_v = g_.EdgeEnd(last_e);
+                addition.push_back(last_e);
+                total += g_.length(last_e);
             }
             if (total > max_path_len_) {
-                result.PushBack(path[i], path.GapInfoAt(i));
-                continue;                
-            }
-            int len = int(CumulativeLength(g_, addition));
-            int new_gap = path.GapAt(i) - len;
-            if (new_gap < min_gap_ && addition.size() > 0) {
-                if (path.GapAt(i) * 3 < len * 2 ) {
-//inserted path significantly longer than estimated gap
-                    DEBUG("Gap size estimation problem: gap between edges " << g_.int_id(path[i - 1]) << " and " << g_.int_id(path[i]) << " was " <<
-                         path.GapAt(i) << "filled len" << len);
-                }
-                if (g_.EdgeEnd(addition.back()) != g_.EdgeStart(path[i]))
-                    new_gap = min_gap_;
-                else
-                    new_gap = 0;
+                DEBUG("Closing result length too long: " << total);
+                return orig_gap;
             }
-            DEBUG("filling");
-            for (size_t j = 0; j < addition.size(); j++) {
-                DEBUG(g_.int_id(addition[j]));
-                result.PushBack(addition[j], 0);
+        }
+
+        int len = int(CumulativeLength(g_, addition));
+        Gap gap(orig_gap.gap - len);
+        if (gap.gap < min_gap_ && addition.size() > 0) {
+            //TODO: extract the magic constant 2 (gap-overfill ratio) to config
+            if (orig_gap.gap * 2 < len) {
+//inserted result significantly longer than estimated gap
+                DEBUG("Filled len" << len);
             }
-            result.PushBack(path[i], new_gap);
-            DEBUG("filled");
+            if (g_.EdgeEnd(addition.back()) != target_vertex)
+                gap = Gap(min_gap_);
+            else
+                gap = Gap();
         }
+        for (EdgeId e : addition) {
+            DEBUG(g_.int_id(e));
+            path.PushBack(e);
+        }
+        return gap;
     }
-    DEBUG("result " << result.GetId() << " len "<< result.Length() << " size " << result.Size());
-    return result;
 }
 
 }
diff --git a/src/common/modules/path_extend/scaffolder2015/path_polisher.hpp b/src/common/modules/path_extend/scaffolder2015/path_polisher.hpp
index c13ddcb..30fe519 100644
--- a/src/common/modules/path_extend/scaffolder2015/path_polisher.hpp
+++ b/src/common/modules/path_extend/scaffolder2015/path_polisher.hpp
@@ -5,80 +5,130 @@
 #include "assembly_graph/paths/bidirectional_path.hpp"
 #include "assembly_graph/core/basic_graph_stats.hpp"
 #include "modules/path_extend/paired_library.hpp"
+#include "modules/path_extend/path_extender.hpp"
 #include "assembly_graph/graph_support/scaff_supplementary.hpp"
-#include "common/pipeline/graph_pack.hpp"
+#include "pipeline/graph_pack.hpp"
 
 namespace path_extend {
 
 class PathGapCloser {
 protected:
     const Graph& g_;
-    size_t max_path_len_;
-    int min_gap_;
+    const size_t max_path_len_;
+    const int min_gap_;
+
+    virtual Gap CloseGap(const BidirectionalPath &original_path, size_t position,
+                         BidirectionalPath &path) const = 0;
+    DECL_LOGGER("PathGapCloser")
+public:
+    BidirectionalPath CloseGaps(const BidirectionalPath &path) const;
+
+    PathGapCloser(const Graph& g, size_t max_path_len):
+                  g_(g),
+                  max_path_len_(max_path_len),
+                  //TODO: move the min_gap_ magic constant (k + 10) to config
+                  min_gap_(int(g.k() + 10)) {}
+
+};
+
+//Intermediate abstract class - majority of GapClosers needs only one next edge after gap, not all original path.
+class TargetEdgeGapCloser : public PathGapCloser {
+protected:
+    //returns updated gap to target edge
+    virtual Gap CloseGap(EdgeId target_edge, const Gap &gap, BidirectionalPath &path) const = 0;
+
+    Gap CloseGap(const BidirectionalPath &original_path,
+                 size_t position, BidirectionalPath &path) const final override {
+        return CloseGap(original_path.At(position), original_path.GapAt(position), path);
+    }
+
 public:
-    virtual BidirectionalPath Polish(const BidirectionalPath& path) = 0;
-//TODO:: config
-    PathGapCloser(const Graph& g, size_t max_path_len): g_(g), max_path_len_(max_path_len), min_gap_(int(g.k() + 10)) {}
+    TargetEdgeGapCloser(const Graph& g, size_t max_path_len):
+            PathGapCloser(g, max_path_len) {}
+
+};
+
+class PathExtenderGapCloser: public TargetEdgeGapCloser {
+    shared_ptr<path_extend::PathExtender> extender_;
 
+protected:
+    Gap CloseGap(EdgeId target_edge, const Gap &gap, BidirectionalPath &path) const override;
+
+public:
+    PathExtenderGapCloser(const Graph& g, size_t max_path_len, shared_ptr<PathExtender> extender):
+            TargetEdgeGapCloser(g, max_path_len), extender_(extender) {
+        DEBUG("ext added");
+    }
 };
 
-class MatePairGapCloser: public PathGapCloser {
+class MatePairGapCloser: public TargetEdgeGapCloser {
     const shared_ptr<PairedInfoLibrary> lib_;
     const ScaffoldingUniqueEdgeStorage& storage_;
-
 //TODO: config? somewhere else?
     static constexpr double weight_priority = 5;
+
+    EdgeId FindNext(const BidirectionalPath& path,
+                    const set<EdgeId>& present_in_paths,
+                    VertexId last_v, EdgeId target_edge) const;
+protected:
+    Gap CloseGap(EdgeId target_edge, const Gap &gap, BidirectionalPath &path) const override;
+
+    DECL_LOGGER("MatePairGapCloser")
+
 public:
-    EdgeId FindNext(const BidirectionalPath& path, size_t index,
-                        const set<EdgeId>& present_in_paths, VertexId v) const;
-    MatePairGapCloser(const Graph& g, size_t max_path_len, const shared_ptr<PairedInfoLibrary> lib, const ScaffoldingUniqueEdgeStorage& storage):
-            PathGapCloser(g, max_path_len), lib_(lib), storage_(storage) {}
-    BidirectionalPath Polish(const BidirectionalPath& path) override;
+    MatePairGapCloser(const Graph& g, size_t max_path_len,
+                      const shared_ptr<PairedInfoLibrary> lib,
+                      const ScaffoldingUniqueEdgeStorage& storage):
+            TargetEdgeGapCloser(g, max_path_len), lib_(lib), storage_(storage) {}
 };
 
-class DijkstraGapCloser: public PathGapCloser {
+//TODO switch to a different Callback, no need to store all paths
+class DijkstraGapCloser: public TargetEdgeGapCloser {
+    typedef vector<vector<EdgeId>> PathsT;
 
-protected:
+    Gap FillWithMultiplePaths(const PathsT& paths,
+                              BidirectionalPath& result) const;
 
-    BidirectionalPath Polish(const BidirectionalPath& path) override;
+    Gap FillWithBridge(const Gap &orig_gap,
+                       const PathsT& paths, BidirectionalPath& result) const;
 
-    size_t MinPathLength(const omnigraph::PathStorageCallback<Graph>& path_storage) const;
+    size_t MinPathLength(const PathsT& paths) const;
 
-    bool FillWithMultiplePaths(const BidirectionalPath& path, size_t index,
-                                       const omnigraph::PathStorageCallback<Graph>& path_storage,
-                                       BidirectionalPath& result) const;
+    size_t MinPathSize(const PathsT& paths) const;
 
-    bool FillWithBridge(const BidirectionalPath& path, size_t index,
-                                                                  const omnigraph::PathStorageCallback<Graph>& path_storage,
-                                                                  BidirectionalPath& result) const;
+    vector<EdgeId> LCP(const PathsT& paths) const;
 
-    size_t MinPathSize(const omnigraph::PathStorageCallback<Graph>& path_storage) const;
+    std::map<EdgeId, size_t> CountEdgesQuantity(const PathsT& paths, size_t length_limit) const;
 
-    vector<EdgeId> LCP(const omnigraph::PathStorageCallback<Graph>& path_storage) const;
+protected:
+    Gap CloseGap(EdgeId target_edge, const Gap &gap, BidirectionalPath &path) const override;
 
-    std::map<EdgeId, size_t> CountEdgesQuantity(const omnigraph::PathStorageCallback<Graph>& path_storage, size_t length_limit) const;
+    DECL_LOGGER("DijkstraGapCloser")
 
 public:
     DijkstraGapCloser(const Graph& g, size_t max_path_len):
-        PathGapCloser(g, max_path_len) {}
-
+        TargetEdgeGapCloser(g, max_path_len) {}
 
 };
 
 class PathPolisher {
+    static const size_t MAX_POLISH_ATTEMPTS = 5;
+
+    const conj_graph_pack &gp_;
+    vector<shared_ptr<PathGapCloser>> gap_closers_;
 
-private:
-    const conj_graph_pack& gp_;
-    vector<shared_ptr<PathGapCloser>> gap_closers;
+    void InfoAboutGaps(const PathContainer& result);
 
-private:
-    void InfoAboutGaps(const PathContainer & result);
     BidirectionalPath Polish(const BidirectionalPath& path);
+    DECL_LOGGER("PathPolisher")
 
 public:
-    PathPolisher(const conj_graph_pack& gp, const config::dataset& dataset_info, const ScaffoldingUniqueEdgeStorage& storage, size_t max_resolvable_len);
+    PathPolisher(const conj_graph_pack &gp,
+                               const vector<shared_ptr<PathGapCloser>> &gap_closers):
+            gp_(gp), gap_closers_(gap_closers) {
+    }
 
-    void PolishPaths(const PathContainer& paths, PathContainer& result);
+    PathContainer PolishPaths(const PathContainer &paths);
 };
 
 
diff --git a/src/common/modules/path_extend/scaffolder2015/scaffold_graph.hpp b/src/common/modules/path_extend/scaffolder2015/scaffold_graph.hpp
index 9ac3fdf..6fdfcf1 100644
--- a/src/common/modules/path_extend/scaffolder2015/scaffold_graph.hpp
+++ b/src/common/modules/path_extend/scaffolder2015/scaffold_graph.hpp
@@ -9,7 +9,7 @@
 #include "connection_condition2015.hpp"
 
 #include "utils/standard_base.hpp"
-#include "common/adt/iterator_range.hpp"
+#include "adt/iterator_range.hpp"
 
 namespace path_extend {
 namespace scaffold_graph {
diff --git a/src/common/modules/path_extend/scaffolder2015/scaffold_graph_visualizer.cpp b/src/common/modules/path_extend/scaffolder2015/scaffold_graph_visualizer.cpp
index 8017eee..764a3a1 100644
--- a/src/common/modules/path_extend/scaffolder2015/scaffold_graph_visualizer.cpp
+++ b/src/common/modules/path_extend/scaffolder2015/scaffold_graph_visualizer.cpp
@@ -18,17 +18,17 @@ const map<size_t, string> ScaffoldEdgeColorer::color_map =
 const string ScaffoldEdgeColorer::default_color = "black";
 
 string ScaffoldGraphLabeler::label(EdgeId e) const {
-    return "ID: " + ToString(e.getId()) +
-        "\\n Weight: " + ToString(e.getWeight()) +
-        "\\n Lib#: " + ToString(e.getColor());
+    return "ID: " + std::to_string(e.getId()) +
+        "\\n Weight: " + std::to_string(e.getWeight()) +
+        "\\n Lib#: " + std::to_string(e.getColor());
 }
 
 string ScaffoldGraphLabeler::label(VertexId v) const {
     auto it = additional_vertex_labels_.find(v);
     string additional_label = it == additional_vertex_labels_.end() ? "" : it->second + "\n";
-    return "ID: " + ToString(graph_.int_id(v)) +
-        "\\n Len: " + ToString(graph_.AssemblyGraph().length(v)) +
-        "\\n Cov: " + ToString(graph_.AssemblyGraph().coverage(v)) + "\n" +
+    return "ID: " + std::to_string(graph_.int_id(v)) +
+        "\\n Len: " + std::to_string(graph_.AssemblyGraph().length(v)) +
+        "\\n Cov: " + std::to_string(graph_.AssemblyGraph().coverage(v)) + "\n" +
         additional_label;
 }
 
diff --git a/src/common/modules/path_extend/weight_counter.hpp b/src/common/modules/path_extend/weight_counter.hpp
index d031bb2..0f7abc8 100644
--- a/src/common/modules/path_extend/weight_counter.hpp
+++ b/src/common/modules/path_extend/weight_counter.hpp
@@ -80,7 +80,9 @@ class IdealInfoProvider {
 public:
     virtual ~IdealInfoProvider() {}
 
-    virtual std::vector<EdgeWithPairedInfo> FindCoveredEdges(const BidirectionalPath& path, EdgeId candidate) const = 0;
+    virtual std::vector<EdgeWithPairedInfo> FindCoveredEdges(const BidirectionalPath& path, EdgeId candidate, int gap = 0) const = 0;
+protected:
+    DECL_LOGGER("IdealInfoProvider");
 };
 
 class BasicIdealInfoProvider : public IdealInfoProvider {
@@ -89,11 +91,11 @@ public:
     BasicIdealInfoProvider(const shared_ptr<PairedInfoLibrary>& lib) : lib_(lib) {
     }
 
-    std::vector<EdgeWithPairedInfo> FindCoveredEdges(const BidirectionalPath& path, EdgeId candidate) const override {
+    std::vector<EdgeWithPairedInfo> FindCoveredEdges(const BidirectionalPath& path, EdgeId candidate, int gap) const override {
         std::vector<EdgeWithPairedInfo> covered;
         for (int i = (int) path.Size() - 1; i >= 0; --i) {
             double w = lib_->IdealPairedInfo(path[i], candidate,
-                                            (int) path.LengthAt(i));
+                                            (int) path.LengthAt(i) + gap);
             //FIXME think if we need extremely low ideal weights
             if (math::gr(w, 0.)) {
                 covered.push_back(EdgeWithPairedInfo(i, w));
@@ -107,13 +109,13 @@ class WeightCounter {
 
 protected:
     const Graph& g_;
-    const shared_ptr<PairedInfoLibrary> lib_;
+    shared_ptr<PairedInfoLibrary> lib_;
     bool normalize_weight_;
     shared_ptr<IdealInfoProvider> ideal_provider_;
 
 public:
 
-    WeightCounter(const Graph& g, const shared_ptr<PairedInfoLibrary>& lib, 
+    WeightCounter(const Graph& g, shared_ptr<PairedInfoLibrary> lib,
                   bool normalize_weight = true, 
                   shared_ptr<IdealInfoProvider> ideal_provider = nullptr) :
             g_(g), lib_(lib), normalize_weight_(normalize_weight), ideal_provider_(ideal_provider) {
@@ -128,15 +130,11 @@ public:
     virtual double CountWeight(const BidirectionalPath& path, EdgeId e,
             const std::set<size_t>& excluded_edges = std::set<size_t>(), int gapLength = 0) const = 0;
 
-    const PairedInfoLibrary& lib() const {
+    const PairedInfoLibrary& PairedLibrary() const {
         return *lib_;
     }
 
-    const shared_ptr<PairedInfoLibrary> get_libptr() const {
-        return lib_;
-    };
-
-private:
+protected:
     DECL_LOGGER("WeightCounter");
 };
 
@@ -146,7 +144,7 @@ class ReadCountWeightCounter: public WeightCounter {
             int add_gap = 0) const {
         std::vector<EdgeWithPairedInfo> answer;
 
-        for (const EdgeWithPairedInfo& e_w_pi : ideal_provider_->FindCoveredEdges(path, e)) {
+        for (const EdgeWithPairedInfo& e_w_pi : ideal_provider_->FindCoveredEdges(path, e, add_gap)) {
             double w = lib_->CountPairedInfo(path[e_w_pi.e_], e,
                     (int) path.LengthAt(e_w_pi.e_) + add_gap);
 
@@ -213,17 +211,27 @@ class PathCoverWeightCounter: public WeightCounter {
             const std::vector<EdgeWithPairedInfo>& ideally_covered_edges, int add_gap = 0) const {
         std::vector<EdgeWithPairedInfo> answer;
 
-        for (const EdgeWithPairedInfo& e_w_pi : ideally_covered_edges) {
+        for (const auto& e_w_pi : ideally_covered_edges) {
             double ideal_weight = e_w_pi.pi_;
+            TRACE("Supposedly covered edge " << e_w_pi.e_ << " "
+                                            << g_.str(path.At(e_w_pi.e_))
+                                            << " ideal weight " << ideal_weight);
 
-            double weight = lib_->CountPairedInfo(
-                    path[e_w_pi.e_], e,
+            TRACE("Querying paired library for edges " << g_.str(path[e_w_pi.e_])
+                                                       << " " << g_.str(e) << " at dist "
+                                                       << (path.LengthAt(e_w_pi.e_) + add_gap));
+
+            double weight = lib_->CountPairedInfo(path[e_w_pi.e_], e,
                     (int) path.LengthAt(e_w_pi.e_) + add_gap);
 
+            TRACE("Actual weight " << weight);
+
             if (normalize_weight_) {
                 weight /= ideal_weight;
             }
 
+            TRACE("After normalization " << weight << " threshold " << single_threshold_);
+
             if (math::ge(weight, single_threshold_)) {
                 answer.push_back(EdgeWithPairedInfo(e_w_pi.e_, ideal_weight));
             }
@@ -245,8 +253,9 @@ public:
 
     double CountWeight(const BidirectionalPath& path, EdgeId e,
             const std::set<size_t>& excluded_edges, int gap) const override {
+        TRACE("Counting weight for edge " << g_.str(e));
         double lib_weight = 0.;
-        const auto ideal_coverage = ideal_provider_->FindCoveredEdges(path, e);
+        const auto ideal_coverage = ideal_provider_->FindCoveredEdges(path, e, gap);
 
         for (const auto& e_w_pi : CountLib(path, e, ideal_coverage, gap)) {
             if (!excluded_edges.count(e_w_pi.e_)) {
@@ -255,13 +264,18 @@ public:
         }
 
         double total_ideal_coverage = TotalIdealNonExcluded(ideal_coverage, excluded_edges);
+
+        TRACE("Excluded edges  " << utils::join(excluded_edges, ", ",
+                                                [&] (const size_t &i) { return g_.str(path.At(i)); }));
+        TRACE("Total ideal coverage " << total_ideal_coverage);
+        TRACE("Lib weight " << lib_weight);
         return math::eq(total_ideal_coverage, 0.) ? 0. : lib_weight / total_ideal_coverage;
     }
 
     std::set<size_t> PairInfoExist(const BidirectionalPath& path, EdgeId e, 
                                     int gap = 0) const override {
         std::set<size_t> answer;
-        for (const auto& e_w_pi : CountLib(path, e, ideal_provider_->FindCoveredEdges(path, e), gap)) {
+        for (const auto& e_w_pi : CountLib(path, e, ideal_provider_->FindCoveredEdges(path, e, gap), gap)) {
             if (math::gr(e_w_pi.pi_, 0.)) {
                 answer.insert(e_w_pi.e_);
             }
@@ -292,13 +306,16 @@ public:
         VERIFY(read_length_ > g_.k());
     }
 
-    std::vector<EdgeWithPairedInfo> FindCoveredEdges(const BidirectionalPath& path, EdgeId candidate) const override {
+    //TODO optimize number of calls of EstimatePathCoverage(path)
+    std::vector<EdgeWithPairedInfo> FindCoveredEdges(const BidirectionalPath& path, EdgeId candidate, int gap) const override {
         VERIFY(read_length_ != -1ul);
         //bypassing problems with ultra-low coverage estimates
         double estimated_coverage = max(EstimatePathCoverage(path), 1.0);
         double correction_coeff = estimated_coverage / ((double(read_length_) - double(g_.k())) * MAGIC_COEFF);
+        TRACE("Estimated coverage " << estimated_coverage);
+        TRACE("Correction coefficient " << correction_coeff);
 
-        std::vector<EdgeWithPairedInfo> answer = BasicIdealInfoProvider::FindCoveredEdges(path, candidate);
+        std::vector<EdgeWithPairedInfo> answer = BasicIdealInfoProvider::FindCoveredEdges(path, candidate, gap);
         for (auto& e_w_pi : answer) {
             e_w_pi.pi_ *= correction_coeff;
         }
@@ -324,34 +341,6 @@ public:
     }
 };
 
-//TODO optimize number of calls of EstimatePathCoverage(path)
-//class MetagenomicWeightCounter: public WeightCounter {
-//    shared_ptr<CoverageAwareIdealInfoProvider> cov_info_provider_;
-//    shared_ptr<WeightCounter> normalizing_wc_;
-//
-//public:
-//
-//    //negative raw_threshold leads to the halt if no sufficiently long edges are in the path
-//    MetagenomicWeightCounter(const Graph& g, const shared_ptr<PairedInfoLibrary>& lib,
-//                             size_t read_length, double weight_threshold) :
-//            WeightCounter(g, lib) {
-//        cov_info_provider_ = make_shared<CoverageAwareIdealInfoProvider>(g, lib, read_length);
-//        normalizing_wc_ = make_shared<PathCoverWeightCounter>(g, lib,
-//                /*normalize weight*/true, weight_threshold, cov_info_provider_);
-//    }
-//
-//    double CountWeight(const BidirectionalPath& path, EdgeId e,
-//            const std::set<size_t>& excluded_edges, int gap = 0) const override {
-//        VERIFY(path.Length() > 0);
-//        return normalizing_wc_->CountWeight(path, e, excluded_edges, gap);
-//    }
-//
-//    std::set<size_t> PairInfoExist(const BidirectionalPath& path, EdgeId e,
-//                                    int gap = 0) const override {
-//        return normalizing_wc_->PairInfoExist(path, e, gap);
-//    }
-//};
-
 };
 
 #endif /* WEIGHT_COUNTER_HPP_ */
diff --git a/src/common/modules/simplification/bulge_remover.hpp b/src/common/modules/simplification/bulge_remover.hpp
index 73254b1..bff2ec1 100644
--- a/src/common/modules/simplification/bulge_remover.hpp
+++ b/src/common/modules/simplification/bulge_remover.hpp
@@ -123,12 +123,12 @@ class BulgeGluer {
 
         EnsureEndsPositionAligner aligner(CumulativeLength(g_, path),
                 g_.length(edge));
-        double prefix_length = 0.;
+        size_t prefix_length = 0.;
         vector<size_t> bulge_prefix_lengths;
 
         for (EdgeId e : path) {
-            prefix_length += (double) g_.length(e);
-            bulge_prefix_lengths.push_back(aligner.GetPosition((size_t) prefix_length));
+            prefix_length += g_.length(e);
+            bulge_prefix_lengths.push_back(aligner.GetPosition(prefix_length));
         }
 
         EdgeId edge_to_split = edge;
@@ -210,6 +210,7 @@ class AlternativesAnalyzer {
     size_t max_delta_;
     double max_relative_delta_;
     size_t max_edge_cnt_;
+    size_t dijkstra_vertex_limit_;
 
     static vector<EdgeId> EmptyPath() {
         static vector<EdgeId> vec = {};
@@ -229,14 +230,16 @@ class AlternativesAnalyzer {
 public:
     AlternativesAnalyzer(const Graph& g, double max_coverage, size_t max_length,
                          double max_relative_coverage, size_t max_delta,
-                         double max_relative_delta, size_t max_edge_cnt) :
+                         double max_relative_delta, size_t max_edge_cnt,
+                         size_t dijkstra_vertex_limit) :
                          g_(g),
                          max_coverage_(max_coverage),
                          max_length_(max_length),
                          max_relative_coverage_(max_relative_coverage),
                          max_delta_(max_delta),
                          max_relative_delta_(max_relative_delta),
-                         max_edge_cnt_(max_edge_cnt) {
+                         max_edge_cnt_(max_edge_cnt),
+                         dijkstra_vertex_limit_(dijkstra_vertex_limit) {
         DEBUG("Created alternatives analyzer max_length=" << max_length
         << " max_coverage=" << max_coverage
         << " max_relative_coverage=" << max_relative_coverage
@@ -261,8 +264,10 @@ public:
         VertexId end = g_.EdgeEnd(e);
         TRACE("End " << g_.str(end));
 
-        ProcessPaths(g_, (g_.length(e) > delta) ? g_.length(e) - delta : 0,
-                g_.length(e) + delta, start, end, path_chooser, max_edge_cnt_);
+        size_t max_path_len = g_.length(e) + delta;
+        PathProcessor<Graph> processor(g_, start, max_path_len, dijkstra_vertex_limit_);
+        processor.Process(end, (g_.length(e) > delta) ? g_.length(e) - delta : 0,
+                          max_path_len, path_chooser, max_edge_cnt_);
 
         const vector<EdgeId>& path = path_chooser.most_covered_path();
         if (!path.empty()) {
@@ -307,6 +312,21 @@ NecessaryBulgeCondition(const Graph& g, size_t max_length, double max_coverage)
                                                      CoverageUpperBound<Graph>(g, max_coverage)));
 }
 
+template<class Graph>
+func::TypedPredicate<typename Graph::EdgeId>
+NecessaryBulgeCondition(const Graph& g, const AlternativesAnalyzer<Graph>& analyzer) {
+    return NecessaryBulgeCondition(g, analyzer.max_length(), analyzer.max_coverage());
+}
+
+template<class Graph>
+InterestingFinderPtr<Graph, typename Graph::EdgeId>
+BulgeCandidateFinder(const Graph &g,
+                     const AlternativesAnalyzer<Graph> &analyzer,
+                     size_t chunk_cnt) {
+    return std::make_shared<omnigraph::ParallelInterestingElementFinder<Graph>>(
+            omnigraph::NecessaryBulgeCondition(g, analyzer), chunk_cnt);
+};
+
 /**
  * This class removes simple bulges from given graph with the following algorithm: it iterates through all edges of
  * the graph and for each edge checks if this edge is likely to be a simple bulge
@@ -345,13 +365,13 @@ public:
 
     typedef std::function<void(EdgeId edge, const vector<EdgeId>& path)> BulgeCallbackF;
 
-    BulgeRemover(Graph& g, const std::shared_ptr<InterestingElementFinder<Graph, EdgeId>>& interesting_finder,
+    BulgeRemover(Graph& g, size_t chunk_cnt,
             const AlternativesAnalyzer<Graph>& alternatives_analyzer,
             BulgeCallbackF opt_callback = 0,
             std::function<void(EdgeId)> removal_handler = 0,
             bool track_changes = true) :
             base(g,
-                 interesting_finder,
+                 BulgeCandidateFinder(g, alternatives_analyzer, chunk_cnt),
                  /*canonical_only*/true,
                  CoverageComparator<Graph>(g),
                  track_changes),
@@ -369,9 +389,10 @@ private:
 template<class Graph>
 class ParallelBulgeRemover : public PersistentAlgorithmBase<Graph> {
 private:
+    static const size_t SMALL_BUFFER_THR = 1000;
     typedef typename Graph::EdgeId EdgeId;
     typedef typename Graph::VertexId VertexId;
-    typedef std::shared_ptr<InterestingElementFinder<Graph, EdgeId>> CandidateFinderPtr;
+    typedef InterestingFinderPtr<Graph, EdgeId> CandidateFinderPtr;
     typedef SmartSetIterator<Graph, EdgeId, CoverageComparator<Graph>> SmartEdgeSet;
 
     size_t buff_size_;
@@ -438,6 +459,12 @@ private:
 
     };
 
+    SmartEdgeSet AsSmartSet(const std::vector<EdgeId> &edges) {
+        SmartEdgeSet smart_set(this->g(), false, CoverageComparator<Graph>(this->g()));
+        smart_set.insert(edges.begin(), edges.end());
+        return smart_set;
+    }
+
     bool CheckInteracting(const BulgeInfo& info, const std::unordered_set<EdgeId>& involved_edges) const {
         if (involved_edges.count(info.e))
             return true;
@@ -462,55 +489,55 @@ private:
         }
     }
 
-    //false if time to stop
+    //returns false if time to stop
     bool FillEdgeBuffer(vector<EdgeId>& buffer, func::TypedPredicate<EdgeId> proceed_condition) {
         VERIFY(buffer.empty());
         DEBUG("Filling edge buffer of size " << buff_size_);
-        perf_counter perf;
-        double low_cov = 0.;
-        double cov_diff = 0.;
+        utils::perf_counter perf;
+        double max_cov = std::numeric_limits<double>::min();
+        bool exhausted = false;
+
         while (!it_.IsEnd() && buffer.size() < buff_size_) {
             EdgeId e = *it_;
             TRACE("Current edge " << this->g().str(e));
-            if (!proceed_condition(e)) {
-                TRACE("Stop condition was reached.");
-                //need to release last element of the iterator to make it replaceable by new elements
-                it_.ReleaseCurrent();
-                return false;
-            }
 
             double cov = this->g().coverage(e);
             if (buffer.empty()) {
-                low_cov = cov;
-                cov_diff = max(buff_cov_diff_, buff_cov_rel_diff_ * low_cov);
-            } else {
-                if (math::gr(cov, low_cov + cov_diff)) {
-                    //need to release last element of the iterator to make it replaceable by new elements
-                    it_.ReleaseCurrent();
-                    return true;
-                }
+                max_cov = cov + max(buff_cov_diff_, buff_cov_rel_diff_ * cov);
+                DEBUG("Coverage interval [" << cov << ", " << max_cov << "]");
+            }
+
+            if (!proceed_condition(e)) {
+                DEBUG("Stop condition was reached.");
+                exhausted = true;
+                break;
+            }
+
+            if (math::gr(cov, max_cov)) {
+                DEBUG("Coverage exceeded " << cov << " > " << max_cov);
+                break;
             }
+
             TRACE("Potential bulge edge");
             buffer.push_back(e);
             ++it_;
         }
 
+        exhausted |= it_.IsEnd();
+        it_.ReleaseCurrent();
+
         DEBUG("Filled in " << perf.time() << " seconds");
-        if (buffer.size() == buff_size_) {
-            TRACE("Buffer filled");
-            return true;
-        } else {
-            TRACE("No more edges in iterator");
-            return false;
-        }
+        DEBUG("Candidate queue exhausted " << exhausted);
+        return !exhausted;
     }
 
     std::vector<std::vector<BulgeInfo>> FindBulges(const std::vector<EdgeId>& edge_buffer) const {
-        DEBUG("Looking for bulges (in parallel). Edge buffer size " << edge_buffer.size());
-        perf_counter perf;
+        DEBUG("Looking for bulges in parallel");
+        utils::perf_counter perf;
         std::vector<std::vector<BulgeInfo>> bulge_buffers(omp_get_max_threads());
-        size_t n = edge_buffer.size();
+        const size_t n = edge_buffer.size();
         //order is in agreement with coverage
+        DEBUG("Edge buffer size " << n);
         #pragma omp parallel for schedule(guided)
         for (size_t i = 0; i < n; ++i) {
             EdgeId e = edge_buffer[i];
@@ -519,13 +546,13 @@ private:
                 bulge_buffers[omp_get_thread_num()].push_back(BulgeInfo(i, e, std::move(alternative)));
             }
         }
-        DEBUG("Bulges found in " << perf.time() << " seconds");
+        DEBUG("Bulges found (in parallel) in " << perf.time() << " seconds");
         return bulge_buffers;
     }
 
     std::vector<BulgeInfo> MergeBuffers(std::vector<std::vector<BulgeInfo>>&& buffers) const {
         DEBUG("Merging bulge buffers");
-        perf_counter perf;
+        utils::perf_counter perf;
 
         std::vector<BulgeInfo> merged_bulges;
         for (auto& bulge_buffer : buffers) {
@@ -545,7 +572,7 @@ private:
     SmartEdgeSet RetainIndependentBulges(std::vector<BulgeInfo>& bulges) const {
         DEBUG("Looking for independent bulges");
         size_t total_cnt = bulges.size();
-        perf_counter perf;
+        utils::perf_counter perf;
 
         std::vector<BulgeInfo> filtered;
         filtered.reserve(bulges.size());
@@ -574,9 +601,24 @@ private:
         return interacting_edges;
     }
 
-    size_t ProcessBulges(const std::vector<BulgeInfo>& independent_bulges, SmartEdgeSet&& interacting_edges) {
+    size_t BasicProcessBulges(SmartEdgeSet& edges) {
+        size_t triggered = 0;
+        //usual br strategy
+        for (; !edges.IsEnd(); ++edges) {
+            EdgeId e = *edges;
+            TRACE("Processing edge " << this->g().str(e));
+            std::vector<EdgeId> alternative = alternatives_analyzer_(e);
+            if (!alternative.empty()) {
+                gluer_(e, alternative);
+                triggered++;
+            }
+        }
+        return triggered;
+    }
+
+    size_t ProcessBulges(const std::vector<BulgeInfo>& independent_bulges, SmartEdgeSet& interacting_edges) {
         DEBUG("Processing bulges");
-        perf_counter perf;
+        utils::perf_counter perf;
 
         size_t triggered = 0;
 
@@ -590,16 +632,7 @@ private:
         perf.reset();
 
         DEBUG("Processing remaining interacting bulges " << interacting_edges.size());
-        //usual br strategy
-        for (; !interacting_edges.IsEnd(); ++interacting_edges) {
-            EdgeId e = *interacting_edges;
-            TRACE("Processing edge " << this->g().str(e));
-            std::vector<EdgeId> alternative = alternatives_analyzer_(e);
-            if (!alternative.empty()) {
-                gluer_(e, alternative);
-                triggered++;
-            }
-        }
+        triggered += BasicProcessBulges(interacting_edges);
         DEBUG("Interacting edges processed in " << perf.time() << " seconds");
         return triggered;
     }
@@ -608,22 +641,29 @@ public:
 
     typedef std::function<void(EdgeId edge, const vector<EdgeId>& path)> BulgeCallbackF;
 
-    ParallelBulgeRemover(Graph& g, const CandidateFinderPtr& interesting_edge_finder,
-                         size_t buff_size, double buff_cov_diff,
-                         double buff_cov_rel_diff, const AlternativesAnalyzer<Graph>& alternatives_analyzer,
+    ParallelBulgeRemover(Graph& g,
+                         size_t chunk_cnt,
+                         size_t buff_size,
+                         double buff_cov_diff,
+                         double buff_cov_rel_diff,
+                         const AlternativesAnalyzer<Graph>& alternatives_analyzer,
                          BulgeCallbackF opt_callback = 0,
                          std::function<void(EdgeId)> removal_handler = 0,
                          bool track_changes = true) :
+
                          PersistentAlgorithmBase<Graph>(g),
                          buff_size_(buff_size),
                          buff_cov_diff_(buff_cov_diff),
                          buff_cov_rel_diff_(buff_cov_rel_diff),
                          alternatives_analyzer_(alternatives_analyzer),
                          gluer_(g, opt_callback, removal_handler),
-                         interesting_edge_finder_(interesting_edge_finder),
+                         interesting_edge_finder_(BulgeCandidateFinder(g, alternatives_analyzer, chunk_cnt)),
                          tracking_(track_changes),
                          curr_iteration_(0),
-                         it_(g, true, CoverageComparator<Graph>(g), true) {
+                         it_(g, /*add new*/true,
+                             CoverageComparator<Graph>(g),
+                             /*canonical only*/true,
+                             NecessaryBulgeCondition(g, alternatives_analyzer)) {
         VERIFY(buff_size_ > 0);
         it_.Detach();
     }
@@ -639,32 +679,46 @@ public:
         }
         if (primary_launch) {
             it_.clear();
-            TRACE("Primary launch.");
-            TRACE("Start search for interesting edges");
+            DEBUG("Primary launch.");
+            DEBUG("Start search for interesting edges");
             interesting_edge_finder_->Run(this->g(), [&](EdgeId e) {it_.push(e);});
-            TRACE(it_.size() << " interesting edges to process");
+            DEBUG(it_.size() << " interesting edges to process");
         } else {
             VERIFY(tracking_);
-            TRACE(it_.size() << " edges to process");
+            DEBUG(it_.size() << " edges to process");
         }
 
         size_t triggered = 0;
         bool proceed = true;
         while (proceed) {
             std::vector<EdgeId> edge_buffer;
+            DEBUG("Filling edge buffer");
             edge_buffer.reserve(buff_size_);
             proceed = FillEdgeBuffer(edge_buffer, proceed_condition);
+            DEBUG("Edge buffer filled");
+
+            DEBUG("Edge buffer size " << edge_buffer.size());
+            size_t inner_triggered = 0;
+            //FIXME magic constant
+            if (edge_buffer.size() < SMALL_BUFFER_THR) {
+                DEBUG("Processing small buffer");
+                utils::perf_counter perf;
+                //TODO implement via moves?
+                auto edges = AsSmartSet(edge_buffer);
+                inner_triggered = BasicProcessBulges(edges);
+                DEBUG("Small buffer processed in " << perf.time() << " seconds");
+            } else {
+                std::vector<BulgeInfo> bulges = MergeBuffers(FindBulges(edge_buffer));
+                auto interacting_edges = RetainIndependentBulges(bulges);
+                inner_triggered = ProcessBulges(bulges, interacting_edges);
+            }
 
-            std::vector<BulgeInfo> bulges = MergeBuffers(FindBulges(edge_buffer));
-
-            auto interacting_edges = RetainIndependentBulges(bulges);
-
-            size_t inner_triggered = ProcessBulges(bulges, std::move(interacting_edges));
             proceed |= (inner_triggered > 0);
             triggered += inner_triggered;
+            DEBUG("Buffer processed");
         }
 
-        TRACE("Finished processing. Triggered = " << triggered);
+        DEBUG("Finished processing. Triggered = " << triggered);
         if (!tracking_)
             it_.Detach();
 
diff --git a/src/common/modules/simplification/cleaner.hpp b/src/common/modules/simplification/cleaner.hpp
index ce3eac5..f888325 100644
--- a/src/common/modules/simplification/cleaner.hpp
+++ b/src/common/modules/simplification/cleaner.hpp
@@ -36,4 +36,10 @@ protected:
     }
 };
 
+template<class Graph>
+size_t CleanIsolatedVertices(Graph &g, size_t chunk_cnt = 1) {
+    Cleaner<Graph> cleaner(g, chunk_cnt);
+    return cleaner.Run();
+}
+
 }
diff --git a/src/common/modules/simplification/complex_bulge_remover.hpp b/src/common/modules/simplification/complex_bulge_remover.hpp
index 2abed3d..cd2cc90 100644
--- a/src/common/modules/simplification/complex_bulge_remover.hpp
+++ b/src/common/modules/simplification/complex_bulge_remover.hpp
@@ -10,7 +10,7 @@
 #include <cmath>
 #include <stack>
 #include <queue>
-#include "common/adt/concurrent_dsu.hpp"
+#include "adt/concurrent_dsu.hpp"
 #include "utils/standard_base.hpp"
 #include "assembly_graph/components/graph_component.hpp"
 #include "math/xmath.h"
@@ -114,7 +114,7 @@ public:
 //    }
 
     bool CheckCompleteness() const {
-        for (VertexId v : key_set(vertex_depth_)) {
+        for (VertexId v : utils::key_set(vertex_depth_)) {
             if (v == start_vertex_)
                 continue;
             if (!AllEdgeIn(v) && !AllEdgeOut(v))
@@ -125,7 +125,7 @@ public:
 
     bool NeedsProjection() const {
         DEBUG("Checking if component needs projection");
-        for (VertexId v : key_set(vertex_depth_)) {
+        for (VertexId v : utils::key_set(vertex_depth_)) {
             if (v == start_vertex_)
                 continue;
             vector<EdgeId> filtered_incoming;
@@ -161,7 +161,7 @@ public:
 
     set<size_t> avg_distances() const {
         set<size_t> distances;
-        for (VertexId v : key_set(vertex_depth_)) {
+        for (VertexId v : utils::key_set(vertex_depth_)) {
             distances.insert(avg_distance(v));
         }
         return distances;
@@ -194,12 +194,12 @@ public:
     }
 
     GraphComponent<Graph> AsGraphComponent() const {
-        return GraphComponent<Graph>::FromVertices(g_, key_set(vertex_depth_));
+        return GraphComponent<Graph>::FromVertices(g_, utils::key_set(vertex_depth_));
     }
 
     bool ContainsConjugateVertices() const {
         set<VertexId> conjugate_vertices;
-        for (VertexId v : key_set(vertex_depth_)) {
+        for (VertexId v : utils::key_set(vertex_depth_)) {
             if (conjugate_vertices.count(v) == 0) {
                 conjugate_vertices.insert(g_.conjugate(v));
             } else {
@@ -256,7 +256,7 @@ public:
                     "Inserting vertex " << g_.str(new_vertex) << " to component during split");
             vertex_depth_.insert(make_pair(new_vertex, new_vertex_depth));
             height_2_vertices_.insert(
-                    make_pair(Average(new_vertex_depth), new_vertex));
+                    std::make_pair(Average(new_vertex_depth), new_vertex));
         }
     }
 
@@ -308,25 +308,25 @@ public:
         return vertices_.count(v) > 0;
     }
 
-    virtual void HandleDelete(VertexId v) {
+    void HandleDelete(VertexId v) override {
         //verify v not in the tree
         VERIFY(!Contains(v));
     }
 
-    virtual void HandleDelete(EdgeId e) {
+    void HandleDelete(EdgeId e) override {
         //verify e not in the tree
         DEBUG("Trying to delete " << br_comp_.g().str(e));
         VERIFY(!Contains(e));
     }
 
-    virtual void HandleMerge(const vector<EdgeId>& old_edges, EdgeId /*new_edge*/) {
+    void HandleMerge(const vector<EdgeId>& old_edges, EdgeId /*new_edge*/) override {
         //verify false
         for (EdgeId e : old_edges) {
             VERIFY(!Contains(e));
         }
     }
 
-    virtual void HandleGlue(EdgeId new_edge, EdgeId edge1, EdgeId edge2) {
+    void HandleGlue(EdgeId new_edge, EdgeId edge1, EdgeId edge2) override {
 //         verify edge2 in tree
 //         put new_edge instead of edge2
         DEBUG("Glueing " << br_comp_.g().str(new_edge) << " " << br_comp_.g().str(edge1) << " " << br_comp_.g().str(edge2));
@@ -338,8 +338,8 @@ public:
         }
     }
 
-    virtual void HandleSplit(EdgeId old_edge, EdgeId new_edge_1,
-            EdgeId new_edge_2) {
+    void HandleSplit(EdgeId old_edge, EdgeId new_edge_1,
+            EdgeId new_edge_2) override {
         VERIFY(old_edge != br_comp_.g().conjugate(old_edge));
         if (Contains(old_edge)) {
             edges_.erase(old_edge);
@@ -499,7 +499,7 @@ class SkeletonTreeFinder {
 
     typedef typename Graph::EdgeId EdgeId;
     typedef typename Graph::VertexId VertexId;
-    typedef ConcurrentDSU color_partition_ds_t;
+    typedef dsu::ConcurrentDSU color_partition_ds_t;
 
     const LocalizedComponent<Graph>& component_;
     const ComponentColoring<Graph>& coloring_;
@@ -554,7 +554,7 @@ class SkeletonTreeFinder {
         vector<EdgeId> answer;
         for (VertexId v : vertices) {
             if (component_.end_vertices().count(v) == 0) {
-                push_back_all(answer, GoodOutgoingEdges(v));
+                utils::push_back_all(answer, GoodOutgoingEdges(v));
             }
         }
         return answer;
@@ -627,10 +627,10 @@ class SkeletonTreeFinder {
             }
         }
         size_t coverage = 0;
-        for (size_t cov : value_set(best_subtrees_coverage)) {
+        for (size_t cov : utils::value_set(best_subtrees_coverage)) {
             coverage += cov;
         }
-        next_edges_[v] = SetAsVector<EdgeId>(value_set(best_alternatives));
+        next_edges_[v] = SetAsVector<EdgeId>(utils::value_set(best_alternatives));
         subtree_coverage_[v] = coverage;
     }
 
@@ -677,7 +677,7 @@ public:
             VERIFY(!level_vertices.empty());
 
             //looking for good edges
-            insert_all(good_edges_,
+            utils::insert_all(good_edges_,
                     GoodOutgoingEdges(
                             vector<VertexId>(level_vertices.begin(),
                                     level_vertices.end())));
@@ -751,7 +751,7 @@ class ComponentProjector {
     bool SplitComponent() {
         DEBUG("Splitting component");
         set<size_t> level_heights(component_.avg_distances());
-        DEBUG("Level heights " << ToString<size_t>(level_heights));
+        DEBUG("Level heights " << utils::ContainerToString(level_heights));
 
         GraphComponent<Graph> gc = component_.AsGraphComponent();
 
@@ -763,7 +763,7 @@ class ComponentProjector {
             DEBUG("Processing edge " << g_.str(*it) << " avg_start " << start_dist << " avg_end " << end_dist);
             set<size_t> dist_to_split(level_heights.lower_bound(start_dist),
                     level_heights.upper_bound(end_dist));
-            DEBUG("Distances to split " << ToString<size_t>(dist_to_split));
+            DEBUG("Distances to split " << utils::ContainerToString(dist_to_split));
 
             size_t offset = start_dist;
             EdgeId e = *it;
@@ -1108,8 +1108,8 @@ class ComplexBulgeRemover : public PersistentProcessingAlgorithm<Graph, typename
             if (!pics_folder_.empty()) {
                 PrintComponent(component, tree,
                         pics_folder_ + "success/"
-                                + ToString(this->g().int_id(component.start_vertex()))
-                                + "_" + ToString(candidate_cnt) + ".dot");
+                                + std::to_string(this->g().int_id(component.start_vertex()))
+                                + "_" + std::to_string(candidate_cnt) + ".dot");
             }
 
             ComponentProjector<Graph> projector(this->g(), component, coloring, tree);
@@ -1126,7 +1126,7 @@ class ComplexBulgeRemover : public PersistentProcessingAlgorithm<Graph, typename
                 //todo check if we rewrite all of the previous pics!
                 PrintComponent(component,
                         pics_folder_ + "fail/"
-                                + ToString(this->g().int_id(component.start_vertex())) //+ "_" + ToString(candidate_cnt)
+                                + std::to_string(this->g().int_id(component.start_vertex())) //+ "_" + std::to_string(candidate_cnt)
                                 + ".dot");
             }
             return false;
diff --git a/src/common/modules/simplification/complex_tip_clipper.hpp b/src/common/modules/simplification/complex_tip_clipper.hpp
index 5da0d68..09cf149 100644
--- a/src/common/modules/simplification/complex_tip_clipper.hpp
+++ b/src/common/modules/simplification/complex_tip_clipper.hpp
@@ -160,7 +160,7 @@ public:
         if (!pics_folder_.empty()) {
             visualization::visualization_utils::WriteComponentSinksSources(component,
                                                       pics_folder_
-                                                      + ToString(this->g().int_id(v)) //+ "_" + ToString(candidate_cnt)
+                                                      + std::to_string(this->g().int_id(v)) //+ "_" + std::to_string(candidate_cnt)
                                                       + ".dot");
         }
 
diff --git a/src/common/modules/simplification/compressor.hpp b/src/common/modules/simplification/compressor.hpp
index 7d210fd..3fbb3a1 100644
--- a/src/common/modules/simplification/compressor.hpp
+++ b/src/common/modules/simplification/compressor.hpp
@@ -118,7 +118,7 @@ protected:
 * Method compresses all vertices which can be compressed.
 */
 template<class Graph>
-bool CompressAllVertices(Graph &g, bool safe_merging = true, size_t chunk_cnt = 1) {
+size_t CompressAllVertices(Graph &g, size_t chunk_cnt = 1, bool safe_merging = true) {
     CompressingProcessor<Graph> compressor(g, chunk_cnt, safe_merging);
     return compressor.Run();
 }
diff --git a/src/common/modules/simplification/dominated_set_finder.hpp b/src/common/modules/simplification/dominated_set_finder.hpp
index b7e779a..bd6ac33 100644
--- a/src/common/modules/simplification/dominated_set_finder.hpp
+++ b/src/common/modules/simplification/dominated_set_finder.hpp
@@ -15,10 +15,10 @@ class DominatedSetFinder {
     std::map<VertexId, Range> dominated_;
 
     bool CheckCanBeProcessed(VertexId v) const {
-        DEBUG( "Check if vertex " << g_.str(v) << " is dominated close neighbour");
+        DEBUG("Check if vertex " << g_.str(v) << " is dominated close neighbour");
         for (EdgeId e : g_.IncomingEdges(v)) {
             if (dominated_.count(g_.EdgeStart(e)) == 0) {
-                DEBUG( "Blocked by external vertex " << g_.int_id(g_.EdgeStart(e)) << " that starts edge " << g_.int_id(e));
+                DEBUG("Blocked by external vertex " << g_.int_id(g_.EdgeStart(e)) << " that starts edge " << g_.int_id(e));
                 DEBUG("Check fail");
                 return false;
             }
@@ -31,7 +31,7 @@ class DominatedSetFinder {
                               std::queue<VertexId>& can_be_processed) const {
         DEBUG("Updating can be processed");
         for (EdgeId e : g_.OutgoingEdges(v)) {
-            DEBUG("Considering edge " << ToString(e));
+            DEBUG("Considering edge " << g_.str(e));
             VertexId neighbour_v = g_.EdgeEnd(e);
             if (CheckCanBeProcessed(neighbour_v)) {
                 can_be_processed.push(neighbour_v);
@@ -115,13 +115,13 @@ public:
     }
 
     GraphComponent<Graph> AsGraphComponent() const {
-        return GraphComponent<Graph>::FromVertices(g_, key_set(dominated_));
+        return GraphComponent<Graph>::FromVertices(g_, utils::key_set(dominated_));
     }
 
     //little meaning if FillDominated returned false
     const map<VertexId, Range> CountBorder() const {
         map<VertexId, Range> border;
-        for (VertexId v : key_set(border)) {
+        for (VertexId v : utils::key_set(border)) {
             for (EdgeId e : g_.OutgoingEdges(v)) {
                 VertexId e_end = g_.EdgeEnd(e);
                 if (dominated_.count(e_end) == 0) {
diff --git a/src/common/modules/simplification/ec_threshold_finder.hpp b/src/common/modules/simplification/ec_threshold_finder.hpp
index f0e27eb..5b3b470 100644
--- a/src/common/modules/simplification/ec_threshold_finder.hpp
+++ b/src/common/modules/simplification/ec_threshold_finder.hpp
@@ -8,9 +8,9 @@
 #ifndef OMNI_TOOLS_HPP_
 #define OMNI_TOOLS_HPP_
 
-#include "utils/simple_tools.hpp"
+#include "utils/stl_utils.hpp"
 
-#include "utils/path_helper.hpp"
+#include "utils/filesystem/path_helper.hpp"
 #include "assembly_graph/graph_support/basic_edge_conditions.hpp"
 #include "assembly_graph/graph_support/parallel_processing.hpp"
 #include "assembly_graph/graph_support/basic_vertex_conditions.hpp"
@@ -39,9 +39,9 @@ private:
             return false;
 
         std::vector<EdgeId> v1;
-        push_back_all(v1, graph_.OutgoingEdges(graph_.EdgeStart(e)));
+        utils::push_back_all(v1, graph_.OutgoingEdges(graph_.EdgeStart(e)));
         std::vector<EdgeId> v2;
-        push_back_all(v2, graph_.IncomingEdges(graph_.EdgeEnd(e)));
+        utils::push_back_all(v2, graph_.IncomingEdges(graph_.EdgeEnd(e)));
         bool eq = (v1.size() == 2 && v2.size() == 2) && ((v1[0] == v2[0] && v1[1] == v2[1])    || (v1[0] == v2[1] && v1[0] == v2[1]));
         return !eq;
     }
diff --git a/src/common/modules/simplification/erroneous_connection_remover.hpp b/src/common/modules/simplification/erroneous_connection_remover.hpp
index f841913..1cba4d7 100644
--- a/src/common/modules/simplification/erroneous_connection_remover.hpp
+++ b/src/common/modules/simplification/erroneous_connection_remover.hpp
@@ -130,7 +130,7 @@ inline bool IsAlternativePathExist(const Graph &g, typename Graph::EdgeId e){
     VertexId end = g.EdgeEnd(e);
     TRACE("End " << g.str(end));
 
-    ProcessPaths(g, 0, std::numeric_limits<std::size_t>::max(), start, end, path_chooser, std::numeric_limits<std::size_t>::max());
+    ProcessPaths(g, 0, std::numeric_limits<std::size_t>::max(), start, end, path_chooser);
 
     const vector<EdgeId>& path = path_chooser.most_covered_path();
     double path_coverage = path_chooser.max_coverage();
@@ -156,7 +156,7 @@ inline bool IsAlternativeInclusivePathExist(const Graph &g, typename Graph::Edge
     VertexId end = g.EdgeEnd(forbidden_edge);
     TRACE("End " << g.str(end));
 
-    ProcessPaths(g, 0, std::numeric_limits<std::size_t>::max(), start, end, path_chooser, std::numeric_limits<std::size_t>::max());
+    ProcessPaths(g, 0, std::numeric_limits<std::size_t>::max(), start, end, path_chooser);
 
     const vector<EdgeId>& path = path_chooser.most_covered_path();
     double path_coverage = path_chooser.max_coverage();
@@ -182,8 +182,8 @@ inline bool IsReachableBulge(const Graph &g, typename Graph::EdgeId e){
     else{
         VertexId start = g.EdgeStart(e), end = g.EdgeEnd(e);
         vector<EdgeId> incident;
-        push_back_all(incident, g.IncomingEdges(end));
-        push_back_all(incident, g.OutgoingEdges(start));
+        utils::push_back_all(incident, g.IncomingEdges(end));
+        utils::push_back_all(incident, g.OutgoingEdges(start));
         for (auto it = incident.begin(); it != incident.end(); ++it){
             res = IsAlternativeInclusivePathExist(g, *it, e);
             if(res){
@@ -260,7 +260,7 @@ public:
 
     TopologicalThornCondition(Graph& g,
                               size_t max_jump_dist,
-                              size_t max_edge_cnt = -1ul)
+                              size_t max_edge_cnt = std::numeric_limits<size_t>::max())
             : base(g),
               max_jump_distance_(max_jump_dist),
               max_edge_cnt_(max_edge_cnt) {
@@ -490,7 +490,7 @@ class MetaHiddenECRemover: public PersistentProcessingAlgorithm<Graph, typename
             return false;
         }
         vector<EdgeId> edges;
-        push_back_all(edges, this->g().OutgoingEdges(v));
+        utils::push_back_all(edges, this->g().OutgoingEdges(v));
         VERIFY(edges.size() == 2);
         if (this->g().conjugate(edges[0]) != edges[1]) {
             return false;
@@ -554,7 +554,7 @@ class HiddenECRemover: public PersistentProcessingAlgorithm<Graph, typename Grap
         omnigraph::MultiplicityCounter<Graph> mult_counter(this->g(), uniqueness_length_, 8);
 
         vector<EdgeId> edges;
-        push_back_all(edges, this->g().OutgoingEdges(this->g().EdgeEnd(e)));
+        utils::push_back_all(edges, this->g().OutgoingEdges(this->g().EdgeEnd(e)));
         VERIFY(edges.size() == 2);
         return (this->g().conjugate(edges[0]) == edges[1] && mult_counter.count(e, this->g().EdgeStart(e)) <= 1) ||
                 this->g().length(e) >= uniqueness_length_;
diff --git a/src/common/modules/simplification/parallel_simplification_algorithms.hpp b/src/common/modules/simplification/parallel_simplification_algorithms.hpp
index f33075b..0517c7d 100644
--- a/src/common/modules/simplification/parallel_simplification_algorithms.hpp
+++ b/src/common/modules/simplification/parallel_simplification_algorithms.hpp
@@ -340,22 +340,7 @@ public:
     bool ShouldFilterConjugate() const {
         return true;
     }
-//    bool operator()(EdgeId e) {
-//        if (ec_condition_->Check(e)) {
-//            edges_to_remove_.push_back(e);
-//        }
-//        return false;
-//    }
-//
-//    void RemoveCollectedEdges() {
-//        omnigraph::SmartSetIterator<Graph, EdgeId> to_delete(g_, edges_to_remove_.begin(), edges_to_remove_.end());
-//        while (!to_delete.IsEnd()) {
-//            EdgeId e = *to_delete;
-//            handler_f_(e);
-//            g_.DeleteEdge(e);
-//            ++to_delete;
-//        }
-//    }
+
 private:
     DECL_LOGGER("ParallelLowCoverageFunctor");
 };
@@ -727,70 +712,6 @@ private:
     ;
 };
 
-template<class Graph, class ElementType>
-class SemiParallelAlgorithmRunner {
-    typedef typename Graph::VertexId VertexId;
-    typedef typename Graph::EdgeId EdgeId;
-
-    const Graph& g_;
-
-public:
-
-    const Graph& g() const {
-        return g_;
-    }
-
-    SemiParallelAlgorithmRunner(Graph& g)
-            : g_(g) {
-
-    }
-
-    template<class Algo, class ItVec, class Comparator = std::less<ElementType>>
-    bool RunFromChunkIterators(Algo& algo, const ItVec& chunk_iterators,
-            const Comparator& comp = Comparator()) {
-        VERIFY(chunk_iterators.size() > 1);
-        omnigraph::SmartSetIterator<Graph, ElementType, Comparator> it(g_, false, comp);
-
-        omnigraph::FindInterestingFromChunkIterators(chunk_iterators,
-                                          [&](ElementType el) {return algo.IsOfInterest(el);},
-                                          [&](ElementType el) {it.push(el);});
-
-        bool changed = false;
-        for (; !it.IsEnd(); ++it) {
-            changed |= algo.Process(*it);
-        }
-        return changed;
-    }
-
-private:
-    DECL_LOGGER("SemiParallelAlgorithmRunner");
-};
-
-template<class Graph>
-class SemiParallelEdgeRemovingAlgorithm {
-    typedef typename Graph::EdgeId EdgeId;
-    typedef typename Graph::VertexId VertexId;
-    Graph& g_;
-    func::TypedPredicate<EdgeId> condition_;
-    omnigraph::EdgeRemover<Graph> edge_remover_;
-
-public:
-    SemiParallelEdgeRemovingAlgorithm(Graph& g,
-                                      func::TypedPredicate<EdgeId> condition,
-                                      std::function<void(EdgeId)> removal_handler = 0) :
-            g_(g), condition_(condition), edge_remover_(g, removal_handler) {
-    }
-
-    bool IsOfInterest(EdgeId e) const {
-        return condition_(e);
-    }
-
-    bool Process(EdgeId e) {
-        edge_remover_.DeleteEdge(e);
-        return true;
-    }
-};
-
 template<class Graph, class AlgoRunner, class Algo>
 bool RunVertexAlgorithm(Graph& g, AlgoRunner& runner, Algo& algo, size_t chunk_cnt) {
     return runner.RunFromChunkIterators(algo, omnigraph::IterationHelper<Graph, typename Graph::VertexId>(g).Chunks(chunk_cnt));
@@ -801,6 +722,7 @@ bool RunEdgeAlgorithm(Graph& g, AlgoRunner& runner, Algo& algo, size_t chunk_cnt
     return runner.RunFromChunkIterators(algo, omnigraph::IterationHelper<Graph, typename Graph::EdgeId>(g).Chunks(chunk_cnt));
 }
 
+//Deprecated
 template<class Graph>
 void ParallelCompress(Graph &g, size_t chunk_cnt, bool loop_post_compression = true) {
     INFO("Parallel compression");
@@ -817,6 +739,7 @@ void ParallelCompress(Graph &g, size_t chunk_cnt, bool loop_post_compression = t
     }
 }
 
+//Deprecated
 template<class Graph>
 bool ParallelClipTips(Graph &g,
                       size_t max_length,
@@ -839,6 +762,7 @@ bool ParallelClipTips(Graph &g,
     return true;
 }
 
+//TODO review if can be useful... AFAIK never actually worked
 //template<class Graph>
 //bool ParallelRemoveBulges(Graph &g,
 //              const config::debruijn_config::simplification::bulge_remover &br_config,
@@ -867,6 +791,8 @@ bool ParallelClipTips(Graph &g,
 //    return true;
 //}
 
+//TODO looks obsolete
+//Deprecated
 template<class Graph>
 bool ParallelEC(Graph &g,
                 size_t max_length,
diff --git a/src/common/modules/simplification/relative_coverage_remover.hpp b/src/common/modules/simplification/relative_coverage_remover.hpp
index 177f5b6..15e9e40 100644
--- a/src/common/modules/simplification/relative_coverage_remover.hpp
+++ b/src/common/modules/simplification/relative_coverage_remover.hpp
@@ -253,7 +253,7 @@ public:
         DEBUG("Max local coverage incoming  - " << rel_helper_.MaxLocalCoverage(this->g().IncomingEdges(v), v));
         DEBUG("Max local coverage outgoing  - " << rel_helper_.MaxLocalCoverage(this->g().OutgoingEdges(v), v));
         return rel_helper_.AnyHighlyCoveredOnBothSides(v, coverage_edge_around_v) &&
-                HighCoverageComponentFinder<Graph>(this->g(), this->g().coverage(e) * diff_mult_)
+                HighCoverageComponentFinder<Graph>(this->g(), this->g().coverage(e) * diff_mult_, min_neighbourhood_size_)
                        .EdgeSummaryLength(v) >= min_neighbourhood_size_;
     }
 
@@ -332,7 +332,7 @@ public:
             if (cycle_detected_)
                 return -1u;
             VERIFY(max_distance_.count(v) > 0);
-            answer = std::max(answer, get(max_distance_, v));
+            answer = std::max(answer, utils::get(max_distance_, v));
         }
         VERIFY(answer >= 0);
         if (answer == 0)
@@ -546,7 +546,7 @@ class RelativeCovComponentFinder {
                 vertices.insert(g_.EdgeEnd(e));
             }
 
-            auto filename = success ? vis_dir_ + "/success/" + ToString(succ_cnt_++) : vis_dir_ + "/fail/" + ToString(fail_cnt_++);
+            auto filename = success ? vis_dir_ + "/success/" + std::to_string(succ_cnt_++) : vis_dir_ + "/fail/" + std::to_string(fail_cnt_++);
             visualization::visualization_utils::WriteComponent(
                     ComponentCloser<Graph>(g_, 0).CloseComponent(
                             GraphComponent<Graph>::FromVertices(g_, vertices)),
@@ -578,9 +578,9 @@ public:
         VERIFY(tip_allowing_length_bound >= length_bound);
         TRACE("Coverage gap " << min_coverage_gap);
         if (!vis_dir_.empty()) {
-            path::make_dirs(vis_dir_);
-            path::make_dirs(vis_dir_ + "/success/");
-            path::make_dirs(vis_dir_ + "/fail/");
+            fs::make_dirs(vis_dir_);
+            fs::make_dirs(vis_dir_ + "/success/");
+            fs::make_dirs(vis_dir_ + "/fail/");
         }
     }
 
@@ -662,7 +662,7 @@ public:
                       max_coverage, vertex_count_limit, vis_dir),
               component_remover_(g, handler_function) {
         this->interest_el_finder_ = std::make_shared<ParallelInterestingElementFinder<Graph, EdgeId>>(
-                [&](EdgeId e) { return finder_(e); }, chunk_cnt);
+            [&](EdgeId e) { return static_cast<bool>(finder_(e)); }, chunk_cnt);
     }
 
 protected:
diff --git a/src/common/modules/simplification/tip_clipper.hpp b/src/common/modules/simplification/tip_clipper.hpp
index 7f87d66..5841399 100644
--- a/src/common/modules/simplification/tip_clipper.hpp
+++ b/src/common/modules/simplification/tip_clipper.hpp
@@ -147,22 +147,19 @@ class ATCondition: public EdgeCondition<Graph> {
     typedef typename Graph::VertexId VertexId;
     typedef EdgeCondition<Graph> base;
     const double max_AT_percentage_;
-    const size_t max_tip_length_;
     const bool check_tip_ ;
 
 public:
 
-    ATCondition(const Graph& g, double max_AT_percentage, size_t max_tip_length, bool check_tip) :
-            base(g), max_AT_percentage_(max_AT_percentage), max_tip_length_(max_tip_length), check_tip_(check_tip) {
+    ATCondition(const Graph& g, double max_AT_percentage, bool check_tip) :
+            base(g), max_AT_percentage_(max_AT_percentage), check_tip_(check_tip) {
 		DEBUG("check_tip: " << check_tip_);
     }
 
     bool Check(EdgeId e) const {
         //+1 is a trick to deal with edges of 0 coverage from iterative run
+        //FIXME where is the trick?
         size_t start = 0;
-        //TODO: Do we need this check?
-        if(this->g().length(e) > max_tip_length_)
-            return false;
         size_t end = this->g().length(e) + this->g().k();
         if (check_tip_) {
             if (this->g().OutgoingEdgeCount(this->g().EdgeEnd(e)) == 0)
@@ -175,13 +172,12 @@ public:
         const Sequence &s_edge = this->g().EdgeNucls(e);
 
         for (size_t position = start; position < end; position ++) {
-            counts[s_edge[position]] ++;
+            counts[s_edge[position]]++;
         }
         size_t curm = *std::max_element(counts.begin(), counts.end());
-        if (curm > max_AT_percentage_ * double(end - start)) {
+        if (math::gr(double(curm), max_AT_percentage_ * double(end - start))) {
             DEBUG("deleting edge" << s_edge.str());;
 			DEBUG("curm: " << curm);
-			
             DEBUG("start end cutoff" << start << " " << end << " " << max_AT_percentage_ * double(this->g().length(e)));
 
             return true;
diff --git a/src/common/utils/mph_index/CMakeLists.txt b/src/common/paired_info/CMakeLists.txt
similarity index 67%
copy from src/common/utils/mph_index/CMakeLists.txt
copy to src/common/paired_info/CMakeLists.txt
index cf07729..509338e 100644
--- a/src/common/utils/mph_index/CMakeLists.txt
+++ b/src/common/paired_info/CMakeLists.txt
@@ -5,9 +5,7 @@
 # See file LICENSE for details.
 ############################################################################
 
-project(mph_index CXX)
-
-add_library(mph_index STATIC bitpair_vector.cpp)
-
-target_link_libraries(mph_index cityhash)
+project(paired_info CXX)
 
+add_library(paired_info STATIC
+        distance_estimation.cpp weighted_distance_estimation.cpp smoothing_distance_estimation.cpp)
diff --git a/src/common/paired_info/distance_estimation.cpp b/src/common/paired_info/distance_estimation.cpp
new file mode 100644
index 0000000..59569b7
--- /dev/null
+++ b/src/common/paired_info/distance_estimation.cpp
@@ -0,0 +1,176 @@
+#include "distance_estimation.hpp"
+
+namespace omnigraph {
+namespace de {
+
+using namespace debruijn_graph;
+
+std::vector<size_t> GraphDistanceFinder::GetGraphDistancesLengths(EdgeId e1, EdgeId e2) const {
+    LengthMap m;
+    m.insert({e2, {}});
+
+    FillGraphDistancesLengths(e1, m);
+
+    return m[e2];
+}
+
+void GraphDistanceFinder::FillGraphDistancesLengths(EdgeId e1, LengthMap &second_edges) const {
+    vector <size_t> path_lower_bounds;
+    size_t path_upper_bound = PairInfoPathLengthUpperBound(graph_.k(), insert_size_, delta_);
+    PathProcessor <Graph> paths_proc(graph_, graph_.EdgeEnd(e1), path_upper_bound);
+
+    for (auto &entry : second_edges) {
+        EdgeId e2 = entry.first;
+        size_t path_lower_bound = PairInfoPathLengthLowerBound(graph_.k(), graph_.length(e1),
+                                                               graph_.length(e2), gap_, delta_);
+
+        TRACE("Bounds for paths are " << path_lower_bound << " " << path_upper_bound);
+
+        DistancesLengthsCallback <Graph> callback(graph_);
+        paths_proc.Process(graph_.EdgeStart(e2), path_lower_bound, path_upper_bound, callback);
+        GraphLengths lengths = callback.distances();
+        for (size_t j = 0; j < lengths.size(); ++j) {
+            lengths[j] += graph_.length(e1);
+            TRACE("Resulting distance set for " <<
+                                                " edge " << graph_.int_id(e2) <<
+                                                " #" << j << " length " << lengths[j]);
+        }
+
+        if (e1 == e2)
+            lengths.push_back(0);
+
+        std::sort(lengths.begin(), lengths.end());
+        entry.second = lengths;
+    }
+}
+
+void AbstractDistanceEstimator::FillGraphDistancesLengths(EdgeId e1, LengthMap &second_edges) const {
+    distance_finder_.FillGraphDistancesLengths(e1, second_edges);
+}
+
+AbstractDistanceEstimator::OutHistogram AbstractDistanceEstimator::ClusterResult(EdgePair,
+                                                                                 const EstimHist &estimated) const {
+    OutHistogram result;
+    for (size_t i = 0; i < estimated.size(); ++i) {
+        size_t left = i;
+        DEWeight weight = DEWeight(estimated[i].second);
+        while (i + 1 < estimated.size() &&
+               (estimated[i + 1].first - estimated[i].first) <= (int) linkage_distance_) {
+            ++i;
+            weight += estimated[i].second;
+        }
+        DEDistance center = DEDistance((estimated[left].first + estimated[i].first) * 0.5);
+        DEVariance var = DEVariance((estimated[i].first - estimated[left].first) * 0.5);
+        result.insert(Point(center, weight, var));
+    }
+    return result;
+}
+
+void AbstractDistanceEstimator::AddToResult(const OutHistogram &clustered, EdgePair ep,
+                                            PairedInfoBuffer<Graph> &result) const  {
+    result.AddMany(ep.first, ep.second, clustered);
+}
+
+void DistanceEstimator::Estimate(PairedInfoIndexT<Graph> &result, size_t nthreads) const  {
+    this->Init();
+    const auto &index = this->index();
+
+    DEBUG("Collecting edge infos");
+    std::vector<EdgeId> edges;
+    for (auto it = this->graph().ConstEdgeBegin(); !it.IsEnd(); ++it)
+        edges.push_back(*it);
+
+    DEBUG("Processing");
+    PairedInfoBuffersT<Graph> buffer(this->graph(), nthreads);
+#   pragma omp parallel for num_threads(nthreads) schedule(guided, 10)
+    for (size_t i = 0; i < edges.size(); ++i) {
+        EdgeId edge = edges[i];
+        ProcessEdge(edge, index, buffer[omp_get_thread_num()]);
+    }
+
+    for (size_t i = 0; i < nthreads; ++i) {
+        result.Merge(buffer[i]);
+        buffer[i].clear();
+    }
+}
+
+DistanceEstimator::EstimHist DistanceEstimator::EstimateEdgePairDistances(EdgePair ep, const InHistogram &histogram,
+                                                                          const GraphLengths &raw_forward) const {
+    using std::abs;
+    using namespace math;
+    EdgeId e1 = ep.first, e2 = ep.second;
+    size_t first_len = this->graph().length(e1), second_len = this->graph().length(e2);
+    int minD = rounded_d(histogram.min()), maxD = rounded_d(histogram.max());
+
+    TRACE("Bounds are " << minD << " " << maxD);
+    EstimHist result;
+    vector<DEDistance> forward;
+    forward.reserve(raw_forward.size());
+    for (auto raw_length : raw_forward) {
+        int length = int(raw_length);
+        if (minD - int(max_distance_) <= length && length <= maxD + int(max_distance_))
+            forward.push_back(DEDistance(length));
+    }
+    if (forward.size() == 0)
+        return result;
+
+    size_t cur_dist = 0;
+    vector<DEWeight> weights(forward.size(), 0);
+    for (auto point : histogram) {
+        if (ls(2 * point.d + DEDistance(second_len), DEDistance(first_len)))
+            continue;
+        while (cur_dist + 1 < forward.size() && forward[cur_dist + 1] < point.d)
+            ++cur_dist;
+
+        if (cur_dist + 1 < forward.size() &&
+            ls(forward[cur_dist + 1] - point.d, point.d - forward[cur_dist])) {
+            ++cur_dist;
+
+            if (le(abs(forward[cur_dist] - point.d), max_distance_))
+                weights[cur_dist] += point.weight;
+        } else if (cur_dist + 1 < forward.size() &&
+                   eq(forward[cur_dist + 1] - point.d, point.d - forward[cur_dist])) {
+            if (le(abs(forward[cur_dist] - point.d), max_distance_))
+                weights[cur_dist] += point.weight * 0.5;
+            ++cur_dist;
+            if (le(abs(forward[cur_dist] - point.d), max_distance_))
+                weights[cur_dist] += point.weight * 0.5;
+        } else {
+            if (le(abs(forward[cur_dist] - point.d), max_distance_))
+                weights[cur_dist] += point.weight;
+        }
+    }
+
+    for (size_t i = 0; i < forward.size(); ++i)
+        if (ge(weights[i], DEWeight(0)))
+            result.push_back(make_pair(forward[i], weights[i]));
+
+    VERIFY(result.size() == forward.size());
+    return result;
+}
+
+void DistanceEstimator::ProcessEdge(EdgeId e1, const InPairedIndex &pi, PairedInfoBuffer<Graph> &result) const {
+    typename base::LengthMap second_edges;
+    auto inner_map = pi.GetHalf(e1);
+    for (auto i : inner_map)
+        second_edges[i.first];
+
+    this->FillGraphDistancesLengths(e1, second_edges);
+
+    for (const auto &entry: second_edges) {
+        EdgeId e2 = entry.first;
+        EdgePair ep(e1, e2);
+
+        VERIFY(ep <= pi.ConjugatePair(ep));
+
+        const GraphLengths &forward = entry.second;
+        TRACE("Edge pair is " << this->graph().int_id(ep.first)
+                              << " " << this->graph().int_id(ep.second));
+        auto hist = pi.Get(e1, e2);
+        const EstimHist &estimated = this->EstimateEdgePairDistances(ep, hist, forward);
+        OutHistogram res = this->ClusterResult(ep, estimated);
+        this->AddToResult(res, ep, result);
+    }
+}
+}
+}
diff --git a/src/common/paired_info/distance_estimation.hpp b/src/common/paired_info/distance_estimation.hpp
index 97663a4..938f1dc 100644
--- a/src/common/paired_info/distance_estimation.hpp
+++ b/src/common/paired_info/distance_estimation.hpp
@@ -8,154 +8,94 @@
 #ifndef DISTANCE_ESTIMATION_HPP_
 #define DISTANCE_ESTIMATION_HPP_
 
-#include "math/xmath.h"
-#include "utils/openmp_wrapper.h"
-
-#include "paired_info.hpp"
+#include "utils/parallel/openmp_wrapper.h"
+#include "assembly_graph/core/basic_graph_stats.hpp"
+#include "assembly_graph/core/graph.hpp"
 #include "assembly_graph/paths/path_processor.hpp"
+
 #include "paired_info/pair_info_bounds.hpp"
+#include "paired_info.hpp"
+#include "math/xmath.h"
 
 namespace omnigraph {
 
 namespace de {
 
 //todo move to some more common place
-template<class Graph>
 class GraphDistanceFinder {
-    typedef typename Graph::EdgeId EdgeId;
-    typedef typename Graph::VertexId VertexId;
-    typedef std::vector<EdgeId> Path;
+    typedef std::vector<debruijn_graph::EdgeId> Path;
     typedef std::vector<size_t> GraphLengths;
-    typedef std::map<EdgeId, GraphLengths> LengthMap;
+    typedef std::map<debruijn_graph::EdgeId, GraphLengths> LengthMap;
 
 public:
-    GraphDistanceFinder(const Graph &graph, size_t insert_size, size_t read_length, size_t delta) :
+    GraphDistanceFinder(const debruijn_graph::Graph &graph, size_t insert_size, size_t read_length, size_t delta) :
             graph_(graph), insert_size_(insert_size), gap_((int) (insert_size - 2 * read_length)),
             delta_((double) delta) { }
 
-    std::vector<size_t> GetGraphDistancesLengths(EdgeId e1, EdgeId e2) const {
-        LengthMap m;
-        m.insert({e2, {}});
-
-        FillGraphDistancesLengths(e1, m);
-
-        return m[e2];
-    }
+    std::vector<size_t> GetGraphDistancesLengths(debruijn_graph::EdgeId e1, debruijn_graph::EdgeId e2) const;
 
     // finds all distances from a current edge to a set of edges
-    void FillGraphDistancesLengths(EdgeId e1, LengthMap &second_edges) const {
-        vector<size_t> path_lower_bounds;
-
-        size_t path_upper_bound = PairInfoPathLengthUpperBound(graph_.k(), insert_size_, delta_);
-
-        PathProcessor<Graph> paths_proc(graph_, graph_.EdgeEnd(e1), path_upper_bound);
-
-        for (auto &entry : second_edges) {
-            EdgeId e2 = entry.first;
-            size_t path_lower_bound = PairInfoPathLengthLowerBound(graph_.k(), graph_.length(e1),
-                                                                   graph_.length(e2), gap_, delta_);
-
-            TRACE("Bounds for paths are " << path_lower_bound << " " << path_upper_bound);
-
-            DistancesLengthsCallback<Graph> callback(graph_);
-            paths_proc.Process(graph_.EdgeStart(e2), path_lower_bound, path_upper_bound, callback);
-            GraphLengths lengths = callback.distances();
-            for (size_t j = 0; j < lengths.size(); ++j) {
-                lengths[j] += graph_.length(e1);
-                TRACE("Resulting distance set for " <<
-                          " edge " << graph_.int_id(e2) <<
-                          " #" << j << " length " << lengths[j]);
-            }
-
-            if (e1 == e2)
-                lengths.push_back(0);
-
-            std::sort(lengths.begin(), lengths.end());
-            entry.second = lengths;
-        }
-    }
+    void FillGraphDistancesLengths(debruijn_graph::EdgeId e1, LengthMap &second_edges) const;
 
 private:
     DECL_LOGGER("GraphDistanceFinder");
-
-    const Graph &graph_;
+    const debruijn_graph::Graph &graph_;
     const size_t insert_size_;
     const int gap_;
     const double delta_;
 };
 
-template<class Graph>
 class AbstractDistanceEstimator {
 protected:
-    typedef UnclusteredPairedInfoIndexT<Graph> InPairedIndex;
-    typedef PairedInfoIndexT<Graph> OutPairedIndex;
+    typedef UnclusteredPairedInfoIndexT<debruijn_graph::Graph> InPairedIndex;
+    typedef PairedInfoIndexT<debruijn_graph::Graph> OutPairedIndex;
     typedef typename InPairedIndex::HistProxy InHistogram;
     typedef typename OutPairedIndex::Histogram OutHistogram;
 
 public:
-    AbstractDistanceEstimator(const Graph &graph,
+    AbstractDistanceEstimator(const debruijn_graph::Graph &graph,
                               const InPairedIndex &index,
-                              const GraphDistanceFinder<Graph> &distance_finder,
+                              const GraphDistanceFinder &distance_finder,
                               size_t linkage_distance = 0)
             : graph_(graph), index_(index),
               distance_finder_(distance_finder), linkage_distance_(linkage_distance) { }
 
-    virtual void Estimate(PairedInfoIndexT<Graph> &result, size_t nthreads) const = 0;
+    virtual void Estimate(PairedInfoIndexT<debruijn_graph::Graph> &result, size_t nthreads) const = 0;
 
     virtual ~AbstractDistanceEstimator() { }
 
 protected:
-    typedef typename Graph::EdgeId EdgeId;
-    typedef pair<EdgeId, EdgeId> EdgePair;
+    typedef pair<debruijn_graph::EdgeId, debruijn_graph::EdgeId> EdgePair;
     typedef vector<pair<int, double> > EstimHist;
     typedef vector<size_t> GraphLengths;
-    typedef std::map<EdgeId, GraphLengths> LengthMap;
+    typedef std::map<debruijn_graph::EdgeId, GraphLengths> LengthMap;
 
-    const Graph &graph() const { return graph_; }
+    const debruijn_graph::Graph &graph() const { return graph_; }
 
     const InPairedIndex &index() const { return index_; }
 
-    void FillGraphDistancesLengths(EdgeId e1, LengthMap &second_edges) const {
-        distance_finder_.FillGraphDistancesLengths(e1, second_edges);
-    }
+    void FillGraphDistancesLengths(debruijn_graph::EdgeId e1, LengthMap &second_edges) const;
 
-    OutHistogram ClusterResult(EdgePair /*ep*/, const EstimHist &estimated) const {
-        OutHistogram result;
-        for (size_t i = 0; i < estimated.size(); ++i) {
-            size_t left = i;
-            DEWeight weight = DEWeight(estimated[i].second);
-            while (i + 1 < estimated.size() &&
-                   (estimated[i + 1].first - estimated[i].first) <= (int) linkage_distance_) {
-                ++i;
-                weight += estimated[i].second;
-            }
-            DEDistance center = DEDistance((estimated[left].first + estimated[i].first) * 0.5);
-            DEVariance var = DEVariance((estimated[i].first - estimated[left].first) * 0.5);
-            result.insert(Point(center, weight, var));
-        }
-        return result;
-    }
+    OutHistogram ClusterResult(EdgePair /*ep*/, const EstimHist &estimated) const;
 
-    void AddToResult(const OutHistogram &clustered, EdgePair ep, PairedInfoBuffer<Graph> &result) const {
-        result.AddMany(ep.first, ep.second, clustered);
-    }
+    void AddToResult(const OutHistogram &clustered, EdgePair ep, PairedInfoBuffer<debruijn_graph::Graph> &result) const;
 
 private:
-    const Graph &graph_;
+    const debruijn_graph::Graph &graph_;
     const InPairedIndex &index_;
-    const GraphDistanceFinder<Graph> &distance_finder_;
+    const GraphDistanceFinder &distance_finder_;
     const size_t linkage_distance_;
 
     virtual const string Name() const = 0;
+
+    DECL_LOGGER("AbstractDistanceEstimator");
 };
 
-template<class Graph>
-class DistanceEstimator : public AbstractDistanceEstimator<Graph> {
-    typedef AbstractDistanceEstimator<Graph> base;
-    typedef typename Graph::EdgeId EdgeId;
+class DistanceEstimator : public AbstractDistanceEstimator {
+    typedef AbstractDistanceEstimator base;
     typedef vector<size_t> GraphLengths;
     typedef vector<pair<int, double> > EstimHist;
-    typedef pair<EdgeId, EdgeId> EdgePair;
+    typedef pair<debruijn_graph::EdgeId, debruijn_graph::EdgeId> EdgePair;
 
 protected:
     typedef typename base::InPairedIndex InPairedIndex;
@@ -164,9 +104,9 @@ protected:
     typedef typename base::OutHistogram OutHistogram;
 
 public:
-    DistanceEstimator(const Graph &graph,
+    DistanceEstimator(const debruijn_graph::Graph &graph,
                       const InPairedIndex &index,
-                      const GraphDistanceFinder<Graph> &distance_finder,
+                      const GraphDistanceFinder &distance_finder,
                       size_t linkage_distance, size_t max_distance)
             : base(graph, index, distance_finder, linkage_distance), max_distance_(max_distance) { }
 
@@ -176,114 +116,19 @@ public:
         INFO("Using " << this->Name() << " distance estimator");
     }
 
-    virtual void Estimate(OutPairedIndex &result, size_t nthreads) const {
-        this->Init();
-        const auto &index = this->index();
-
-        DEBUG("Collecting edge infos");
-        std::vector<EdgeId> edges;
-        for (auto it = this->graph().ConstEdgeBegin(); !it.IsEnd(); ++it)
-            edges.push_back(*it);
-
-        DEBUG("Processing");
-        PairedInfoBuffersT<Graph> buffer(this->graph(), nthreads);
-#   pragma omp parallel for num_threads(nthreads) schedule(guided, 10)
-        for (size_t i = 0; i < edges.size(); ++i) {
-            EdgeId edge = edges[i];
-            ProcessEdge(edge, index, buffer[omp_get_thread_num()]);
-        }
-
-        for (size_t i = 0; i < nthreads; ++i) {
-            result.Merge(buffer[i]);
-            buffer[i].clear();
-        }
-    }
+    virtual void Estimate(OutPairedIndex &result, size_t nthreads) const;
 
 protected:
     const DEDistance max_distance_;
 
     virtual EstimHist EstimateEdgePairDistances(EdgePair ep,
                                                 const InHistogram &histogram,
-                                                const GraphLengths &raw_forward) const {
-        using std::abs;
-        using namespace math;
-        EdgeId e1 = ep.first, e2 = ep.second;
-        size_t first_len = this->graph().length(e1), second_len = this->graph().length(e2);
-        int minD = rounded_d(histogram.min()), maxD = rounded_d(histogram.max());
-
-        TRACE("Bounds are " << minD << " " << maxD);
-        EstimHist result;
-        vector<DEDistance> forward;
-        forward.reserve(raw_forward.size());
-        for (auto raw_length : raw_forward) {
-            int length = int(raw_length);
-            if (minD - int(max_distance_) <= length && length <= maxD + int(max_distance_))
-                forward.push_back(DEDistance(length));
-        }
-        if (forward.size() == 0)
-            return result;
-
-        size_t cur_dist = 0;
-        vector<DEWeight> weights(forward.size(), 0);
-        for (auto point : histogram) {
-            if (ls(2 * point.d + DEDistance(second_len), DEDistance(first_len)))
-                continue;
-            while (cur_dist + 1 < forward.size() && forward[cur_dist + 1] < point.d)
-                ++cur_dist;
-
-            if (cur_dist + 1 < forward.size() &&
-                ls(forward[cur_dist + 1] - point.d, point.d - forward[cur_dist])) {
-                ++cur_dist;
-
-                if (le(abs(forward[cur_dist] - point.d), max_distance_))
-                    weights[cur_dist] += point.weight;
-            } else if (cur_dist + 1 < forward.size() &&
-                       eq(forward[cur_dist + 1] - point.d, point.d - forward[cur_dist])) {
-                if (le(abs(forward[cur_dist] - point.d), max_distance_))
-                    weights[cur_dist] += point.weight * 0.5;
-                ++cur_dist;
-                if (le(abs(forward[cur_dist] - point.d), max_distance_))
-                    weights[cur_dist] += point.weight * 0.5;
-            } else {
-                if (le(abs(forward[cur_dist] - point.d), max_distance_))
-                    weights[cur_dist] += point.weight;
-            }
-        }
-
-        for (size_t i = 0; i < forward.size(); ++i)
-            if (ge(weights[i], DEWeight(0)))
-                result.push_back(make_pair(forward[i], weights[i]));
-
-        VERIFY(result.size() == forward.size());
-        return result;
-    }
+                                                const GraphLengths &raw_forward) const;
 
 private:
-    virtual void ProcessEdge(EdgeId e1,
+    virtual void ProcessEdge(debruijn_graph::EdgeId e1,
                              const InPairedIndex &pi,
-                             PairedInfoBuffer<Graph> &result) const {
-        typename base::LengthMap second_edges;
-        auto inner_map = pi.GetHalf(e1);
-        for (auto i : inner_map)
-            second_edges[i.first];
-
-        this->FillGraphDistancesLengths(e1, second_edges);
-
-        for (const auto &entry: second_edges) {
-            EdgeId e2 = entry.first;
-            EdgePair ep(e1, e2);
-
-            VERIFY(ep <= pi.ConjugatePair(ep));
-
-            const GraphLengths &forward = entry.second;
-            TRACE("Edge pair is " << this->graph().int_id(ep.first)
-                  << " " << this->graph().int_id(ep.second));
-            auto hist = pi.Get(e1, e2);
-            const EstimHist &estimated = this->EstimateEdgePairDistances(ep, hist, forward);
-            OutHistogram res = this->ClusterResult(ep, estimated);
-            this->AddToResult(res, ep, result);
-        }
-    }
+                             PairedInfoBuffer<debruijn_graph::Graph> &result) const;
 
     virtual const string Name() const {
         static const string my_name = "SIMPLE";
diff --git a/src/common/paired_info/histogram.hpp b/src/common/paired_info/histogram.hpp
index d8983fc..266804c 100644
--- a/src/common/paired_info/histogram.hpp
+++ b/src/common/paired_info/histogram.hpp
@@ -8,8 +8,8 @@
 #pragma once
 
 #include <btree/btree_set.h>
-#include "common/adt/flat_set.hpp"
-#include "common/adt/small_pod_vector.hpp"
+#include "adt/flat_set.hpp"
+#include "adt/small_pod_vector.hpp"
 #include "index_point.hpp"
 
 namespace omnigraph {
diff --git a/src/common/paired_info/paired_info.hpp b/src/common/paired_info/paired_info.hpp
index 0bba662..ab2822f 100644
--- a/src/common/paired_info/paired_info.hpp
+++ b/src/common/paired_info/paired_info.hpp
@@ -7,7 +7,7 @@
 
 #pragma once
 
-#include "common/adt/iterator_range.hpp"
+#include "adt/iterator_range.hpp"
 #include <boost/iterator/iterator_facade.hpp>
 #include <btree/safe_btree_map.h>
 
diff --git a/src/common/paired_info/smoothing_distance_estimation.cpp b/src/common/paired_info/smoothing_distance_estimation.cpp
new file mode 100644
index 0000000..bc10e24
--- /dev/null
+++ b/src/common/paired_info/smoothing_distance_estimation.cpp
@@ -0,0 +1,185 @@
+#include "smoothing_distance_estimation.hpp"
+
+namespace omnigraph {
+namespace de {
+
+using namespace debruijn_graph;
+
+SmoothingDistanceEstimator::EstimHist SmoothingDistanceEstimator::FindEdgePairDistances(EdgePair ep,
+                                                                                        const TempHistogram &raw_hist) const {
+    size_t first_len = this->graph().length(ep.first);
+    size_t second_len = this->graph().length(ep.second);
+    TRACE("Lengths are " << first_len << " " << second_len);
+    TempHistogram data;
+    for (auto I = raw_hist.begin(), E = raw_hist.end(); I != E; ++I) {
+        Point p = *I;
+        if (math::ge(2 * (long) rounded_d(p) + (long) second_len, (long) first_len)) if (
+                (long) rounded_d(p) + (long) OVERLAP_TOLERANCE >= (long) first_len)
+            data.insert(p);
+    }
+    EstimHist result;
+    double picture_weight = 0.;
+    for (auto I = data.begin(), E = data.end(); I != E; ++I)
+        picture_weight += I->weight;
+    if (math::ls(picture_weight, 3.))
+        return result;
+
+    DataDivider<EdgeId> data_divider(threshold_,
+                                     vector<Point>(data.begin(), data.end()));
+
+    PairInfos infos;
+    infos.reserve(data.size());
+    const vector<Interval> &clusters =
+            data_divider.DivideAndSmoothData(ep, infos, this->weight_f_);
+    DEBUG("Seeking for distances");
+    TRACE("size " << infos.size());
+
+    for (size_t i = 0; i < clusters.size(); ++i) {
+        size_t begin = clusters[i].first;
+        size_t end = clusters[i].second;
+        TRACE("begin " << begin << " at " << rounded_d(infos[begin])
+                       << ", " << " end " << end << " at " << rounded_d(infos[end - 1]));
+        size_t data_length = rounded_d(infos[end - 1]) - rounded_d(infos[begin]) + 1;
+        TRACE("data length " << data_length);
+        if (end - begin > min_peak_points_) {
+            size_t range = (size_t) math::round((double) data_length * range_coeff_);
+            size_t delta = (size_t) math::round((double) data_length * delta_coeff_);
+            PeakFinder<EdgeId> peakfinder(infos, begin, end, range, delta, percentage_, deriv_thr);
+            DEBUG("Processing window : " << rounded_d(infos[begin])
+                                         << " " << rounded_d(infos[end - 1]));
+            peakfinder.FFTSmoothing(cutoff_);
+            TRACE("Listing peaks");
+            const EstimHist &peaks = peakfinder.ListPeaks();
+            //for (auto iter = peaks.begin(); iter != peaks.end(); ++iter) {
+            //TRACE("PEAKS " << iter->first << " " << iter->second);
+            //}
+            if (peaks.size() == 0)
+                continue;
+            size_t index_of_max_weight = 0;
+            for (size_t i = 0; i < peaks.size(); ++i)
+                if (math::ls(peaks[index_of_max_weight].second, peaks[i].second))
+                    index_of_max_weight = i;
+            result.push_back(peaks[index_of_max_weight]);
+        }
+    }
+
+    if (result.size() == 0)
+        return result;
+    size_t index_of_max_weight = 0;
+    for (size_t i = 0; i < result.size(); ++i)
+        if (math::ls(result[index_of_max_weight].second, result[i].second))
+            index_of_max_weight = i;
+
+    EstimHist new_result;
+    for (size_t i = 0; i < result.size(); ++i)
+        if (result[i].second > .5 * result[index_of_max_weight].second)
+            new_result.push_back(result[i]);
+    return new_result;
+}
+
+void SmoothingDistanceEstimator::ProcessEdge(EdgeId e1, const InPairedIndex &pi,
+                                             PairedInfoBuffer<Graph> &result) const {
+    typename base::LengthMap second_edges;
+    auto inner_map = pi.GetHalf(e1);
+    for (auto I : inner_map)
+        second_edges[I.first];
+
+    this->FillGraphDistancesLengths(e1, second_edges);
+
+    for (const auto &entry: second_edges) {
+        EdgeId e2 = entry.first;
+        EdgePair ep(e1, e2);
+
+        VERIFY(ep <= pi.ConjugatePair(ep));
+
+        TRACE("Processing edge pair " << this->graph().int_id(e1)
+                                      << " " << this->graph().int_id(e2));
+        const GraphLengths &forward = entry.second;
+
+        auto hist = pi.Get(e1, e2).Unwrap();
+        EstimHist estimated;
+        //DEBUG("Extending paired information");
+        //DEBUG("Extend left");
+        //this->base::ExtendInfoLeft(e1, e2, hist, 1000);
+        DEBUG("Extend right");
+        this->ExtendInfoRight(e1, e2, hist, 1000);
+        if (forward.size() == 0) {
+            estimated = FindEdgePairDistances(ep, hist);
+            ++gap_distances;
+        } else if (forward.size() > 0 && (!only_scaffolding_)) {
+            //TODO: remove THIS
+            InPairedIndex temp_index(this->graph());
+            temp_index.AddMany(e1, e2, hist);
+            auto hist = temp_index.Get(e1, e2);
+            estimated = this->base::EstimateEdgePairDistances(ep, hist, forward);
+        }
+        DEBUG(gap_distances << " distances between gap edge pairs have been found");
+        OutHistogram res = this->ClusterResult(ep, estimated);
+        this->AddToResult(res, ep, result);
+    }
+}
+
+bool SmoothingDistanceEstimator::IsTipTip(EdgeId e1, EdgeId e2) const {
+    return (this->graph().OutgoingEdgeCount(this->graph().EdgeEnd(e1)) == 0 &&
+            this->graph().IncomingEdgeCount(this->graph().EdgeEnd(e1)) == 1 &&
+            this->graph().IncomingEdgeCount(this->graph().EdgeStart(e2)) == 0 &&
+            this->graph().OutgoingEdgeCount(this->graph().EdgeStart(e2)) == 1);
+}
+
+void SmoothingDistanceEstimator::MergeInto(const InHistogram &what, TempHistogram &where, int shift) const {
+    // assuming they are sorted already
+    if (what.size() == 0)
+        return;
+
+    if (where.size() == 0) {
+        for (auto to_be_added : what) {
+            to_be_added.d += shift;
+            where.insert(to_be_added);
+        }
+
+        return;
+    }
+
+    // Check, whether two histograms intersect. If not, we can just merge them
+    // straightforwardly.
+    if (math::ls(where.rbegin()->d, what.min().d + float(shift)) ||
+        math::gr(where.begin()->d, what.max().d + float(shift))) {
+        for (auto to_be_added : what) {
+            to_be_added.d += shift;
+            where.insert(to_be_added);
+        }
+    } else {
+        for (auto to_be_added : what) {
+            to_be_added.d += shift;
+            auto low_bound = std::lower_bound(where.begin(), where.end(), to_be_added);
+            if (low_bound != where.end() && to_be_added == *low_bound) {
+                to_be_added.weight += low_bound->weight;
+                where.erase(to_be_added);
+                where.insert(to_be_added);
+            } else
+                where.insert(low_bound, to_be_added);
+        }
+    }
+}
+
+void SmoothingDistanceEstimator::ExtendRightDFS(const EdgeId &first, EdgeId current, TempHistogram &data, int shift,
+                                                size_t max_shift) const {
+    auto end = this->graph().EdgeEnd(current);
+    if (current == first)
+        return;
+    if (this->graph().IncomingEdgeCount(end) > 1)
+        return;
+
+    for (EdgeId next : this->graph().OutgoingEdges(end)) {
+        auto hist = this->index().Get(first, next);
+        if (-shift < (int) max_shift)
+            ExtendRightDFS(first, next, data, shift - (int) this->graph().length(current), max_shift);
+
+        //auto filtered_infos = FilterPositive(hist, this->graph().length(first), this->graph().length(next));
+        //if (filtered_infos.size() > 0)
+        //  MergeInto(filtered_infos, data, shift - (int) this->graph().length(current));
+        MergeInto(hist, data, shift - (int) this->graph().length(current));
+    }
+}
+}
+}
diff --git a/src/common/paired_info/smoothing_distance_estimation.hpp b/src/common/paired_info/smoothing_distance_estimation.hpp
index c605e00..04e24b6 100644
--- a/src/common/paired_info/smoothing_distance_estimation.hpp
+++ b/src/common/paired_info/smoothing_distance_estimation.hpp
@@ -8,21 +8,19 @@
 #ifndef SMOOTHING_DISTANCE_ESTIMATION_HPP_
 #define SMOOTHING_DISTANCE_ESTIMATION_HPP_
 
-#include "paired_info.hpp"
+#include "weighted_distance_estimation.hpp"
 #include "data_divider.hpp"
 #include "peak_finder.hpp"
-#include "weighted_distance_estimation.hpp"
 
 namespace omnigraph {
 
 namespace de {
 
-template<class Graph>
-class SmoothingDistanceEstimator : public WeightedDistanceEstimator<Graph> {
+class SmoothingDistanceEstimator : public WeightedDistanceEstimator {
     //FIXME configure
     static const size_t OVERLAP_TOLERANCE = 1000;
 protected:
-    typedef WeightedDistanceEstimator<Graph> base;
+    typedef WeightedDistanceEstimator base;
     typedef typename base::InPairedIndex InPairedIndex;
     typedef typename base::OutPairedIndex OutPairedIndex;
     typedef typename base::InHistogram InHistogram;
@@ -30,9 +28,9 @@ protected:
     typedef typename InPairedIndex::Histogram TempHistogram;
 
 public:
-    SmoothingDistanceEstimator(const Graph &graph,
+    SmoothingDistanceEstimator(const debruijn_graph::Graph &graph,
                                const InPairedIndex &histogram,
-                               const GraphDistanceFinder<Graph> &dist_finder,
+                               const GraphDistanceFinder &dist_finder,
                                std::function<double(int)> weight_f,
                                size_t linkage_distance, size_t max_distance, size_t threshold,
                                double range_coeff, double delta_coeff,
@@ -57,10 +55,9 @@ public:
     virtual ~SmoothingDistanceEstimator() { }
 
 protected:
-    typedef typename Graph::EdgeId EdgeId;
-    typedef pair<EdgeId, EdgeId> EdgePair;
+    typedef pair<debruijn_graph::EdgeId, debruijn_graph::EdgeId> EdgePair;
     typedef vector<pair<int, double> > EstimHist;
-    typedef vector<PairInfo<EdgeId> > PairInfos;
+    typedef vector<PairInfo<debruijn_graph::EdgeId> > PairInfos;
     typedef vector<size_t> GraphLengths;
 
     EstimHist EstimateEdgePairDistances(EdgePair /*ep*/,
@@ -87,186 +84,23 @@ private:
     mutable size_t gap_distances;
 
     EstimHist FindEdgePairDistances(EdgePair ep,
-                                    const TempHistogram &raw_hist) const {
-        size_t first_len = this->graph().length(ep.first);
-        size_t second_len = this->graph().length(ep.second);
-        TRACE("Lengths are " << first_len << " " << second_len);
-        TempHistogram data;
-        for (auto I = raw_hist.begin(), E = raw_hist.end(); I != E; ++I) {
-            Point p = *I;
-            if (math::ge(2 * (long) rounded_d(p) + (long) second_len, (long) first_len)) if (
-                    (long) rounded_d(p) + (long) OVERLAP_TOLERANCE >= (long) first_len)
-                data.insert(p);
-        }
-        EstimHist result;
-        double picture_weight = 0.;
-        for (auto I = data.begin(), E = data.end(); I != E; ++I)
-            picture_weight += I->weight;
-        if (math::ls(picture_weight, 3.))
-            return result;
-
-        DataDivider<EdgeId> data_divider(threshold_,
-                                         vector<Point>(data.begin(), data.end()));
-
-        PairInfos infos;
-        infos.reserve(data.size());
-        const vector<Interval> &clusters =
-                data_divider.DivideAndSmoothData(ep, infos, this->weight_f_);
-        DEBUG("Seeking for distances");
-        TRACE("size " << infos.size());
-
-        for (size_t i = 0; i < clusters.size(); ++i) {
-            size_t begin = clusters[i].first;
-            size_t end = clusters[i].second;
-            TRACE("begin " << begin << " at " << rounded_d(infos[begin])
-                  << ", " << " end " << end << " at " << rounded_d(infos[end - 1]));
-            size_t data_length = rounded_d(infos[end - 1]) - rounded_d(infos[begin]) + 1;
-            TRACE("data length " << data_length);
-            if (end - begin > min_peak_points_) {
-                size_t range = (size_t) math::round((double) data_length * range_coeff_);
-                size_t delta = (size_t) math::round((double) data_length * delta_coeff_);
-                PeakFinder<EdgeId> peakfinder(infos, begin, end, range, delta, percentage_, deriv_thr);
-                DEBUG("Processing window : " << rounded_d(infos[begin])
-                      << " " << rounded_d(infos[end - 1]));
-                peakfinder.FFTSmoothing(cutoff_);
-                TRACE("Listing peaks");
-                const EstimHist &peaks = peakfinder.ListPeaks();
-                //for (auto iter = peaks.begin(); iter != peaks.end(); ++iter) {
-                //TRACE("PEAKS " << iter->first << " " << iter->second);
-                //}
-                if (peaks.size() == 0)
-                    continue;
-                size_t index_of_max_weight = 0;
-                for (size_t i = 0; i < peaks.size(); ++i)
-                    if (math::ls(peaks[index_of_max_weight].second, peaks[i].second))
-                        index_of_max_weight = i;
-                result.push_back(peaks[index_of_max_weight]);
-            }
-        }
-
-        if (result.size() == 0)
-            return result;
-        size_t index_of_max_weight = 0;
-        for (size_t i = 0; i < result.size(); ++i)
-            if (math::ls(result[index_of_max_weight].second, result[i].second))
-                index_of_max_weight = i;
+                                    const TempHistogram &raw_hist) const;
 
-        EstimHist new_result;
-        for (size_t i = 0; i < result.size(); ++i)
-            if (result[i].second > .5 * result[index_of_max_weight].second)
-                new_result.push_back(result[i]);
-        return new_result;
-    }
-
-    void ProcessEdge(EdgeId e1,
+    void ProcessEdge(debruijn_graph::EdgeId e1,
                      const InPairedIndex &pi,
-                     PairedInfoBuffer<Graph> &result) const override {
-        typename base::LengthMap second_edges;
-        auto inner_map = pi.GetHalf(e1);
-        for (auto I : inner_map)
-            second_edges[I.first];
-
-        this->FillGraphDistancesLengths(e1, second_edges);
-
-        for (const auto &entry: second_edges) {
-            EdgeId e2 = entry.first;
-            EdgePair ep(e1, e2);
-
-            VERIFY(ep <= pi.ConjugatePair(ep));
+                     PairedInfoBuffer<debruijn_graph::Graph> &result) const override;
 
-            TRACE("Processing edge pair " << this->graph().int_id(e1)
-                  << " " << this->graph().int_id(e2));
-            const GraphLengths &forward = entry.second;
+    bool IsTipTip(debruijn_graph::EdgeId e1, debruijn_graph::EdgeId e2) const;
 
-            auto hist = pi.Get(e1, e2).Unwrap();
-            EstimHist estimated;
-            //DEBUG("Extending paired information");
-            //DEBUG("Extend left");
-            //this->base::ExtendInfoLeft(e1, e2, hist, 1000);
-            DEBUG("Extend right");
-            this->ExtendInfoRight(e1, e2, hist, 1000);
-            if (forward.size() == 0) {
-                estimated = FindEdgePairDistances(ep, hist);
-                ++gap_distances;
-            } else if (forward.size() > 0 && (!only_scaffolding_)) {
-                //TODO: remove THIS
-                InPairedIndex temp_index(this->graph());
-                temp_index.AddMany(e1, e2, hist);
-                auto hist = temp_index.Get(e1, e2);
-                estimated = this->base::EstimateEdgePairDistances(ep, hist, forward);
-            }
-            DEBUG(gap_distances << " distances between gap edge pairs have been found");
-            OutHistogram res = this->ClusterResult(ep, estimated);
-            this->AddToResult(res, ep, result);
-        }
-    }
-
-    bool IsTipTip(EdgeId e1, EdgeId e2) const {
-        return (this->graph().OutgoingEdgeCount(this->graph().EdgeEnd(e1)) == 0 &&
-                this->graph().IncomingEdgeCount(this->graph().EdgeEnd(e1)) == 1 &&
-                this->graph().IncomingEdgeCount(this->graph().EdgeStart(e2)) == 0 &&
-                this->graph().OutgoingEdgeCount(this->graph().EdgeStart(e2)) == 1);
-    }
-
-    void ExtendInfoRight(EdgeId e1, EdgeId e2, TempHistogram &data, size_t max_shift) const {
+    void ExtendInfoRight(debruijn_graph::EdgeId e1, debruijn_graph::EdgeId e2, TempHistogram &data,
+                         size_t max_shift) const {
         ExtendRightDFS(e1, e2, data, 0, max_shift);
     }
 
-    void MergeInto(const InHistogram &what, TempHistogram &where, int shift) const {
-        // assuming they are sorted already
-        if (what.size() == 0)
-            return;
-
-        if (where.size() == 0) {
-            for (auto to_be_added : what) {
-                to_be_added.d += shift;
-                where.insert(to_be_added);
-            }
-
-            return;
-        }
+    void MergeInto(const InHistogram &what, TempHistogram &where, int shift) const;
 
-        // Check, whether two histograms intersect. If not, we can just merge them
-        // straightforwardly.
-        if (math::ls(where.rbegin()->d, what.min().d + float(shift)) ||
-            math::gr(where.begin()->d, what.max().d + float(shift))) {
-            for (auto to_be_added : what) {
-                to_be_added.d += shift;
-                where.insert(to_be_added);
-            }
-        } else {
-            for (auto to_be_added : what) {
-                to_be_added.d += shift;
-                auto low_bound = std::lower_bound(where.begin(), where.end(), to_be_added);
-                if (low_bound != where.end() && to_be_added == *low_bound) {
-                    to_be_added.weight += low_bound->weight;
-                    where.erase(to_be_added);
-                    where.insert(to_be_added);
-                } else
-                    where.insert(low_bound, to_be_added);
-            }
-        }
-    }
-
-    void ExtendRightDFS(const EdgeId &first, EdgeId current, TempHistogram &data, int shift,
-                        size_t max_shift) const {
-        auto end = this->graph().EdgeEnd(current);
-        if (current == first)
-            return;
-        if (this->graph().IncomingEdgeCount(end) > 1)
-            return;
-
-        for (EdgeId next : this->graph().OutgoingEdges(end)) {
-            auto hist = this->index().Get(first, next);
-            if (-shift < (int) max_shift)
-                ExtendRightDFS(first, next, data, shift - (int) this->graph().length(current), max_shift);
-
-            //auto filtered_infos = FilterPositive(hist, this->graph().length(first), this->graph().length(next));
-            //if (filtered_infos.size() > 0)
-            //  MergeInto(filtered_infos, data, shift - (int) this->graph().length(current));
-            MergeInto(hist, data, shift - (int) this->graph().length(current));
-        }
-    }
+    void ExtendRightDFS(const debruijn_graph::EdgeId &first, debruijn_graph::EdgeId current, TempHistogram &data,
+                        int shift, size_t max_shift) const;
 
     const string Name() const override {
         static const string my_name = "SMOOTHING";
diff --git a/src/common/paired_info/split_path_constructor.hpp b/src/common/paired_info/split_path_constructor.hpp
index d2d23b2..99d8482 100644
--- a/src/common/paired_info/split_path_constructor.hpp
+++ b/src/common/paired_info/split_path_constructor.hpp
@@ -111,7 +111,7 @@ public:
                 for (size_t j = 0; j < common_part.size(); ++j)
                     total_length += graph_.length(common_part[j]);
 
-                DEBUG("Common part " << ToString(common_part));
+                DEBUG("Common part " << std::to_string(total_length));
                 for (size_t j = 0; j < common_part.size(); ++j) {
                     PairInfo cur_pi(cur_edge, common_part[j],
                                     cur_info.d() - (double) total_length,
diff --git a/src/common/paired_info/weighted_distance_estimation.cpp b/src/common/paired_info/weighted_distance_estimation.cpp
new file mode 100644
index 0000000..93638d3
--- /dev/null
+++ b/src/common/paired_info/weighted_distance_estimation.cpp
@@ -0,0 +1,63 @@
+#include "weighted_distance_estimation.hpp"
+
+namespace omnigraph {
+namespace de {
+
+using namespace debruijn_graph;
+
+WeightedDistanceEstimator::EstimHist WeightedDistanceEstimator::EstimateEdgePairDistances(EdgePair ep,
+                                                                                          const InHistogram &histogram,
+                                                                                          const GraphLengths &raw_forward) const {
+    using std::abs;
+    using namespace math;
+    TRACE("Estimating with weight function");
+    size_t first_len = this->graph().length(ep.first);
+    size_t second_len = this->graph().length(ep.second);
+
+    EstimHist result;
+    int maxD = rounded_d(histogram.max()), minD = rounded_d(histogram.min());
+    vector<int> forward;
+    for (auto len : raw_forward) {
+        int length = (int) len;
+        if (minD - (int) this->max_distance_ <= length && length <= maxD + (int) this->max_distance_) {
+            forward.push_back(length);
+        }
+    }
+    if (forward.size() == 0)
+        return result;
+
+    DEDistance max_dist = this->max_distance_;
+    size_t i = 0;
+    vector<double> weights(forward.size());
+    for (auto point : histogram) {
+        DEDistance cur_dist(forward[i]), next_dist(forward[i + 1]);
+        if (le(2 * point.d + DEDistance(second_len), DEDistance(first_len)))
+            continue;
+        while (i + 1 < forward.size() && next_dist < point.d) {
+            ++i;
+        }
+        if (i + 1 < forward.size() && ls(DEDistance(next_dist) - point.d, point.d - DEDistance(cur_dist))) {
+            ++i;
+            if (le(abs(cur_dist - point.d), max_dist))
+                weights[i] += point.weight * weight_f_(forward[i] - rounded_d(point));
+        }
+        else if (i + 1 < forward.size() && eq(next_dist - point.d, point.d - cur_dist)) {
+            if (le(abs(cur_dist - point.d), max_dist))
+                weights[i] += point.weight * 0.5 * weight_f_(forward[i] - rounded_d(point));
+
+            ++i;
+
+            if (le(abs(cur_dist - point.d), max_dist))
+                weights[i] += point.weight * 0.5 * weight_f_(forward[i] - rounded_d(point));
+        } else if (le(abs(cur_dist - point.d), max_dist))
+            weights[i] += point.weight * weight_f_(forward[i] - rounded_d(point));
+    }
+
+    for (size_t i = 0; i < forward.size(); ++i)
+        if (gr(weights[i], 0.))
+            result.push_back(make_pair(forward[i], weights[i]));
+
+    return result;
+}
+}
+}
diff --git a/src/common/paired_info/weighted_distance_estimation.hpp b/src/common/paired_info/weighted_distance_estimation.hpp
index 486a608..585c704 100644
--- a/src/common/paired_info/weighted_distance_estimation.hpp
+++ b/src/common/paired_info/weighted_distance_estimation.hpp
@@ -8,27 +8,24 @@
 #ifndef WEIGHTED_DISTANCE_ESTIMATION_HPP_
 #define WEIGHTED_DISTANCE_ESTIMATION_HPP_
 
-#include "math/xmath.h"
-#include "paired_info.hpp"
 #include "distance_estimation.hpp"
 
 namespace omnigraph {
 
 namespace de {
 
-template<class Graph>
-class WeightedDistanceEstimator : public DistanceEstimator<Graph> {
+class WeightedDistanceEstimator : public DistanceEstimator {
 protected:
-    typedef DistanceEstimator<Graph> base;
+    typedef DistanceEstimator base;
     typedef typename base::InPairedIndex InPairedIndex;
     typedef typename base::OutPairedIndex OutPairedIndex;
     typedef typename base::InHistogram InHistogram;
     typedef typename base::OutHistogram OutHistogram;
 
 public:
-    WeightedDistanceEstimator(const Graph &graph,
+    WeightedDistanceEstimator(const debruijn_graph::Graph &graph,
                               const InPairedIndex &histogram,
-                              const GraphDistanceFinder<Graph> &distance_finder,
+                              const GraphDistanceFinder &distance_finder,
                               std::function<double(int)> weight_f,
                               size_t linkage_distance, size_t max_distance) :
             base(graph, histogram, distance_finder, linkage_distance, max_distance), weight_f_(weight_f) { }
@@ -36,74 +33,24 @@ public:
     virtual ~WeightedDistanceEstimator() { }
 
 protected:
-    typedef typename Graph::EdgeId EdgeId;
 
     typedef vector<pair<int, double> > EstimHist;
-    typedef pair<EdgeId, EdgeId> EdgePair;
+    typedef pair<debruijn_graph::EdgeId, debruijn_graph::EdgeId> EdgePair;
     typedef vector<size_t> GraphLengths;
 
     std::function<double(int)> weight_f_;
 
     virtual EstimHist EstimateEdgePairDistances(EdgePair ep,
                                                 const InHistogram &histogram,
-                                                const GraphLengths &raw_forward) const override {
-        using std::abs;
-        using namespace math;
-        TRACE("Estimating with weight function");
-        size_t first_len = this->graph().length(ep.first);
-        size_t second_len = this->graph().length(ep.second);
-
-        EstimHist result;
-        int maxD = rounded_d(histogram.max()), minD = rounded_d(histogram.min());
-        vector<int> forward;
-        for (auto len : raw_forward) {
-            int length = (int) len;
-            if (minD - (int) this->max_distance_ <= length && length <= maxD + (int) this->max_distance_) {
-                forward.push_back(length);
-            }
-        }
-        if (forward.size() == 0)
-            return result;
-
-        DEDistance max_dist = this->max_distance_;
-        size_t i = 0;
-        vector<double> weights(forward.size());
-        for (auto point : histogram) {
-            DEDistance cur_dist(forward[i]), next_dist(forward[i + 1]);
-            if (le(2 * point.d + DEDistance(second_len), DEDistance(first_len)))
-                continue;
-            while (i + 1 < forward.size() && next_dist < point.d) {
-                ++i;
-            }
-            if (i + 1 < forward.size() && ls(DEDistance(next_dist) - point.d, point.d - DEDistance(cur_dist))) {
-                ++i;
-                if (le(abs(cur_dist - point.d), max_dist))
-                    weights[i] += point.weight * weight_f_(forward[i] - rounded_d(point));
-            }
-            else if (i + 1 < forward.size() && eq(next_dist - point.d, point.d - cur_dist)) {
-                if (le(abs(cur_dist - point.d), max_dist))
-                    weights[i] += point.weight * 0.5 * weight_f_(forward[i] - rounded_d(point));
-
-                ++i;
-
-                if (le(abs(cur_dist - point.d), max_dist))
-                    weights[i] += point.weight * 0.5 * weight_f_(forward[i] - rounded_d(point));
-            } else if (le(abs(cur_dist - point.d), max_dist))
-                weights[i] += point.weight * weight_f_(forward[i] - rounded_d(point));
-        }
-
-        for (size_t i = 0; i < forward.size(); ++i)
-            if (gr(weights[i], 0.))
-                result.push_back(make_pair(forward[i], weights[i]));
-
-        return result;
-    }
+                                                const GraphLengths &raw_forward) const override;
 
     const string Name() const override {
         static const string my_name = "WEIGHTED";
         return my_name;
     }
 
+private:
+    DECL_LOGGER("WeightedDistanceEstimator");
 };
 
 }
diff --git a/src/common/paired_info/weights.hpp b/src/common/paired_info/weights.hpp
index c0e8b43..b5a5a5e 100644
--- a/src/common/paired_info/weights.hpp
+++ b/src/common/paired_info/weights.hpp
@@ -1,11 +1,10 @@
 #pragma once
 
+namespace debruijn_graph {
 using omnigraph::Path;
 using omnigraph::MappingPath;
-using omnigraph::Range;
 using omnigraph::MappingRange;
 
-namespace debruijn_graph {
 inline double PairedReadCountWeight(const std::pair<EdgeId, EdgeId>&,
                                     const MappingRange&, const MappingRange&) {
     return 1.;
diff --git a/src/common/pipeline/config_common.hpp b/src/common/pipeline/config_common.hpp
index 0f38490..b877035 100755
--- a/src/common/pipeline/config_common.hpp
+++ b/src/common/pipeline/config_common.hpp
@@ -7,8 +7,8 @@
 
 #pragma once
 
-#include "utils/simple_tools.hpp"
-#include "utils/path_helper.hpp"
+#include "utils/stl_utils.hpp"
+#include "utils/filesystem/path_helper.hpp"
 #include "utils/verify.hpp"
 
 // todo: undo dirty fix
@@ -82,12 +82,12 @@ void load(std::vector <T> &vec, boost::property_tree::ptree const &pt, std::stri
         return;
     }
     for (size_t i = 1; ; i++) {
-        value = pt.get_optional<std::string>(key + "#" + ToString(i));
+        value = pt.get_optional<std::string>(key + "#" + std::to_string(i));
         if (value) {
             vec.push_back(*value);
             continue;
         }
-        value = pt.get_optional<std::string>(key + "." + ToString(i));
+        value = pt.get_optional<std::string>(key + "." + std::to_string(i));
         if (value) {
             vec.push_back(*value);
             continue;
diff --git a/src/common/pipeline/config_struct.cpp b/src/common/pipeline/config_struct.cpp
index ad5795c..6b306fa 100644
--- a/src/common/pipeline/config_struct.cpp
+++ b/src/common/pipeline/config_struct.cpp
@@ -7,7 +7,7 @@
 #include "pipeline/config_struct.hpp"
 
 #include "pipeline/config_common.hpp"
-#include "utils/openmp_wrapper.h"
+#include "utils/parallel/openmp_wrapper.h"
 
 #include "utils/logger/logger.hpp"
 #include "utils/verify.hpp"
@@ -249,6 +249,7 @@ void load(debruijn_config::simplification::bulge_remover& br,
   load(br.max_delta,                        pt,     "max_delta", complete);
   load(br.max_relative_delta,               pt,     "max_relative_delta", complete);
   load(br.max_number_edges,                 pt,     "max_number_edges", complete);
+  load(br.dijkstra_vertex_limit,            pt,     "dijkstra_vertex_limit", complete);
   load(br.parallel,                         pt,     "parallel", complete);
   load(br.buff_size,                        pt,     "buff_size", complete);
   load(br.buff_cov_diff,                    pt,     "buff_cov_diff", complete);
@@ -292,13 +293,22 @@ void load(debruijn_config::simplification::relative_coverage_comp_remover& rcc,
   load(rcc.max_coverage_coeff, pt, "max_coverage_coeff", complete);
 }
 
-void load(debruijn_config::simplification::isolated_edges_remover& ier,
+void load(debruijn_config::simplification::isolated_edge_remover& ier,
           boost::property_tree::ptree const& pt, bool complete) {
   using config_common::load;
   load(ier.enabled, pt, "enabled", complete);
   load(ier.max_length, pt, "max_length", complete);
+  load(ier.use_rl_for_max_length, pt, "use_rl_for_max_length", complete);
   load(ier.max_coverage, pt, "max_coverage", complete);
   load(ier.max_length_any_cov, pt, "max_length_any_cov", complete);
+  load(ier.use_rl_for_max_length_any_cov, pt, "use_rl_for_max_length_any_cov", complete);
+}
+
+void load(debruijn_config::simplification::low_covered_edge_remover& lcer,
+          boost::property_tree::ptree const& pt, bool complete) {
+  using config_common::load;
+  load(lcer.enabled, pt, "lcer_enabled", complete);
+  load(lcer.coverage_threshold, pt, "lcer_coverage_threshold", complete);
 }
 
 void load(debruijn_config::simplification::init_cleaning& init_clean,
@@ -517,6 +527,13 @@ void load(debruijn_config::graph_read_corr_cfg& graph_read_corr,
   load(graph_read_corr.binary, pt, "binary");
 }
 
+void load(debruijn_config::strand_specificity& ss,
+          boost::property_tree::ptree const& pt, bool /*complete*/) {
+    using config_common::load;
+    load(ss.ss_enabled, pt, "ss_enabled");
+    load(ss.antisense, pt, "antisense");
+}
+
 void load(debruijn_config::kmer_coverage_model& kcm,
           boost::property_tree::ptree const& pt, bool /*complete*/) {
   using config_common::load;
@@ -544,23 +561,27 @@ void load_reads(dataset &ds,
                 std::string input_dir) {
     if (ds.reads_filename[0] != '/')
         ds.reads_filename = input_dir + ds.reads_filename;
-    path::CheckFileExistenceFATAL(ds.reads_filename);
+    fs::CheckFileExistenceFATAL(ds.reads_filename);
     ds.reads.load(ds.reads_filename);
 }
 
 void load_reference_genome(dataset &ds,
                            std::string input_dir) {
     if (ds.reference_genome_filename == "") {
-        ds.reference_genome = "";
+        ds.reference_genome = vector<std::string>(0);
         return;
     }
     if (ds.reference_genome_filename[0] != '/')
         ds.reference_genome_filename = input_dir + ds.reference_genome_filename;
-    path::CheckFileExistenceFATAL(ds.reference_genome_filename);
+    fs::CheckFileExistenceFATAL(ds.reference_genome_filename);
     io::FileReadStream genome_stream(ds.reference_genome_filename);
-    io::SingleRead genome;
-    genome_stream >> genome;
-    ds.reference_genome = genome.GetSequenceString();
+    while (!genome_stream.eof()) {
+        io::SingleRead genome;
+        genome_stream >> genome;
+        ds.reference_genome.push_back(genome.GetSequenceString());
+    }
+
+
 }
 
 void load(debruijn_config::simplification& simp,
@@ -591,8 +612,6 @@ void load(debruijn_config::simplification& simp,
   load(simp.init_clean, pt, "init_clean", complete); // presimplification
   load(simp.final_tc, pt, "final_tc", complete);
   load(simp.final_br, pt, "final_br", complete);
-  simp.second_final_br = simp.final_br;
-  load(simp.second_final_br, pt, "second_final_br", false);
 }
 
 void load(debruijn_config::info_printer& printer,
@@ -650,7 +669,7 @@ void load_launch_info(debruijn_config &cfg, boost::property_tree::ptree const &p
     // input options:
     load(cfg.dataset_file, pt, "dataset");
     // input dir is based on dataset file location (all paths in datasets are relative to its location)
-    cfg.input_dir = path::parent_path(cfg.dataset_file);
+    cfg.input_dir = fs::parent_path(cfg.dataset_file);
     if (cfg.input_dir[cfg.input_dir.length() - 1] != '/')
         cfg.input_dir += '/';
 
@@ -702,7 +721,7 @@ void load_launch_info(debruijn_config &cfg, boost::property_tree::ptree const &p
 
     load(cfg.max_memory, pt, "max_memory");
 
-    path::CheckFileExistenceFATAL(cfg.dataset_file);
+    fs::CheckFileExistenceFATAL(cfg.dataset_file);
     boost::property_tree::ptree ds_pt;
     boost::property_tree::read_info(cfg.dataset_file, ds_pt);
     load(cfg.ds, ds_pt, true);
@@ -725,8 +744,6 @@ void load_cfg(debruijn_config &cfg, boost::property_tree::ptree const &pt,
 
     load(cfg.co, pt, "contig_output", complete);
 
-    load(cfg.use_unipaths, pt, "use_unipaths", complete);
-
     load(cfg.pb, pt, "pacbio_processor", complete);
 
     load(cfg.two_step_rr, pt, "two_step_rr", complete);
@@ -753,6 +770,8 @@ void load_cfg(debruijn_config &cfg, boost::property_tree::ptree const &pt,
     load(cfg.flanking_range, pt, "flanking_range", complete);
     load(cfg.graph_read_corr, pt, "graph_read_corr", complete);
     load(cfg.kcm, pt, "kmer_coverage_model", complete);
+    //TODO come up with a fix to this hack
+    load(cfg.simp.lcer, pt, "lcer", complete); //low coverage edge remover
     load(cfg.pos, pt, "pos", complete); // position handler:
 
     load(cfg.rm, pt, "resolving_mode", complete);
@@ -763,12 +782,23 @@ void load_cfg(debruijn_config &cfg, boost::property_tree::ptree const &pt,
 
     load(cfg.sensitive_map, pt, "sensitive_mapper", complete);
 
+    bool save_gp;
+    load(save_gp, pt, "save_gp", complete);
     load(cfg.info_printers, pt, "info_printers", complete);
 
+    if (save_gp) {
+        INFO("Graph pack will be saved before repeat resolution");
+        cfg.info_printers[info_printer_pos::before_repeat_resolution].save_graph_pack = true;
+    }
+
     load(cfg.bwa, pt, "bwa_aligner", complete);
 
     load(cfg.series_analysis, pt, "series_analysis", complete);
 
+    load(cfg.ss, pt, "strand_specificity", complete);
+    load(cfg.calculate_coverage_for_each_lib, pt, "calculate_coverage_for_each_lib", complete);
+
+
     if (pt.count("plasmid")) {
         VERIFY_MSG(!cfg.pd, "Option can be loaded only once");
         cfg.pd.reset(debruijn_config::plasmid());
diff --git a/src/common/pipeline/config_struct.hpp b/src/common/pipeline/config_struct.hpp
index deddf72..3d3f7d5 100644
--- a/src/common/pipeline/config_struct.hpp
+++ b/src/common/pipeline/config_struct.hpp
@@ -185,7 +185,7 @@ struct dataset {
     std::string reference_genome_filename;
     std::string reads_filename;
 
-    std::string reference_genome;
+    std::vector<std::string> reference_genome;
 
     dataset(): max_read_length(0), average_coverage(0.0) {
     }
@@ -236,6 +236,7 @@ struct debruijn_config {
             size_t max_delta;
             double max_relative_delta;
             size_t max_number_edges;
+            size_t dijkstra_vertex_limit;
             bool parallel;
             size_t buff_size;
             double buff_cov_diff;
@@ -279,10 +280,17 @@ struct debruijn_config {
             size_t plausibility_length;
         };
 
-        struct isolated_edges_remover {
+        struct low_covered_edge_remover {
             bool enabled;
+            double coverage_threshold;
+        };
+
+        struct isolated_edge_remover {
+            bool enabled;
+            bool use_rl_for_max_length; // max_length will be taken max with read_length
             size_t max_length;
             double max_coverage;
+            bool use_rl_for_max_length_any_cov;
             size_t max_length_any_cov;
         };
 
@@ -320,7 +328,7 @@ struct debruijn_config {
 
             bool early_it_only;
             double activation_cov;
-            isolated_edges_remover ier;
+            isolated_edge_remover ier;
             std::string tip_condition;
             std::string ec_condition;
             double disconnect_flank_cov;
@@ -343,13 +351,12 @@ struct debruijn_config {
         tr_based_ec_remover trec;
         interstrand_ec_remover isec;
         max_flow_ec_remover mfec;
-        isolated_edges_remover ier;
+        isolated_edge_remover ier;
         complex_bulge_remover cbr;
         hidden_ec_remover her;
-
+        low_covered_edge_remover lcer;
         tip_clipper final_tc;
         bulge_remover final_br;
-        bulge_remover second_final_br;
 
         init_cleaning init_clean;
     };
@@ -496,9 +503,7 @@ struct debruijn_config {
     std::string output_base;
     std::string output_dir;
     std::string tmp_dir;
-    std::string output_suffix;
     std::string output_saves;
-    std::string final_contigs_file;
     std::string log_filename;
     std::string series_analysis;
 
@@ -507,7 +512,6 @@ struct debruijn_config {
     bool compute_paths_number;
 
     bool use_additional_contigs;
-    bool use_unipaths;
     std::string additional_contigs;
 
     struct scaffold_correction {
@@ -528,6 +532,11 @@ struct debruijn_config {
         output_broken_scaffolds obs_mode;
     };
 
+    struct strand_specificity {
+        bool ss_enabled;
+        bool antisense;
+    };
+
     contig_output co;
 
     boost::optional<scaffold_correction> sc_cor;
@@ -554,7 +563,6 @@ struct debruijn_config {
     size_t buffer_size;
     std::string temp_bin_reads_dir;
     std::string temp_bin_reads_path;
-    std::string temp_bin_reads_info;
     std::string paired_read_prefix;
     std::string single_read_prefix;
 
@@ -588,6 +596,9 @@ struct debruijn_config {
     boost::optional<plasmid> pd;
     size_t flanking_range;
 
+    bool calculate_coverage_for_each_lib;
+    strand_specificity ss;
+
     bool need_mapping;
 
     debruijn_config() :
diff --git a/src/common/pipeline/genomic_info_filler.cpp b/src/common/pipeline/genomic_info_filler.cpp
index 67a91aa..b1e3031 100644
--- a/src/common/pipeline/genomic_info_filler.cpp
+++ b/src/common/pipeline/genomic_info_filler.cpp
@@ -6,19 +6,13 @@
 //***************************************************************************
 
 #include "genomic_info_filler.hpp"
-
-#include "utils/coverage_model/kmer_coverage_model.hpp"
+#include "modules/coverage_model/kmer_coverage_model.hpp"
 #include "modules/simplification/ec_threshold_finder.hpp"
 
 #include "llvm/Support/YAMLTraits.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/FileSystem.h"
 
-#include <string>
-
-#include <map>
-#include <vector>
-
 using namespace llvm;
 using namespace debruijn_graph;
 
@@ -123,7 +117,7 @@ void GenomicInfoFiller::run(conj_graph_pack &gp, const char*) {
         gp.ginfo.set_cov_histogram(extract(tmp));
 
         // Fit the coverage model and get the threshold
-        utils::coverage_model::KMerCoverageModel CovModel(gp.ginfo.cov_histogram(), cfg::get().kcm.probability_threshold, cfg::get().kcm.strong_probability_threshold);
+        coverage_model::KMerCoverageModel CovModel(gp.ginfo.cov_histogram(), cfg::get().kcm.probability_threshold, cfg::get().kcm.strong_probability_threshold);
         CovModel.Fit();
 
         gp.ginfo.set_genome_size(CovModel.GetGenomeSize());
diff --git a/src/common/pipeline/graph_pack.hpp b/src/common/pipeline/graph_pack.hpp
index e05a243..d9d4f6e 100644
--- a/src/common/pipeline/graph_pack.hpp
+++ b/src/common/pipeline/graph_pack.hpp
@@ -7,8 +7,8 @@
 
 #pragma once
 
-#include "utils/indices/edge_position_index.hpp"
-#include "utils/indices/storing_traits.hpp"
+#include "assembly_graph/index/edge_position_index.hpp"
+#include "utils/ph_map/storing_traits.hpp"
 #include "sequence/genome_storage.hpp"
 #include "assembly_graph/handlers/id_track_handler.hpp"
 #include "assembly_graph/handlers/edges_position_handler.hpp"
@@ -23,8 +23,9 @@
 #include "assembly_graph/graph_support/detail_coverage.hpp"
 #include "assembly_graph/components/connected_component.hpp"
 #include "modules/alignment/kmer_mapper.hpp"
-#include "common/visualization/position_filler.hpp"
-#include "common/assembly_graph/paths/bidirectional_path.hpp"
+#include "visualization/position_filler.hpp"
+#include "assembly_graph/paths/bidirectional_path_container.hpp"
+#include "common/modules/alignment/rna/ss_coverage.hpp"
 
 namespace debruijn_graph {
 
@@ -50,6 +51,7 @@ struct graph_pack: private boost::noncopyable {
     PairedInfoIndicesT clustered_indices;
     PairedInfoIndicesT scaffolding_indices;
     LongReadContainerT single_long_reads;
+    vector<SSCoverageStorage> ss_coverage;
     GenomicInfo ginfo;
 
     GenomeStorage genome;
@@ -59,7 +61,7 @@ struct graph_pack: private boost::noncopyable {
     path_extend::PathContainer contig_paths;
 
     graph_pack(size_t k, const std::string &workdir, size_t lib_count,
-                        const std::string &genome = "",
+                        const std::vector<std::string> &genome = std::vector<std::string>(0),
                         size_t flanking_range = 50,
                         size_t max_mapping_gap = 0,
                         size_t max_gap_diff = 0,
@@ -71,6 +73,7 @@ struct graph_pack: private boost::noncopyable {
               clustered_indices(g, lib_count),
               scaffolding_indices(g, lib_count),
               single_long_reads(g, lib_count),
+              ss_coverage(lib_count, SSCoverageStorage(g)),
               genome(genome),
               edge_qual(g),
               edge_pos(g, max_mapping_gap + k, max_gap_diff),
@@ -121,8 +124,8 @@ struct graph_pack: private boost::noncopyable {
             edge_pos.Attach();
         }
         edge_pos.clear();
-        visualization::position_filler::FillPos(*this, genome.GetSequence(), "ref0");
-        visualization::position_filler::FillPos(*this, !genome.GetSequence(), "ref1");
+        visualization::position_filler::FillPos(*this, genome.str(), "ref0");
+        visualization::position_filler::FillPos(*this, ReverseComplement(genome.str()), "ref1");
     }
     
     void EnsureDebugInfo() {
diff --git a/src/common/pipeline/graphio.hpp b/src/common/pipeline/graphio.hpp
index 118b484..b8181fb 100644
--- a/src/common/pipeline/graphio.hpp
+++ b/src/common/pipeline/graphio.hpp
@@ -171,11 +171,6 @@ class DataPrinter {
         }
     }
 
-//    template<class C>
-//    void SaveEdgeAssociatedInfo(const C& c, ostream& out) const {
-//        SaveEdgeAssociatedInfo<decltype(C::operator[])>(boost::bind(&C::operator[], c, _1), out);
-//    }
-
     template<class C>
     void SaveEdgeAssociatedInfo(const C& c, ostream& out) const {
         out << component_.e_size() << endl;
@@ -220,7 +215,6 @@ class DataPrinter {
     void SaveEdgeSequences(const string& file_name) const {
         ofstream out(file_name + ".sqn");
         //todo switch to general function after its switching to fasta
-        //SaveEdgeAssociatedInfo<Sequence>(boost::bind(&Graph::EdgeNucls, component_.g(), _1), out);
         DEBUG("Saving sequences, " << file_name <<" created");
         for (auto iter = component_.e_begin(); iter != component_.e_end(); ++iter) {
             EdgeId e = *iter;
@@ -411,11 +405,6 @@ class DataScanner {
         }
     }
 
-//    template<class C>
-//    void LoadEdgeAssociatedInfo(const C& c, ostream& out) const {
-//        SaveEdgeAssociatedInfo<decltype(C::operator[])>(boost::bind(&C::operator[], c, _1), out);
-//    }
-
   public:
     virtual void LoadGraph(const string& file_name) = 0;
 
@@ -426,7 +415,7 @@ class DataScanner {
     }
 
     bool LoadFlankingCoverage(const string& file_name, FlankingCoverage<Graph>& flanking_cov) {
-        if (!path::FileExists(file_name + ".flcvr")) {
+        if (!fs::FileExists(file_name + ".flcvr")) {
             INFO("Flanking coverage saves are absent");
             return false;
         }
@@ -659,7 +648,7 @@ private:
 
 inline std::string MakeSingleReadsFileName(const std::string& file_name,
                                     size_t index) {
-    return file_name + "_paths_" + ToString(index) + ".mpr";
+    return file_name + "_paths_" + std::to_string(index) + ".mpr";
 }
 
 //helper methods
@@ -728,21 +717,21 @@ template<class Graph>
 void PrintUnclusteredIndices(const string& file_name, DataPrinter<Graph>& printer,
                              const UnclusteredPairedInfoIndicesT<Graph>& paired_indices) {
     for (size_t i = 0; i < paired_indices.size(); ++i)
-        PrintUnclusteredIndex(file_name + "_" + ToString(i), printer, paired_indices[i]);
+        PrintUnclusteredIndex(file_name + "_" + std::to_string(i), printer, paired_indices[i]);
 }
 
 template<class Graph>
 void PrintClusteredIndices(const string& file_name, DataPrinter<Graph>& printer,
                            const PairedInfoIndicesT<Graph>& paired_indices) {
     for (size_t i = 0; i < paired_indices.size(); ++i)
-        PrintClusteredIndex(file_name  + "_" + ToString(i), printer, paired_indices[i]);
+        PrintClusteredIndex(file_name  + "_" + std::to_string(i), printer, paired_indices[i]);
 }
 
 template<class Graph>
 void PrintScaffoldingIndices(const string& file_name, DataPrinter<Graph>& printer,
                            const PairedInfoIndicesT<Graph>& paired_indices) {
     for (size_t i = 0; i < paired_indices.size(); ++i)
-        PrintScaffoldingIndex(file_name  + "_" + ToString(i), printer, paired_indices[i]);
+        PrintScaffoldingIndex(file_name  + "_" + std::to_string(i), printer, paired_indices[i]);
 }
 
 template<class graph_pack>
@@ -920,7 +909,7 @@ void ScanPairedIndices(const std::string& file_name, DataScanner<Graph>& scanner
                        UnclusteredPairedInfoIndicesT<Graph>& paired_indices,
                        bool force_exists = true) {
     for (size_t i = 0; i < paired_indices.size(); ++i)
-        ScanPairedIndex(file_name  + "_" + ToString(i), scanner, paired_indices[i], force_exists);
+        ScanPairedIndex(file_name  + "_" + std::to_string(i), scanner, paired_indices[i], force_exists);
 }
 
 template<class Graph>
@@ -928,7 +917,7 @@ void ScanClusteredIndices(const std:: string& file_name, DataScanner<Graph>& sca
                           PairedInfoIndicesT<Graph>& paired_indices,
                           bool force_exists = true) {
     for (size_t i = 0; i < paired_indices.size(); ++i)
-        ScanClusteredIndex(file_name  + "_" + ToString(i), scanner, paired_indices[i], force_exists);
+        ScanClusteredIndex(file_name  + "_" + std::to_string(i), scanner, paired_indices[i], force_exists);
 }
 
 template<class Graph>
@@ -936,7 +925,7 @@ void ScanScaffoldingIndices(const std:: string& file_name, DataScanner<Graph>& s
                             PairedInfoIndicesT<Graph>& paired_indices,
                             bool force_exists = true) {
     for (size_t i = 0; i < paired_indices.size(); ++i)
-        ScanScaffoldingIndex(file_name  + "_" + ToString(i), scanner, paired_indices[i], force_exists);
+        ScanScaffoldingIndex(file_name  + "_" + std::to_string(i), scanner, paired_indices[i], force_exists);
 }
 
 template<class Graph>
@@ -944,7 +933,7 @@ void ScanScaffoldIndices(const string& file_name, DataScanner<Graph>& scanner,
         PairedInfoIndicesT<Graph>& scaffold_indices) {
 
     for (size_t i = 0; i < scaffold_indices.size(); ++i) {
-        ScanScaffoldIndex(file_name  + "_" + ToString(i), scanner, scaffold_indices[i]);
+        ScanScaffoldIndex(file_name  + "_" + std::to_string(i), scanner, scaffold_indices[i]);
     }
 }
 
diff --git a/src/common/pipeline/library.cpp b/src/common/pipeline/library.cpp
index 6ed907d..8e0d8da 100644
--- a/src/common/pipeline/library.cpp
+++ b/src/common/pipeline/library.cpp
@@ -6,7 +6,7 @@
 //***************************************************************************
 
 #include "pipeline/library.hpp"
-#include "utils/path_helper.hpp"
+#include "utils/filesystem/path_helper.hpp"
 
 #include "llvm/Support/YAMLTraits.h"
 #include "llvm/Support/Errc.h"
diff --git a/src/common/pipeline/library.hpp b/src/common/pipeline/library.hpp
index 4598721..61249f0 100644
--- a/src/common/pipeline/library.hpp
+++ b/src/common/pipeline/library.hpp
@@ -8,8 +8,8 @@
 #ifndef __IO_LIBRARY_HPP__
 #define __IO_LIBRARY_HPP__
 
-#include "common/adt/chained_iterator.hpp"
-#include "common/adt/iterator_range.hpp"
+#include "adt/chained_iterator.hpp"
+#include "adt/iterator_range.hpp"
 
 #include <boost/iterator/iterator_facade.hpp>
 
@@ -88,7 +88,7 @@ public:
         inner_iterator right_;
     };
 
-    typedef chained_iterator<std::vector<std::string>::const_iterator> single_reads_iterator;
+    typedef typename adt::chained_iterator<std::vector<std::string>::const_iterator> single_reads_iterator;
 
     SequencingLibraryBase()
             : type_(LibraryType::PairedEnd), orientation_(LibraryOrientation::FR) {}
@@ -261,8 +261,8 @@ public:
 public:
     typedef typename LibraryStorage::iterator iterator;
     typedef typename LibraryStorage::const_iterator const_iterator;
-    typedef chained_iterator<typename Library::single_reads_iterator> single_reads_iterator;
-    typedef chained_iterator<typename Library::paired_reads_iterator> paired_reads_iterator;
+    typedef adt::chained_iterator<typename Library::single_reads_iterator> single_reads_iterator;
+    typedef adt::chained_iterator<typename Library::paired_reads_iterator> paired_reads_iterator;
 
     DataSet() {}
     explicit DataSet(const std::string &path) { load(path); }
diff --git a/src/common/pipeline/library.inl b/src/common/pipeline/library.inl
index cef8c21..cc8fbc6 100644
--- a/src/common/pipeline/library.inl
+++ b/src/common/pipeline/library.inl
@@ -55,7 +55,7 @@ void io::DataSet<Data>::load(const std::string &filename) {
         throw;
     }
     
-    std::string input_dir = path::parent_path(filename);
+    std::string input_dir = fs::parent_path(filename);
     if (input_dir[input_dir.length() - 1] != '/')
         input_dir += '/';
 
diff --git a/src/common/pipeline/stage.cpp b/src/common/pipeline/stage.cpp
index 3119b0a..b0cc748 100644
--- a/src/common/pipeline/stage.cpp
+++ b/src/common/pipeline/stage.cpp
@@ -18,7 +18,7 @@ namespace spades {
 void AssemblyStage::load(debruijn_graph::conj_graph_pack& gp,
                          const std::string &load_from,
                          const char* prefix) {
-    std::string p = path::append_path(load_from, prefix == NULL ? id_ : prefix);
+    std::string p = fs::append_path(load_from, prefix == NULL ? id_ : prefix);
     INFO("Loading current state from " << p);
 
     debruijn_graph::graphio::ScanAll(p, gp, false);
@@ -28,7 +28,7 @@ void AssemblyStage::load(debruijn_graph::conj_graph_pack& gp,
 void AssemblyStage::save(const debruijn_graph::conj_graph_pack& gp,
                          const std::string &save_to,
                          const char* prefix) const {
-    std::string p = path::append_path(save_to, prefix == NULL ? id_ : prefix);
+    std::string p = fs::append_path(save_to, prefix == NULL ? id_ : prefix);
     INFO("Saving current state to " << p);
 
     debruijn_graph::graphio::PrintAll(p, gp);
diff --git a/src/common/sequence/genome_storage.hpp b/src/common/sequence/genome_storage.hpp
index d790386..1b49e1f 100644
--- a/src/common/sequence/genome_storage.hpp
+++ b/src/common/sequence/genome_storage.hpp
@@ -10,23 +10,68 @@
 #include "sequence.hpp"
 #include "nucl.hpp"
 
+struct Chromosome {
+    std::string name;
+    std::string sequence;
+    Chromosome(string chr_name, string seq): name(chr_name), sequence(seq){}
+};
+
 class GenomeStorage {
-    std::string s_;
+//all chromosomes glued
+    std::string glued_genome_;
+    std::vector<Chromosome> full_genome_;
+
+    std::string ACGTise(const std::string &s) const {
+        std::stringstream ss;
+        std::string tmp(s);
+        transform(tmp.begin(), tmp.end(), tmp.begin(), ::toupper);
+        for (size_t i = 0; i < tmp.length(); i++){
+            if (is_nucl(tmp[i])) {
+                ss << tmp[i];
+            }
+        }
+        return ss.str();
+    }
+
 public:
     GenomeStorage() {
     }
 
-    GenomeStorage(const std::string &s): s_(s) {
+    GenomeStorage(const std::string &s): glued_genome_(s), full_genome_() {
+        full_genome_.push_back(Chromosome("genome", ACGTise(glued_genome_)));
+    }
+
+    GenomeStorage(const vector<Chromosome> &chromosomes): full_genome_(chromosomes) {
+        std::stringstream ss;
+        for (const auto &s: chromosomes) {
+            ss << ACGTise(s.sequence);
+//do we need a separator between?
+        }
+        glued_genome_ = ss.str();
+    }
+
+    GenomeStorage(const vector<string> &chromosomes): full_genome_() {
+        std::stringstream ss;
+        int count = 0;
+        for (const auto &s: chromosomes) {
+            count ++;
+            std::string fxd = ACGTise(s);
+            full_genome_.push_back(Chromosome("chr" + std::to_string(count), fxd));
+            ss << fxd;
+//do we need a separator between?
+        }
+        glued_genome_ = ss.str();
     }
 
+
     //TODO exterminate this where possible
     Sequence GetSequence() const {
         stringstream ss;
         size_t l = 0, r = 0;
-        for(size_t i = 0; i < s_.size(); i++) {
-            if (!is_nucl(s_[i]) ) {
+        for (size_t i = 0; i < glued_genome_.size(); i++) {
+            if (!is_nucl(glued_genome_[i]) ) {
                 if (r > l) {
-                    ss << s_.substr(l, r - l);
+                    ss << glued_genome_.substr(l, r - l);
                 }
                 r = i + 1;
                 l = i + 1;
@@ -35,21 +80,25 @@ public:
             }
         }
         if (r > l) {
-            ss << s_.substr(l, r - l);
+            ss << glued_genome_.substr(l, r - l);
         }
         return Sequence(ss.str());
     }
 
+    std::vector<Chromosome> GetChromosomes() const{
+        return full_genome_;
+    }
+
     void SetSequence(const Sequence &s) {
-        s_ = s.str();
+        glued_genome_ = s.str();
     }
 
     std::string str() const {
-        return s_;
+        return glued_genome_;
     }
 
     size_t size() const {
-        return s_.size();
+        return glued_genome_.size();
     }
 };
 
diff --git a/src/common/utils/levenshtein.hpp b/src/common/sequence/levenshtein.hpp
similarity index 96%
rename from src/common/utils/levenshtein.hpp
rename to src/common/sequence/levenshtein.hpp
index 9fad614..025c557 100644
--- a/src/common/utils/levenshtein.hpp
+++ b/src/common/sequence/levenshtein.hpp
@@ -9,7 +9,7 @@
 
 #include <string>
 #include <vector>
-#include "utils/simple_tools.hpp"
+#include "utils/stl_utils.hpp"
 
 /*
  * Little modified copy-paste from http://www.merriampark.com/ldcpp.htm
@@ -206,7 +206,7 @@ inline std::pair<std::pair<int, int>, std::string> best_edit_distance_cigar(cons
 
 //  if (min > 0) {
 //      for (int i = 0; i <= n; i++) {
-//        INFO(ToString(matrix[i]));
+//        INFO(std::to_string(matrix[i]));
 //      }
 //  }
 
@@ -228,7 +228,7 @@ inline std::pair<std::pair<int, int>, std::string> best_edit_distance_cigar(cons
         }
         if (cur_operation != last_operation) {
             if (last_operation != 0)
-                res = ToString(cnt_last_operation) + last_operation + res;
+                res = std::to_string(cnt_last_operation) + last_operation + res;
             last_operation = cur_operation;
             cnt_last_operation = 1;
         }
@@ -236,6 +236,6 @@ inline std::pair<std::pair<int, int>, std::string> best_edit_distance_cigar(cons
             cnt_last_operation++;
         }
     }
-    res = ToString(cnt_last_operation) + last_operation + res;
+    res = std::to_string(cnt_last_operation) + last_operation + res;
     return std::make_pair(std::make_pair(cur_pos_j, min_m), res);
 }
diff --git a/src/common/utils/range.hpp b/src/common/sequence/range.hpp
similarity index 95%
rename from src/common/utils/range.hpp
rename to src/common/sequence/range.hpp
index 2e05bed..0e9b7bd 100644
--- a/src/common/utils/range.hpp
+++ b/src/common/sequence/range.hpp
@@ -2,8 +2,6 @@
 
 #include "utils/verify.hpp"
 
-namespace omnigraph {
-
 struct Range {
 private:
     bool inside(size_t left, size_t right, size_t point) const {
@@ -85,8 +83,5 @@ public:
 };
 
 inline std::ostream& operator<<(std::ostream& os, const Range& range) {
-    os << "[" << (range.start_pos + 1) << " - " << range.end_pos << "]";
-    return os;
-}
-
+    return os << "[" << range.start_pos << ", " << range.end_pos << ")";
 }
diff --git a/src/common/sequence/rtseq.hpp b/src/common/sequence/rtseq.hpp
index 5bc27e7..0911ee5 100644
--- a/src/common/sequence/rtseq.hpp
+++ b/src/common/sequence/rtseq.hpp
@@ -20,7 +20,7 @@
 #include <array>
 #include <algorithm>
 #include "nucl.hpp"
-#include "utils/log.hpp"
+#include "math/log.hpp"
 #include "seq_common.hpp"
 #include "seq.hpp"
 #include "simple_seq.hpp"
@@ -255,6 +255,9 @@ public:
         }
     }
 
+    explicit RuntimeSeq(size_t k, const RuntimeSeq &seq)
+            : RuntimeSeq(k, seq.data_.data()) {}
+
     template<size_t size2_, typename T2 = T>
     explicit RuntimeSeq(const Seq<size2_, T2> &seq, bool) : size_(size2_) {
         VERIFY(size_ <= max_size_);
@@ -323,6 +326,10 @@ public:
             this->data_[cur] = 0;
     }
 
+    RuntimeSeq start(size_t K) const {
+        return RuntimeSeq(K, data_.data());
+    }
+    
     /**
      *  Reads sequence from the file (in the same format as BinWrite writes it)
      *  and returns false if error occured, true otherwise.
@@ -668,20 +675,20 @@ public:
         return operator[](0);
     }
 
-    static size_t GetHash(const DataType *data, size_t sz, uint32_t seed = 0) {
+    static size_t GetHash(const DataType *data, size_t sz, uint64_t seed = 0) {
         return CityHash64WithSeed((const char *) data, sz * sizeof(DataType), 0x9E3779B9 ^ seed);
     }
 
-    size_t GetHash(unsigned seed = 0) const {
+    size_t GetHash(uint64_t seed = 0) const {
         return GetHash(data_.data(), GetDataSize(size_), seed);
     }
 
     struct hash {
-        size_t operator()(const RuntimeSeq<max_size_, T> &seq, uint32_t seed = 0) const {
+        size_t operator()(const RuntimeSeq<max_size_, T> &seq, uint64_t seed = 0) const {
             return seq.GetHash(seed);
         }
 
-        size_t operator()(const DataType *data, size_t sz, unsigned seed = 0) {
+        size_t operator()(const DataType *data, size_t sz, uint64_t seed = 0) {
             return GetHash(data, sz, seed);
         }
     };
diff --git a/src/common/sequence/seq.hpp b/src/common/sequence/seq.hpp
index bcaaa72..34a40a0 100755
--- a/src/common/sequence/seq.hpp
+++ b/src/common/sequence/seq.hpp
@@ -36,7 +36,7 @@
 
 #include "utils/verify.hpp"
 #include "nucl.hpp"
-#include "utils/log.hpp"
+#include "math/log.hpp"
 #include "seq_common.hpp"
 
 
@@ -461,20 +461,20 @@ public:
         return operator[](0);
     }
 
-    static size_t GetHash(const DataType *data, size_t sz = DataSize, uint32_t seed = 0) {
+    static size_t GetHash(const DataType *data, size_t sz = DataSize, uint64_t seed = 0) {
         return CityHash64WithSeed((const char *) data, sz * sizeof(DataType), 0x9E3779B9 ^ seed);
     }
 
-    size_t GetHash(uint32_t seed = 0) const {
+    size_t GetHash(uint64_t seed = 0) const {
         return GetHash(data_.data(), DataSize, seed);
     }
 
     struct hash {
-        size_t operator()(const Seq<size_, T> &seq, uint32_t seed = 0) const {
+        size_t operator()(const Seq<size_, T> &seq, uint64_t seed = 0) const {
             return seq.GetHash(seed);
         }
 
-        size_t operator()(const DataType *data, size_t sz = DataSize, uint32_t seed = 0) {
+        size_t operator()(const DataType *data, size_t sz = DataSize, uint64_t seed = 0) {
             return GetHash(data, sz, seed);
         }
     };
diff --git a/src/common/sequence/sequence.hpp b/src/common/sequence/sequence.hpp
index aaaf21b..7f47f57 100755
--- a/src/common/sequence/sequence.hpp
+++ b/src/common/sequence/sequence.hpp
@@ -16,6 +16,9 @@
 #include "seq.hpp"
 #include "rtseq.hpp"
 
+#include <llvm/ADT/IntrusiveRefCntPtr.h>
+#include <llvm/Support/TrailingObjects.h>
+
 class Sequence {
     // Type to store Seq in Sequences
     typedef seq_element_type ST;
@@ -26,16 +29,36 @@ class Sequence {
     // Number of bits in STN (for faster div and mod)
     const static size_t STNBits = log_<STN, 2>::value;
 
-    template<typename T>
-    struct array_deleter {
-        void operator()(const T *p) { delete[] p; }
+    class ManagedNuclBuffer final : public llvm::ThreadSafeRefCountedBase<ManagedNuclBuffer>,
+                                    protected llvm::TrailingObjects<ManagedNuclBuffer, ST> {
+        friend TrailingObjects;
+
+        ManagedNuclBuffer() {}
+
+        ManagedNuclBuffer(size_t nucls, ST *buf) {
+            std::uninitialized_copy(buf, buf + Sequence::DataSize(nucls), data());
+        }
+
+      public:
+        static ManagedNuclBuffer *create(size_t nucls) {
+            void *mem = ::operator new(totalSizeToAlloc<ST>(Sequence::DataSize(nucls)));
+            return new (mem) ManagedNuclBuffer();
+        }
+
+        static ManagedNuclBuffer *create(size_t nucls, ST *data) {
+            void *mem = ::operator new(totalSizeToAlloc<ST>(Sequence::DataSize(nucls)));
+            return new (mem) ManagedNuclBuffer(nucls, data);
+        }
+
+        const ST *data() const { return getTrailingObjects<ST>(); }
+        ST *data() { return getTrailingObjects<ST>(); }
     };
 
 private:
     size_t from_;
     size_t size_;
     bool rtl_; // Right to left + complimentary (?)
-    std::shared_ptr<ST> data_;
+    llvm::IntrusiveRefCntPtr<ManagedNuclBuffer> data_;
 
     static size_t DataSize(size_t size) {
         return (size + STN - 1) >> STNBits;
@@ -44,7 +67,7 @@ private:
     template<typename S>
     void InitFromNucls(const S &s, bool rc = false) {
         size_t bytes_size = DataSize(size_);
-        ST *bytes = data_.get();
+        ST *bytes = data_->data();
 
         VERIFY(is_dignucl(s[0]) || is_nucl(s[0]));
 
@@ -95,64 +118,64 @@ private:
     }
 
 
+    Sequence(size_t size, int)
+            : from_(0), size_(size), rtl_(false), data_(ManagedNuclBuffer::create(size_)) {}
+
 public:
     /**
      * Sequence initialization (arbitrary size string)
      *
      * @param s ACGT or 0123-string
      */
-    explicit Sequence(const char *s, bool rc = false) :
-            from_(0), size_(strlen(s)), rtl_(false), data_(new ST[DataSize(size_)], array_deleter<ST>()) {
+    explicit Sequence(const char *s, bool rc = false)
+            : Sequence(strlen(s), 0) {
         InitFromNucls(s, rc);
     }
 
-    explicit Sequence(char *s, bool rc = false) :
-            from_(0), size_(strlen(s)), rtl_(false), data_(new ST[DataSize(size_)], array_deleter<ST>()) {
+    explicit Sequence(char *s, bool rc = false)
+            : Sequence(strlen(s), 0) {
         InitFromNucls(s, rc);
     }
 
     template<typename S>
-    explicit Sequence(const S &s, bool rc = false) :
-            from_(0), size_(s.size()), rtl_(false), data_(new ST[DataSize(size_)], array_deleter<ST>()) {
+    explicit Sequence(const S &s, bool rc = false)
+            : Sequence(s.size(), 0) {
         InitFromNucls(s, rc);
     }
 
-    Sequence() :
-            from_(0), size_(0), rtl_(false), data_(new ST[DataSize(size_)], array_deleter<ST>()) {
-        memset(data_.get(), 0, DataSize(size_));
+    Sequence()
+            : Sequence(size_t(0), 0) {
+        memset(data_->data(), 0, DataSize(size_));
     }
 
     template<size_t size2_>
-    explicit Sequence(const Seq<size2_> &kmer, size_t) :
-            from_(0), size_(kmer.size()), rtl_(false), data_(new ST[DataSize(size_)], array_deleter<ST>()) {
-
-        kmer.copy_data(data_.get());
+    explicit Sequence(const Seq<size2_> &kmer, size_t)
+            : Sequence(kmer.size(), 0) {
+        kmer.copy_data(data_->data());
     }
 
     template<size_t size2_>
-    explicit Sequence(const RuntimeSeq<size2_> &kmer, size_t) :
-            from_(0), size_(kmer.size()), rtl_(false), data_(new ST[DataSize(size_)], array_deleter<ST>()) {
-
-        kmer.copy_data(data_.get());
+    explicit Sequence(const RuntimeSeq<size2_> &kmer, size_t)
+            : Sequence(kmer.size(), 0) {
+        kmer.copy_data(data_->data());
     }
 
-    Sequence(const Sequence &seq, size_t from, size_t size, bool rtl) :
-            from_(from), size_(size), rtl_(rtl), data_(seq.data_) {
-    }
+    Sequence(const Sequence &seq, size_t from, size_t size, bool rtl)
+            : from_(from), size_(size), rtl_(rtl), data_(seq.data_) {}
 
-    Sequence(const Sequence &s) :
-            from_(s.from_), size_(s.size_), rtl_(s.rtl_), data_(s.data_) {
-    }
+    Sequence(const Sequence &s)
+            : Sequence(s, s.from_, s.size_, s.rtl_) {}
 
     ~Sequence() { }
 
     const Sequence &operator=(const Sequence &rhs) {
-        if (&rhs != this) {
-            from_ = rhs.from_;
-            size_ = rhs.size_;
-            rtl_ = rhs.rtl_;
-            data_ = rhs.data_;
-        }
+        if (&rhs == this)
+            return *this;
+
+        from_ = rhs.from_;
+        size_ = rhs.size_;
+        rtl_ = rhs.rtl_;
+        data_ = rhs.data_;
 
         return *this;
     }
@@ -160,7 +183,7 @@ public:
     char operator[](const size_t index) const {
         //todo can be put back after switching to distributing release without asserts
         //VERIFY(index < size_);
-        const ST *bytes = data_.get();
+        const ST *bytes = data_->data();
         if (rtl_) {
             size_t i = from_ + size_ - 1 - index;
             return complement((bytes[i >> STNBits] >> ((i & (STN - 1)) << 1)) & 3);
@@ -171,13 +194,11 @@ public:
     }
 
     bool operator==(const Sequence &that) const {
-        if (size_ != that.size_) {
+        if (size_ != that.size_)
             return false;
-        }
 
-        if (data_ == that.data_ && from_ == that.from_ && rtl_ == that.rtl_) {
+        if (data_ == that.data_ && from_ == that.from_ && rtl_ == that.rtl_)
             return true;
-        }
 
         for (size_t i = 0; i < size_; ++i) {
             if (this->operator[](i) != that[i]) {
@@ -275,12 +296,10 @@ public:
 
 private:
     inline bool ReadHeader(std::istream &file);
-
     inline bool WriteHeader(std::ostream &file) const;
 
 public:
     inline bool BinRead(std::istream &file);
-
     inline bool BinWrite(std::ostream &file) const;
 };
 
@@ -302,7 +321,7 @@ Seq<size2_> Sequence::fast_start() const {
     size_t start = from_ >> STNBits;
     size_t end = (from_ + size_ - 1) >> STNBits;
     size_t shift = (from_ & (STN - 1)) << 1;
-    const ST *bytes = data_.get();
+    const ST *bytes = data_->data();
 
     for (size_t i = start; i <= end; ++i) {
         result[i - start] = bytes[i] >> shift;
@@ -451,10 +470,10 @@ std::string Sequence::str() const {
 
 std::string Sequence::err() const {
     std::ostringstream oss;
-    oss << "{ *data=" << data_ <<
-    ", from_=" << from_ <<
-    ", size_=" << size_ <<
-    ", rtl_=" << int(rtl_) << " }";
+    oss << "{ *data=" << data_->data() <<
+            ", from_=" << from_ <<
+            ", size_=" << size_ <<
+            ", rtl_=" << int(rtl_) << " }";
     return oss.str();
 }
 
@@ -485,8 +504,8 @@ bool Sequence::WriteHeader(std::ostream &file) const {
 bool Sequence::BinRead(std::istream &file) {
     ReadHeader(file);
 
-    data_ = std::shared_ptr<ST>(new ST[DataSize(size_)], array_deleter<ST>());
-    file.read((char *) data_.get(), DataSize(size_) * sizeof(ST));
+    data_ = llvm::IntrusiveRefCntPtr<ManagedNuclBuffer>(ManagedNuclBuffer::create(size_));
+    file.read((char *) data_->data(), DataSize(size_) * sizeof(ST));
 
     return !file.fail();
 }
@@ -500,7 +519,7 @@ bool Sequence::BinWrite(std::ostream &file) const {
 
     WriteHeader(file);
 
-    file.write((const char *) data_.get(), DataSize(size_) * sizeof(ST));
+    file.write((const char *) data_->data(), DataSize(size_) * sizeof(ST));
 
     return !file.fail();
 }
@@ -536,6 +555,10 @@ public:
         return buf_.size();
     }
 
+    void clear() {
+        return buf_.clear();
+    }
+
     char operator[](const size_t index) const {
         VERIFY(index < buf_.size());
         return buf_[index];
diff --git a/src/common/sequence/sequence_tools.hpp b/src/common/sequence/sequence_tools.hpp
index f2231e2..abcb952 100644
--- a/src/common/sequence/sequence_tools.hpp
+++ b/src/common/sequence/sequence_tools.hpp
@@ -14,7 +14,7 @@
 
 #include "nucl.hpp"
 #include "sequence.hpp"
-#include "utils/levenshtein.hpp"
+#include "levenshtein.hpp"
 
 inline const std::string Reverse(const std::string &s) {
     return std::string(s.rbegin(), s.rend());
@@ -22,7 +22,7 @@ inline const std::string Reverse(const std::string &s) {
 
 inline const std::string Complement(const std::string &s) {
     std::string res(s.size(), 0);
-    transform(s.begin(), s.end(), res.begin(), nucl_complement);
+    std::transform(s.begin(), s.end(), res.begin(), nucl_complement);
     return res;
 }
 
diff --git a/src/common/sequence/simple_seq.hpp b/src/common/sequence/simple_seq.hpp
index 5bc144a..d818617 100644
--- a/src/common/sequence/simple_seq.hpp
+++ b/src/common/sequence/simple_seq.hpp
@@ -23,7 +23,7 @@
 
 #include "utils/verify.hpp"
 #include "nucl.hpp"
-#include "utils/log.hpp"
+#include "math/log.hpp"
 #include "seq_common.hpp"
 /**
  * @param T is max number of nucleotides, type for storage
diff --git a/src/common/stages/construction.cpp b/src/common/stages/construction.cpp
index 6116a62..a634480 100644
--- a/src/common/stages/construction.cpp
+++ b/src/common/stages/construction.cpp
@@ -22,7 +22,7 @@ void construct_graph(io::ReadStreamList<Read>& streams,
     config::debruijn_config::construction params = cfg::get().con;
     params.early_tc.enable &= !cfg::get().gap_closer_enable;
 
-    ReadStatistics stats = ConstructGraphWithCoverage(params, streams, gp.g,
+    utils::ReadStatistics stats = ConstructGraphWithCoverage(params, streams, gp.g,
                                                       gp.index, gp.flanking_cov, contigs_stream);
     size_t rl = stats.max_read_length_;
 
diff --git a/src/common/stages/simplification.cpp b/src/common/stages/simplification.cpp
index f0cd8a9..1f57a79 100644
--- a/src/common/stages/simplification.cpp
+++ b/src/common/stages/simplification.cpp
@@ -11,6 +11,7 @@
 #include "stages/simplification_pipeline/graph_simplification.hpp"
 #include "stages/simplification_pipeline/single_cell_simplification.hpp"
 #include "stages/simplification_pipeline/rna_simplification.hpp"
+#include "modules/simplification/cleaner.hpp"
 
 #include "simplification.hpp"
 
@@ -59,17 +60,17 @@ class GraphSimplifier {
         return true;
     }
 
-    void RemoveShortPolyATEdges(size_t max_length,
-                                HandlerF removal_handler = 0, size_t chunk_cnt = 1) {
+    void RemoveShortPolyATEdges(HandlerF removal_handler, size_t chunk_cnt) {
         INFO("Removing short polyAT");
         EdgeRemover<Graph> er(g_, removal_handler);
-        ATCondition<Graph> condition (g_, 0.8, max_length, false);
-        for (auto iter = g_.SmartEdgeBegin(); !iter.IsEnd(); ++iter){
+        ATCondition<Graph> condition(g_, 0.8, false);
+        for (auto iter = g_.SmartEdgeBegin(/*canonical only*/true); !iter.IsEnd(); ++iter){
             if (g_.length(*iter) == 1 && condition.Check(*iter)) {
                 er.DeleteEdgeNoCompress(*iter);
             }
         }
-        ParallelCompress(g_, chunk_cnt);
+        omnigraph::CompressAllVertices(g_, chunk_cnt);
+        omnigraph::CleanIsolatedVertices(g_, chunk_cnt);
     }
 
     void InitialCleaning() {
@@ -85,8 +86,12 @@ class GraphSimplifier {
                 algos);
 
         if (info_container_.mode() == config::pipeline_type::rna){
-            RemoveShortPolyATEdges(1, removal_handler_, info_container_.chunk_cnt());
-            PushValid(ShortPolyATEdgesRemoverInstance(g_, 1, removal_handler_, info_container_.chunk_cnt()), "Short PolyA/T Edges",algos) ;
+            //TODO create algo
+            RemoveShortPolyATEdges(removal_handler_, info_container_.chunk_cnt());
+            PushValid(std::make_shared<omnigraph::ParallelEdgeRemovingAlgorithm<Graph>>(g_, func::And(LengthUpperBound<Graph>(g_, 1),
+                                                                                                      ATCondition<Graph>(g_, 0.8, false)),
+                                                                                     info_container_.chunk_cnt(), removal_handler_, true),
+                      "Short PolyA/T Edges", algos) ;
             PushValid(ATTipClipperInstance(g_, removal_handler_, info_container_.chunk_cnt()), "AT Tips", algos);
         }
 
@@ -113,7 +118,7 @@ class GraphSimplifier {
                               removal_handler_),
                     "Initial ec remover",
                     algos);
-
+            
             PushValid(
                     LowFlankDisconnectorInstance(g_, gp_.flanking_cov,
                                                  simplif_cfg_.init_clean.disconnect_flank_cov, info_container_,
@@ -255,12 +260,6 @@ class GraphSimplifier {
         //FIXME need better configuration
 
         if (info_container_.mode() == config::pipeline_type::meta) {
-            PushValid(
-                    BRInstance(g_, simplif_cfg_.second_final_br,
-                                       info_container_, removal_handler_),
-                    "Yet another final bulge remover",
-                    algos);
-
             EdgePredicate<Graph> meta_thorn_condition
                     = And(LengthUpperBound<Graph>(g_, LengthThresholdFinder::MaxErroneousConnectionLength(
                                                                            g_.k(), simplif_cfg_.isec.max_ec_length_coefficient)),
@@ -286,17 +285,25 @@ class GraphSimplifier {
             PushValid(ATTipClipperInstance(g_, removal_handler_, info_container_.chunk_cnt()), "AT Tips", algos);
         }
 
+        PushValid(
+                LowCoverageEdgeRemoverInstance(g_,
+                                               simplif_cfg_.lcer,
+                                               info_container_),
+                "Removing edges with low coverage",
+                algos);
+
+        const size_t primary_launch_cnt = 2;
         size_t iteration = 0;
         bool enable_flag = true;
-        while (enable_flag) {
+        while (iteration < primary_launch_cnt || enable_flag) {
             enable_flag = false;
 
-            INFO("Iteration " << iteration);
+            INFO("Iteration " << iteration + 1);
 
             enable_flag |= FinalRemoveErroneousEdges();
             cnt_callback_.Report();
 
-            enable_flag |= RunAlgos(algos);
+            enable_flag |= RunAlgos(algos, iteration < primary_launch_cnt);
 
             iteration++;
 
@@ -401,7 +408,7 @@ public:
         //cannot stop simply if nothing changed, since threshold changes on every iteration
         while (iteration < simplif_cfg_.cycle_iter_count || graph_changed) {
             INFO("PROCEDURE == Simplification cycle, iteration " << iteration + 1);
-            graph_changed = RunAlgos(algos);
+            graph_changed = RunAlgos(algos, iteration == simplif_cfg_.cycle_iter_count - 1);
             ++iteration;
         }
 
@@ -536,8 +543,10 @@ void SimplificationCleanup::run(conj_graph_pack &gp, const char*) {
 
     auto isolated_edge_remover =
         IsolatedEdgeRemoverInstance(gp.g, cfg::get().simp.ier, info_container, (EdgeRemovalHandlerF<Graph>)nullptr);
-    if (isolated_edge_remover != nullptr)
+    if (isolated_edge_remover != nullptr) {
+        INFO("Removing isolated edges");
         isolated_edge_remover->Run();
+    }
 
     double low_threshold = gp.ginfo.trusted_bound();
     if (math::gr(low_threshold, 0.0)) {
diff --git a/src/common/stages/simplification_pipeline/graph_simplification.hpp b/src/common/stages/simplification_pipeline/graph_simplification.hpp
index 99937ed..49905a1 100644
--- a/src/common/stages/simplification_pipeline/graph_simplification.hpp
+++ b/src/common/stages/simplification_pipeline/graph_simplification.hpp
@@ -382,11 +382,12 @@ AlternativesAnalyzer<Graph> ParseBRConfig(const Graph &g,
     DEBUG("Length bound " << max_length);
 
     return AlternativesAnalyzer<Graph>(g, config.max_coverage,
-                                                    max_length,
-                                                    config.max_relative_coverage,
-                                                    config.max_delta,
-                                                    config.max_relative_delta,
-                                                    config.max_number_edges);
+                                       max_length,
+                                       config.max_relative_coverage,
+                                       config.max_delta,
+                                       config.max_relative_delta,
+                                       config.max_number_edges,
+                                       config.dijkstra_vertex_limit);
 }
 
 template<class Graph>
@@ -488,17 +489,21 @@ AlgoPtr<Graph> ComplexTipClipperInstance(Graph &g,
 
 template<class Graph>
 AlgoPtr<Graph> IsolatedEdgeRemoverInstance(Graph &g,
-                                           config::debruijn_config::simplification::isolated_edges_remover ier,
+                                           config::debruijn_config::simplification::isolated_edge_remover ier,
                                            const SimplifInfoContainer &info,
                                            EdgeRemovalHandlerF<Graph> removal_handler = 0) {
     if (!ier.enabled) {
         return nullptr;
     }
-    size_t max_length_any_cov = std::max(info.read_length(), ier.max_length_any_cov);
+    size_t max_length_any_cov = ier.use_rl_for_max_length_any_cov ?
+                                std::max(info.read_length(), ier.max_length_any_cov) : ier.max_length_any_cov;
+    size_t max_length = ier.use_rl_for_max_length ?
+                                std::max(info.read_length(), ier.max_length) : ier.max_length;
+
 
     auto condition = func::And(IsolatedEdgeCondition<Graph>(g),
                               func::Or(LengthUpperBound<Graph>(g, max_length_any_cov),
-                                      func::And(LengthUpperBound<Graph>(g, ier.max_length),
+                                      func::And(LengthUpperBound<Graph>(g, max_length),
                                                CoverageUpperBound<Graph>(g, ier.max_coverage))));
 
     return std::make_shared<omnigraph::ParallelEdgeRemovingAlgorithm<Graph>>(g,
@@ -608,15 +613,10 @@ AlgoPtr<Graph> BRInstance(Graph &g,
 
     auto alternatives_analyzer = ParseBRConfig(g, br_config);
 
-    auto candidate_finder = std::make_shared<omnigraph::ParallelInterestingElementFinder<Graph>>(
-                                                          omnigraph::NecessaryBulgeCondition(g,
-                                                                              alternatives_analyzer.max_length(),
-                                                                              alternatives_analyzer.max_coverage()),
-                                                          info.chunk_cnt());
     if (br_config.parallel) {
         INFO("Creating parallel br instance");
         return make_shared<omnigraph::ParallelBulgeRemover<Graph>>(g,
-                candidate_finder,
+                info.chunk_cnt(),
                 br_config.buff_size,
                 br_config.buff_cov_diff,
                 br_config.buff_cov_rel_diff,
@@ -627,7 +627,7 @@ AlgoPtr<Graph> BRInstance(Graph &g,
     } else {
         INFO("Creating br instance");
         return make_shared<omnigraph::BulgeRemover<Graph>>(g,
-                candidate_finder,
+                info.chunk_cnt(),
                 alternatives_analyzer,
                 nullptr,
                 removal_handler,
@@ -657,6 +657,25 @@ AlgoPtr<Graph> LowFlankDisconnectorInstance(Graph &g,
 }
 
 template<class Graph>
+AlgoPtr<Graph> LowCoverageEdgeRemoverInstance(Graph &g,
+                                              const config::debruijn_config::simplification::low_covered_edge_remover &lcer_config,
+                                              const SimplifInfoContainer &info) {
+    if (!lcer_config.enabled) {
+        return nullptr;
+    }
+    VERIFY(info.read_length() > g.k());
+    double threshold = lcer_config.coverage_threshold * double(info.read_length() - g.k()) / double(info.read_length());
+    INFO("Low coverage edge removal (LCER) activated and will remove edges of coverage lower than " << threshold);
+    return make_shared<ParallelEdgeRemovingAlgorithm<Graph, CoverageComparator<Graph>>>
+                        (g,
+                        CoverageUpperBound<Graph>(g, threshold),
+                        info.chunk_cnt(),
+                        (EdgeRemovalHandlerF<Graph>)nullptr,
+                        /*canonical_only*/true,
+                        CoverageComparator<Graph>(g));
+}
+
+template<class Graph>
 bool RemoveHiddenLoopEC(Graph &g,
                         const FlankingCoverage<Graph> &flanking_cov,
                         double determined_coverage_threshold,
diff --git a/src/common/stages/simplification_pipeline/rna_simplification.hpp b/src/common/stages/simplification_pipeline/rna_simplification.hpp
index 050fa61..ddc1aa7 100644
--- a/src/common/stages/simplification_pipeline/rna_simplification.hpp
+++ b/src/common/stages/simplification_pipeline/rna_simplification.hpp
@@ -7,15 +7,10 @@ namespace debruijn {
 namespace simplification {
 
 template<class Graph>
-AlgoPtr<Graph> ShortPolyATEdgesRemoverInstance(Graph &g, size_t max_length, EdgeRemovalHandlerF<Graph> removal_handler = 0, size_t chunk_cnt = 1) {
-    auto condition = func::And(ATCondition<Graph>(g, 0.8, max_length, false), LengthUpperBound<Graph>(g, 1));
-    return std::make_shared<omnigraph::ParallelEdgeRemovingAlgorithm<Graph>>(g, condition, chunk_cnt, removal_handler, true);
-}
-
-template<class Graph>
 AlgoPtr<Graph> ATTipClipperInstance(Graph &g, EdgeRemovalHandlerF<Graph> removal_handler = 0, size_t chunk_cnt = 1) {
 //TODO: review params 0.8, 200?
-    return std::make_shared<omnigraph::ParallelEdgeRemovingAlgorithm<Graph>>(g, ATCondition<Graph>(g, 0.8, 200, true), chunk_cnt, removal_handler, true);
+    return std::make_shared<omnigraph::ParallelEdgeRemovingAlgorithm<Graph>>(g, func::And(omnigraph::LengthUpperBound<Graph>(g, 200), ATCondition<Graph>(g, 0.8, true)),
+                                                                             chunk_cnt, removal_handler, true);
 }
 
 }
diff --git a/src/common/utils/CMakeLists.txt b/src/common/utils/CMakeLists.txt
index 40c2d20..9043fea 100644
--- a/src/common/utils/CMakeLists.txt
+++ b/src/common/utils/CMakeLists.txt
@@ -8,8 +8,8 @@
 project(utils CXX)
 
 set(utils_src
-    copy_file.cpp
-    path_helper.cpp
+        filesystem/copy_file.cpp
+        filesystem/path_helper.cpp
     logger/logger_impl.cpp)
 
 if (READLINE_FOUND)
diff --git a/src/common/utils/autocompletion.cpp b/src/common/utils/autocompletion.cpp
index bb79146..c636b76 100644
--- a/src/common/utils/autocompletion.cpp
+++ b/src/common/utils/autocompletion.cpp
@@ -11,7 +11,7 @@
 #include <cstring>
 #include <readline/readline.h>
 
-namespace online_visualization {
+namespace utils {
 
 std::vector<std::string> commands;
 
diff --git a/src/common/utils/autocompletion.hpp b/src/common/utils/autocompletion.hpp
index f6f04d2..e06e5d5 100644
--- a/src/common/utils/autocompletion.hpp
+++ b/src/common/utils/autocompletion.hpp
@@ -9,7 +9,7 @@
 #include <vector>
 #include <string>
 
-namespace online_visualization {
+namespace utils {
 
 void InitAutocompletion(const std::vector<std::string>& commands);
 
diff --git a/src/common/utils/cpp_utils.hpp b/src/common/utils/cpp_utils.hpp
index b59b7ab..ed8af0e 100644
--- a/src/common/utils/cpp_utils.hpp
+++ b/src/common/utils/cpp_utils.hpp
@@ -37,4 +37,9 @@ T identity_function(const T &t) {
     return t;
 }
 
+template<typename Base, typename T>
+inline bool instanceof(const T *ptr) {
+    return dynamic_cast<const Base *>(ptr) != nullptr;
+}
+
 } // namespace utils
diff --git a/src/common/utils/indices/kmer_extension_index.hpp b/src/common/utils/extension_index/kmer_extension_index.hpp
similarity index 89%
rename from src/common/utils/indices/kmer_extension_index.hpp
rename to src/common/utils/extension_index/kmer_extension_index.hpp
index b72be84..a9a3fb3 100644
--- a/src/common/utils/indices/kmer_extension_index.hpp
+++ b/src/common/utils/extension_index/kmer_extension_index.hpp
@@ -7,12 +7,12 @@
 
 #pragma once
 
-#include "perfect_hash_map.hpp"
-#include "utils/simple_tools.hpp"
-#include "storing_traits.hpp"
+#include "utils/ph_map/perfect_hash_map.hpp"
+#include "utils/stl_utils.hpp"
+#include "utils/ph_map/storing_traits.hpp"
 #include <bitset>
 
-namespace debruijn_graph {
+namespace utils {
 
 inline uint8_t invert_byte_slow(uint8_t a) {
     size_t res = 0;
@@ -50,7 +50,6 @@ private:
     char GetUnique(uint8_t mask) const {
         static char next[] = { -1, 0, 1, -1, 2, -1, -1, -1, 3, -1, -1, -1, -1,
                 -1, -1, -1 };
-        VERIFY(next[mask] != -1)
         return next[mask];
     }
 
@@ -59,18 +58,25 @@ private:
         return count[mask];
     }
 
-
     char inv_position(char nucl, bool as_is) const {
-        if(as_is)
+        if (as_is)
             return nucl;
         else
             return char(7 - nucl);
     }
 
-public:
-    explicit InOutMask(uint8_t mask = 0) : mask_(mask){
+    uint8_t outgoing() const {
+        return mask_ & 0xF;
+    }
+
+    uint8_t incoming() const {
+        return (mask_ >> 4) & 0xF;
     }
 
+public:
+    explicit InOutMask(uint8_t mask = 0)
+            : mask_(mask) {}
+
     uint8_t get_mask() const {
         return mask_;
     }
@@ -92,7 +98,7 @@ public:
         unsigned pmask = (unsigned) (1 << inv_position(char(pnucl + 4), as_is));
         if (!(mask_ & pmask)) {
 #           pragma omp atomic
-            mask_|= (unsigned char) pmask;
+            mask_ |= (unsigned char) pmask;
         }
     }
 
@@ -125,35 +131,35 @@ public:
     }
 
     bool IsDeadEnd() const {
-        return !(mask_ & 15);
+        return outgoing() == 0;
     }
 
     bool IsDeadStart() const {
-        return !(mask_ >> 4);
+        return incoming() == 0;
     }
 
     bool CheckUniqueOutgoing() const {
-        return CheckUnique(mask_ & 15);
+        return CheckUnique(outgoing());
     }
 
     bool CheckUniqueIncoming() const {
-        return CheckUnique(uint8_t(mask_ >> 4));
+        return CheckUnique(incoming());
     }
 
     char GetUniqueOutgoing() const {
-        return GetUnique(mask_ & 15);
+        return GetUnique(outgoing());
     }
 
     char GetUniqueIncoming() const {
-        return GetUnique(uint8_t(mask_ >> 4));
+        return GetUnique(incoming());
     }
 
     size_t OutgoingEdgeCount() const {
-        return Count(mask_ & 15);
+        return Count(outgoing());
     }
 
     size_t IncomingEdgeCount() const {
-        return Count(uint8_t(mask_ >> 4));
+        return Count(incoming());
     }
 };
 
@@ -163,8 +169,8 @@ Stream &operator<<(Stream& stream, const InOutMask &mask) {
 }
 
 template<class Seq>
-struct slim_kmer_index_traits : public kmer_index_traits<Seq> {
-    typedef kmer_index_traits<Seq> __super;
+struct slim_kmer_index_traits : public utils::kmer_index_traits<Seq> {
+    typedef utils::kmer_index_traits<Seq> __super;
 
     typedef MMappedRecordReader<typename Seq::DataType> FinalKMerStorage;
 
@@ -186,8 +192,8 @@ template<typename KeyWithHash>
 struct AbstractDeEdge {
     KeyWithHash start;
     KeyWithHash end;
-    AbstractDeEdge(KeyWithHash _start, KeyWithHash _end) : start(_start), end(_end) {
-    }
+    AbstractDeEdge(KeyWithHash s, KeyWithHash e)
+            : start(std::move(s)), end(std::move(e)) {}
 
     AbstractDeEdge<KeyWithHash> &operator=(const AbstractDeEdge<KeyWithHash> &that) {
         this->start = that.start;
@@ -299,8 +305,7 @@ public:
         return this->get_value(kwh).IncomingEdgeCount();
     }
 
-    ~DeBruijnExtensionIndex() {
-    }
+    ~DeBruijnExtensionIndex() { }
 
 private:
    DECL_LOGGER("ExtentionIndex");
diff --git a/src/common/utils/indices/kmer_extension_index_builder.hpp b/src/common/utils/extension_index/kmer_extension_index_builder.hpp
similarity index 95%
rename from src/common/utils/indices/kmer_extension_index_builder.hpp
rename to src/common/utils/extension_index/kmer_extension_index_builder.hpp
index 4ca9089..e286b0e 100644
--- a/src/common/utils/indices/kmer_extension_index_builder.hpp
+++ b/src/common/utils/extension_index/kmer_extension_index_builder.hpp
@@ -7,7 +7,9 @@
 #pragma once
 
 #include "kmer_extension_index.hpp"
-#include "kmer_splitters.hpp"
+#include "utils/kmer_mph/kmer_splitters.hpp"
+
+namespace utils {
 
 class DeBruijnExtensionIndexBuilder {
 public:
@@ -59,13 +61,13 @@ public:
 
 public:
     template<class Index, class Streams>
-    ReadStatistics BuildExtensionIndexFromStream(Index &index, Streams &streams, io::SingleStream* contigs_stream = 0,
+    ReadStatistics BuildExtensionIndexFromStream(Index &index, Streams &streams, io::SingleStream *contigs_stream = 0,
                                                  size_t read_buffer_size = 0) const {
         unsigned nthreads = (unsigned) streams.size();
 
         // First, build a k+1-mer index
         DeBruijnReadKMerSplitter<typename Streams::ReadT,
-                                 StoringTypeFilter<typename Index::storing_type>>
+                StoringTypeFilter<typename Index::storing_type>>
                 splitter(index.workdir(), index.k() + 1, 0xDEADBEEF, streams,
                          contigs_stream, read_buffer_size);
         KMerDiskCounter<RtSeq> counter(index.workdir(), splitter);
@@ -104,3 +106,4 @@ struct ExtensionIndexHelper {
     using DeBruijnExtensionIndexBuilderT = DeBruijnExtensionIndexBuilder;
 };
 
+}
diff --git a/src/common/utils/copy_file.cpp b/src/common/utils/filesystem/copy_file.cpp
similarity index 87%
rename from src/common/utils/copy_file.cpp
rename to src/common/utils/filesystem/copy_file.cpp
index 289ff34..79ae779 100644
--- a/src/common/utils/copy_file.cpp
+++ b/src/common/utils/filesystem/copy_file.cpp
@@ -7,7 +7,7 @@
 
 #include "copy_file.hpp"
 
-#include "utils/path_helper.hpp"
+#include "path_helper.hpp"
 #include "utils/logger/logger.hpp"
 
 #include <boost/algorithm/string.hpp>
@@ -23,31 +23,28 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 
-namespace path {
+namespace fs {
 
 namespace details {
 
-using namespace path;
-
 void copy_file(std::string from_path, std::string to_path) {
     using namespace std;
 
-    make_full_path(from_path);
-    make_full_path(to_path  );
+    from_path = make_full_path(from_path);
+    to_path = make_full_path(to_path);
 
     if (from_path == to_path)
         return;
 
     std::ifstream source(from_path, ios::binary);
-    std::ofstream dest  (to_path.c_str()  , ios::binary);
+    std::ofstream dest(to_path.c_str(), ios::binary);
 
     dest << source.rdbuf();
 }
 
-
 void hard_link(std::string from_path, std::string to_path) {
-    make_full_path(from_path);
-    make_full_path(to_path  );
+    from_path = make_full_path(from_path);
+    to_path = make_full_path(to_path);
 
     if (from_path == to_path)
         return;
@@ -96,7 +93,7 @@ files_t folders_in_folder(std::string const& path) {
 
 } // details
 
-path::files_t files_by_prefix(std::string const& path) {
+fs::files_t files_by_prefix(std::string const& path) {
     using namespace details;
     files_t files;
 
@@ -113,7 +110,7 @@ path::files_t files_by_prefix(std::string const& path) {
     return out_files;
 }
 
-void copy_files_by_prefix(path::files_t const& files, std::string const& to_folder) {
+void copy_files_by_prefix(fs::files_t const& files, std::string const& to_folder) {
     using namespace details;
 
     for (auto it = files.begin(); it != files.end(); ++it) {
@@ -124,7 +121,7 @@ void copy_files_by_prefix(path::files_t const& files, std::string const& to_fold
     }
 }
 
-void link_files_by_prefix(path::files_t const& files, std::string const& to_folder) {
+void link_files_by_prefix(fs::files_t const& files, std::string const& to_folder) {
     using namespace details;
 
     for (auto it = files.begin(); it != files.end(); ++it) {
@@ -149,7 +146,7 @@ void copy_files_by_ext(std::string const& from_folder, std::string const& to_fol
 
         for (auto it = folders.begin(); it != folders.end(); ++it) {
             std::string subdir = append_path(to_folder, filename(*it));
-            path:: make_dir(subdir);
+            fs:: make_dir(subdir);
             copy_files_by_ext(*it, subdir, ext, recursive);
         }
     }
diff --git a/src/common/utils/copy_file.hpp b/src/common/utils/filesystem/copy_file.hpp
similarity index 63%
rename from src/common/utils/copy_file.hpp
rename to src/common/utils/filesystem/copy_file.hpp
index 4f0e4ab..33ea92f 100644
--- a/src/common/utils/copy_file.hpp
+++ b/src/common/utils/filesystem/copy_file.hpp
@@ -5,14 +5,14 @@
 //* See file LICENSE for details.
 //***************************************************************************
 
-#include "utils/path_helper.hpp"
+#include "path_helper.hpp"
 #include <string>
 
-namespace path {
+namespace fs {
 
-path::files_t files_by_prefix(std::string const& path);
-void copy_files_by_prefix(path::files_t const& files, std::string const& to_folder);
-void link_files_by_prefix(path::files_t const& files, std::string const& to_folder);
+files_t files_by_prefix(std::string const& path);
+void copy_files_by_prefix(files_t const& files, std::string const& to_folder);
+void link_files_by_prefix(files_t const& files, std::string const& to_folder);
 void copy_files_by_ext(std::string const& from_folder, std::string const& to_folder, std::string const& ext, bool recursive);
 
 }
diff --git a/src/common/utils/file_limit.hpp b/src/common/utils/filesystem/file_limit.hpp
similarity index 97%
rename from src/common/utils/file_limit.hpp
rename to src/common/utils/filesystem/file_limit.hpp
index d97c791..5a4d9fa 100644
--- a/src/common/utils/file_limit.hpp
+++ b/src/common/utils/filesystem/file_limit.hpp
@@ -13,6 +13,8 @@
 
 #include "utils/verify.hpp"
 
+namespace utils {
+
 inline rlim_t limit_file(size_t limit) {
   struct rlimit rl;
 
@@ -31,3 +33,5 @@ inline rlim_t limit_file(size_t limit) {
 
   return rl.rlim_cur;
 }
+
+}
diff --git a/src/common/utils/path_helper.cpp b/src/common/utils/filesystem/path_helper.cpp
similarity index 97%
rename from src/common/utils/path_helper.cpp
rename to src/common/utils/filesystem/path_helper.cpp
index 4225f7e..b9dc5df 100644
--- a/src/common/utils/path_helper.cpp
+++ b/src/common/utils/filesystem/path_helper.cpp
@@ -5,7 +5,7 @@
 //* See file LICENSE for details.
 //***************************************************************************
 
-#include "utils/path_helper.hpp"
+#include "path_helper.hpp"
 
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -18,7 +18,7 @@
 #include <string>
 #include <vector>
 
-namespace path {
+namespace fs {
 
 bool make_dir(std::string const& folder) {
     return mkdir(folder.c_str(), 0755) == 0;
@@ -87,9 +87,10 @@ std::string current_dir() {
     return result;
 }
 
-void make_full_path(std::string& path) {
+std::string make_full_path(std::string const& path) {
     if (!boost::starts_with(path, "/"))  // relative path
-        path = append_path(current_dir(), path);
+        return append_path(current_dir(), path);
+    return path;
 }
 
 std::string filename(std::string const& path) {
@@ -122,7 +123,7 @@ std::string extension(std::string const& path) {
 std::string parent_path(std::string const& path) {
     std::string cpath(path);
 
-    make_full_path(cpath);
+    cpath = make_full_path(cpath);
     size_t slash_pos = cpath.find_last_of('/');
 
     return (slash_pos == 0 ? std::string("/") : cpath.substr(0, slash_pos));
diff --git a/src/common/utils/path_helper.hpp b/src/common/utils/filesystem/path_helper.hpp
similarity index 84%
rename from src/common/utils/path_helper.hpp
rename to src/common/utils/filesystem/path_helper.hpp
index 73b2ab5..b404212 100644
--- a/src/common/utils/path_helper.hpp
+++ b/src/common/utils/filesystem/path_helper.hpp
@@ -17,7 +17,7 @@
 #include "utils/logger/logger.hpp"
 #include "utils/verify.hpp"
 
-namespace path {
+namespace fs {
 //todo review and make names consistent!
 
 typedef std::vector<std::string> files_t;
@@ -34,8 +34,7 @@ std::string append_path(std::string const &prefix, std::string const &suffix);
 
 std::string current_dir();
 
-//todo why non-cons argument?!
-void make_full_path(std::string &path);
+std::string make_full_path(std::string const &path);
 
 std::string filename(std::string const &path);
 
@@ -71,4 +70,21 @@ std::string make_relative_path(std::string p, std::string base = current_dir());
 
 std::string MakeLaunchTimeDirName();
 
+class TmpFolderFixture
+{
+    std::string tmp_folder_;
+
+public:
+    TmpFolderFixture(std::string tmp_folder = "tmp") :
+            tmp_folder_(tmp_folder)
+    {
+        fs::make_dirs(tmp_folder_);
+    }
+
+    ~TmpFolderFixture()
+    {
+        fs::remove_dir(tmp_folder_);
+    }
+};
+
 }
diff --git a/src/common/utils/indices/editable_index.hpp b/src/common/utils/indices/editable_index.hpp
deleted file mode 100644
index 60b629e..0000000
--- a/src/common/utils/indices/editable_index.hpp
+++ /dev/null
@@ -1,270 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#pragma once
-
-#include "perfect_hash_map.hpp.hpp"
-
-namespace debruijn_graph {
-
-//template<class ValueType, class traits>
-//class EditableDeBruijnKMerIndex: public DeBruijnKMerIndex<ValueType, traits> {
-//public:
-//  typedef size_t KMerIdx;
-//private:
-//    typedef typename traits::SeqType KMer;
-//    typedef KMerIndex<traits>  KMerIndexT;
-//    typedef ValueType KMerIndexValueType;
-//    typedef std::vector<KMerIndexValueType> KMerIndexStorageType;
-//    typedef boost::bimap<KMer, size_t> KMerPushBackIndexType;
-//
-//    KMerPushBackIndexType push_back_index_;
-//    KMerIndexStorageType push_back_buffer_;
-//
-//    using DeBruijnKMerIndex<ValueType, traits>::index_;
-//    using DeBruijnKMerIndex<ValueType, traits>::data_;
-//    using DeBruijnKMerIndex<ValueType, traits>::kmers;
-//    using DeBruijnKMerIndex<ValueType, traits>::K_;
-//    using DeBruijnKMerIndex<ValueType, traits>::InvalidKMerIdx;
-//public:
-//  EditableDeBruijnKMerIndex(unsigned K, const std::string &workdir) :
-//          DeBruijnKMerIndex<ValueType, traits>(K, workdir) {
-//  }
-//
-//  KMerIdx seq_idx(const KMer &s) const {
-//      KMerIdx idx = index_.seq_idx(s);
-//
-//      // First, check whether we're insert index itself.
-//      if (contains(idx, s, /* check push back */false))
-//          return idx;
-//
-//      // Maybe we're inside push_back buffer then?
-//      auto it = push_back_index_.left.find(s);
-//      if (it != push_back_index_.left.end())
-//          return data_.size() + it->second;
-//
-//      return InvalidKMerIdx;
-//  }
-//
-//  KMerIndexValueType &operator[](const KMer &s) {
-//      return operator[](index_.seq_idx(s));
-//  }
-//
-//  const KMerIndexValueType &operator[](const KMer &s) const {
-//      return operator[](index_.seq_idx(s));
-//  }
-//
-//
-//  const KMerIndexValueType &operator[](KMerIdx idx) const {
-//      if (idx < this->data_.size())
-//          return this->data_[idx];
-//      return push_back_buffer_[idx - this->data_.size()];
-//  }
-//
-//  KMerIndexValueType &operator[](KMerIdx idx) {
-//      if (idx < this->data_.size())
-//          return this->data_[idx];
-//
-//      return push_back_buffer_[idx - this->data_.size()];
-//  }
-//
-//  size_t size() const {
-//      return this->data_.size() + push_back_buffer_.size();
-//  }
-//
-//  bool contains(const KMer &k) const {
-//      KMerIdx idx = seq_idx(k);
-//
-//      return idx != InvalidKMerIdx;
-//  }
-//  bool contains(KMerIdx idx) const {
-//      return idx < size();
-//  }
-//
-//  size_t insert(const KMer &s, const KMerIndexValueType &value) {
-//      size_t idx = push_back_buffer_.size();
-//      push_back_index_.insert(
-//              typename KMerPushBackIndexType::value_type(s, idx));
-//      push_back_buffer_.push_back(value);
-//
-//      return idx;
-//  }
-//
-//  KMer kmer(KMerIdx idx) const {
-//      VERIFY(contains(idx));
-//
-//      if (idx < this->data_.size()) {
-//          auto it = kmers->begin() + idx;
-//          return (typename traits::raw_create()(K_, *it));
-//      }
-//
-//      idx -= this->data_.size();
-//      return push_back_index_.right.find(idx)->second;
-//  }
-//
-//  template<class Writer>
-//  void BinWrite(Writer &writer) const {
-//      index_.serialize(writer);
-//      size_t sz = this->data_.size();
-//      writer.write((char*) &sz, sizeof(sz));
-//      writer.write((char*) &this->data_[0], sz * sizeof(data_[0]));
-//      sz = push_back_buffer_.size();
-//      writer.write((char*) &sz, sizeof(sz));
-//      writer.write((char*) &push_back_buffer_[0],
-//                     sz * sizeof(push_back_buffer_[0]));
-//      for (auto it = push_back_index_.left.begin(), e =
-//                       push_back_index_.left.end(); it != e; ++it) {
-//          size_t idx = it->second;
-//          KMer::BinWrite(writer, it->first);
-//          writer.write((char*) &idx, sizeof(idx));
-//          sz -= 0;
-//      }
-//        VERIFY(sz == 0);
-//      traits::raw_serialize(writer, kmers);
-//  }
-//
-//  template<class Reader>
-//  void BinRead(Reader &reader, const std::string &FileName) {
-//      clear();
-//      index_.deserialize(reader);
-//      size_t sz = 0;
-//      reader.read((char*) &sz, sizeof(sz));
-//      data_.resize(sz);
-//      reader.read((char*) &data_[0], sz * sizeof(data_[0]));
-//      reader.read((char*) &sz, sizeof(sz));
-//      push_back_buffer_.resize(sz);
-//      reader.read((char*) &push_back_buffer_[0],
-//                    sz * sizeof(push_back_buffer_[0]));
-//      for (size_t i = 0; i < sz; ++i) {
-//          KMer s(K_);
-//          size_t idx;
-//
-//          s.BinRead(reader);
-//          reader.read((char*) &idx, sizeof(idx));
-//
-//          push_back_index_.insert(
-//                  typename KMerPushBackIndexType::value_type(s, idx));
-//      }
-//
-//      kmers = traits::raw_deserialize(reader, FileName);
-//  }
-//
-//  void clear() {
-//      index_.clear();
-//      this->data_.clear();
-//      KMerIndexStorageType().swap(data_);
-//      push_back_index_.clear();
-//      push_back_buffer_.clear();
-//      delete kmers;
-//      kmers = NULL;
-//  }
-//
-//protected:
-//  bool contains(KMerIdx idx, const KMer &k,
-//                  bool check_push_back = true) const {
-//      // Sanity check
-//      if (idx == InvalidKMerIdx || idx >= size())
-//          return false;
-//
-//      if (idx < data_.size()) {
-//          auto it = kmers->begin() + idx;
-//          return (typename traits::raw_equal_to()(k, *it));
-//      }
-//
-//      if (check_push_back) {
-//          auto it = push_back_index_.right.find(idx - data_.size());
-//          return (it != push_back_index_.right.end() && it->second == k);
-//      }
-//
-//      return false;
-//  }
-//
-//};
-
-//template <class kmer_index_traits>
-//class EditableDeBruijnKMerIndexBuilder {
-// public:
-//  template <class IdType, class Read>
-//  size_t BuildIndexFromStream(EditableDeBruijnKMerIndex<IdType, kmer_index_traits> &index,
-//                              io::ReadStreamVector<io::IReader<Read> > &streams,
-//                              SingleReadStream* contigs_stream = 0) const;
-//
-//  template <class IdType, class Graph>
-//  void BuildIndexFromGraph(EditableDeBruijnKMerIndex<IdType, kmer_index_traits> &index,
-//                           const Graph &g) const;
-//
-// protected:
-//  template <class KMerCounter, class Index>
-//  void SortUniqueKMers(KMerCounter &counter, Index &index) const;
-//
-// protected:
-//  DECL_LOGGER("K-mer Index Building");
-//};
-
-//template <>
-//class EditableDeBruijnKMerIndexBuilder<kmer_index_traits<RtSeq>> {
-// public:
-//  template <class IdType, class Read>
-//  size_t BuildIndexFromStream(EditableDeBruijnKMerIndex<IdType, kmer_index_traits<RtSeq>> &index,
-//                              io::ReadStreamVector<io::IReader<Read> > &streams,
-//                              SingleReadStream* contigs_stream = 0) const {
-//    DeBruijnReadKMerSplitter<Read> splitter(index.workdir(),
-//                                            index.K(), 0,
-//                                            streams, contigs_stream);
-//    KMerDiskCounter<RtSeq> counter(index.workdir(), splitter);
-//    KMerIndexBuilder<typename DeBruijnKMerIndex<IdType, kmer_index_traits<RtSeq>>::KMerIndexT> builder(index.workdir(), 16, streams.size());
-//    size_t sz = builder.BuildIndex(index.index_, counter, /* save final */ true);
-//    index.data_.resize(sz);
-//
-//    if (!index.kmers)
-//      index.kmers = counter.GetFinalKMers();
-//
-//    SortUniqueKMers(counter, index);
-//
-//    return 0;
-//  }
-//
-//  template <class IdType, class Graph>
-//  void BuildIndexFromGraph(EditableDeBruijnKMerIndex<IdType, RtSeq> &index,
-//                           const Graph &g) const {
-//    DeBruijnGraphKMerSplitter<Graph> splitter(index.workdir(), index.K(), g);
-//    KMerDiskCounter<RtSeq> counter(index.workdir(), splitter);
-//    KMerIndexBuilder<typename DeBruijnKMerIndex<typename Graph::EdgeId, kmer_index_traits<RtSeq>>::KMerIndexT> builder(index.workdir(), 16, 1);
-//    size_t sz = builder.BuildIndex(index.index_, counter, /* save final */ true);
-//    index.data_.resize(sz);
-//
-//    if (!index.kmers)
-//      index.kmers = counter.GetFinalKMers();
-//
-//    SortUniqueKMers(counter, index);
-//  }
-//
-// protected:
-//  template <class KMerCounter, class Index>
-//  void SortUniqueKMers(KMerCounter &counter, Index &index) const {
-//    size_t swaps = 0;
-//    INFO("Arranging kmers in hash map order");
-//    for (auto I = index.kmers->begin(), E = index.kmers->end(); I != E; ++I) {
-//      size_t cidx = I - index.kmers->begin();
-//      size_t kidx = index.raw_seq_idx(*I);
-//      while (cidx != kidx) {
-//        auto J = index.kmers->begin() + kidx;
-//        using std::swap;
-//        swap(*I, *J);
-//        swaps += 1;
-//
-//        kidx = index.raw_seq_idx(*I);
-//      }
-//    }
-//    INFO("Done. Total swaps: " << swaps);
-//  }
-//
-// protected:
-//  DECL_LOGGER("K-mer Index Building");
-//};
-
-}
diff --git a/src/common/utils/indices/kmer_splitters.hpp b/src/common/utils/indices/kmer_splitters.hpp
deleted file mode 100644
index 4f3b087..0000000
--- a/src/common/utils/indices/kmer_splitters.hpp
+++ /dev/null
@@ -1,317 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#pragma once
-
-#include "io/reads/io_helper.hpp"
-#include "storing_traits.hpp"
-
-#include "utils/file_limit.hpp"
-#include "utils/mph_index/kmer_index_builder.hpp"
-
-namespace debruijn_graph {
-
-template<class StoringType>
-struct StoringTypeFilter {
-};
-
-template<>
-struct StoringTypeFilter<SimpleStoring> {
-    template<class Kmer>
-    bool filter(const Kmer &/*kmer*/) const {
-        return true;
-    }
-};
-
-template<>
-struct StoringTypeFilter<InvertableStoring> {
-    template<class Kmer>
-    bool filter(const Kmer &kmer) const {
-        return kmer.IsMinimal();
-    }
-};
-
-using RtSeqKMerSplitter = ::KMerSortingSplitter<RtSeq>;
-
-template<class KmerFilter>
-class DeBruijnKMerSplitter : public RtSeqKMerSplitter {
- private:
-  KmerFilter kmer_filter_;
- protected:
-  size_t read_buffer_size_;
- protected:
-  bool FillBufferFromSequence(const Sequence &seq,
-                              unsigned thread_id) {
-      if (seq.size() < this->K_)
-        return false;
-
-      RtSeq kmer = seq.start<RtSeq>(this->K_) >> 'A';
-      bool stop = false;
-      for (size_t j = this->K_ - 1; j < seq.size(); ++j) {
-        kmer <<= seq[j];
-        if (!kmer_filter_.filter(kmer))
-          continue;
-
-        stop |= this->push_back_internal(kmer, thread_id);
-      }
-      
-      return stop;
-  }
-
- public:
-  DeBruijnKMerSplitter(const std::string &work_dir,
-                       unsigned K, KmerFilter kmer_filter, size_t read_buffer_size = 0, uint32_t seed = 0)
-      : RtSeqKMerSplitter(work_dir, K, seed), kmer_filter_(kmer_filter), read_buffer_size_(read_buffer_size) {
-  }
- protected:
-  DECL_LOGGER("DeBruijnKMerSplitter");
-};
-
-struct ReadStatistics {
-  size_t reads_;
-  size_t max_read_length_;
-  size_t bases_;
-};
-
-template<class Read, class KmerFilter>
-class DeBruijnReadKMerSplitter : public DeBruijnKMerSplitter<KmerFilter> {
-  io::ReadStreamList<Read> &streams_;
-  io::SingleStream *contigs_;
-
-  template<class ReadStream>
-  ReadStatistics
-  FillBufferFromStream(ReadStream& stream, unsigned thread_id);
-
-  ReadStatistics rs_;
-
- public:
-  DeBruijnReadKMerSplitter(const std::string &work_dir,
-                           unsigned K, uint32_t seed,
-                           io::ReadStreamList<Read>& streams,
-                           io::SingleStream* contigs_stream = 0,
-                           size_t read_buffer_size = 0)
-      : DeBruijnKMerSplitter<KmerFilter>(work_dir, K, KmerFilter(), read_buffer_size, seed),
-      streams_(streams), contigs_(contigs_stream), rs_({0 ,0 ,0}) {}
-
-  path::files_t Split(size_t num_files) override;
-
-  size_t read_length() const { return rs_.max_read_length_; }
-  ReadStatistics stats() const { return rs_; }
-};
-
-template<class Read, class KmerFilter> template<class ReadStream>
-ReadStatistics
-DeBruijnReadKMerSplitter<Read, KmerFilter>::FillBufferFromStream(ReadStream &stream,
-                                                                 unsigned thread_id) {
-  typename ReadStream::ReadT r;
-  size_t reads = 0, rl = 0, bases = 0;
-
-  while (!stream.eof()) {
-    stream >> r;
-    rl = std::max(rl, r.size());
-    reads += 1;
-    bases += r.size();
-
-    if (this->FillBufferFromSequence(r.sequence(), thread_id))
-      break;
-  }
-  return { reads, rl, bases };
-}
-
-template<class Read, class KmerFilter>
-path::files_t DeBruijnReadKMerSplitter<Read, KmerFilter>::Split(size_t num_files) {
-  unsigned nthreads = (unsigned) streams_.size();
-
-  INFO("Splitting kmer instances into " << num_files << " buckets. This might take a while.");
-  path::files_t out = this->PrepareBuffers(num_files, nthreads, this->read_buffer_size_);
-
-  size_t counter = 0, rl = 0, bases = 0, n = 15;
-  streams_.reset();
-  while (!streams_.eof()) {
-#   pragma omp parallel for num_threads(nthreads) reduction(+ : counter) reduction(+ : bases) shared(rl)
-    for (unsigned i = 0; i < nthreads; ++i) {
-      ReadStatistics stats = FillBufferFromStream(streams_[i], i);
-      counter += stats.reads_;
-      bases += stats.bases_;
-
-      // There is no max reduction in C/C++ OpenMP... Only in FORTRAN :(
-#     pragma omp flush(rl)
-      if (stats.max_read_length_ > rl)
-#     pragma omp critical
-      {
-        rl = std::max(rl, stats.max_read_length_);
-      }
-    }
-
-    this->DumpBuffers(out);
-
-    if (counter >> n) {
-      INFO("Processed " << counter << " reads");
-      n += 1;
-    }
-  }
-
-  if (contigs_) {
-    INFO("Adding contigs from previous K");
-    unsigned cnt = 0;
-    contigs_->reset();
-    while (!contigs_->eof()) {
-      FillBufferFromStream(*contigs_, cnt);
-      this->DumpBuffers(out);
-      if (++cnt >= nthreads)
-        cnt = 0;
-    }
-  }
-
-  this->ClearBuffers();
-
-  INFO("Used " << counter << " reads. Maximum read length " << rl);
-  INFO("Average read length " << double(bases) / double(counter));
-  rs_ = { counter, rl, bases };
-
-  return out;
-}
-
-template<class Graph, class KmerFilter>
-class DeBruijnGraphKMerSplitter : public DeBruijnKMerSplitter<KmerFilter> {
-  typedef typename Graph::ConstEdgeIt EdgeIt;
-  typedef typename Graph::EdgeId EdgeId;
-
-  const Graph &g_;
-
-  size_t FillBufferFromEdges(EdgeIt &edge, unsigned thread_id);
-
- public:
-  DeBruijnGraphKMerSplitter(const std::string &work_dir,
-                            unsigned K, const Graph &g, size_t read_buffer_size = 0)
-      : DeBruijnKMerSplitter<KmerFilter>(work_dir, K, KmerFilter(), read_buffer_size), g_(g) {}
-
-  path::files_t Split(size_t num_files) override;
-};
-
-template<class Graph, class KmerFilter>
-size_t
-DeBruijnGraphKMerSplitter<Graph, KmerFilter>::FillBufferFromEdges(EdgeIt &edge,
-                                                                  unsigned thread_id) {
-  size_t seqs = 0;
-  for (; !edge.IsEnd(); ++edge) {
-    const Sequence &nucls = g_.EdgeNucls(*edge);
-
-    seqs += 1;
-    if (this->FillBufferFromSequence(nucls, thread_id))
-      break;
-  }
-
-  return seqs;
-}
-
-template<class Graph, class KmerFilter>
-path::files_t DeBruijnGraphKMerSplitter<Graph, KmerFilter>::Split(size_t num_files) {
-  INFO("Splitting kmer instances into " << num_files << " buckets. This might take a while.");
-
-  path::files_t out = this->PrepareBuffers(num_files, 1, this->read_buffer_size_);
-
-  size_t counter = 0, n = 10;
-  for (auto it = g_.ConstEdgeBegin(); !it.IsEnd(); ) {
-    counter += FillBufferFromEdges(it, 0);
-
-    this->DumpBuffers(out);
-
-    if (counter >> n) {
-      INFO("Processed " << counter << " edges");
-      n += 1;
-    }
-  }
-
-  INFO("Used " << counter << " sequences.");
-
-  this->ClearBuffers();
-  
-  return out;
-}
-
-
-template<class KmerFilter>
-class DeBruijnKMerKMerSplitter : public DeBruijnKMerSplitter<KmerFilter> {
-  typedef MMappedFileRecordArrayIterator<RtSeq::DataType> kmer_iterator;
-
-  unsigned K_source_;
-  std::vector<std::string> kmers_;
-  bool add_rc_;
-
-  size_t FillBufferFromKMers(kmer_iterator &kmer,
-                             unsigned thread_id);
-
- public:
-  DeBruijnKMerKMerSplitter(const std::string &work_dir,
-                           unsigned K_target, unsigned K_source, bool add_rc, size_t read_buffer_size = 0)
-      : DeBruijnKMerSplitter<KmerFilter>(work_dir, K_target, KmerFilter(), read_buffer_size),
-        K_source_(K_source), add_rc_(add_rc) {}
-
-  void AddKMers(const std::string &file) {
-    kmers_.push_back(file);
-  }
-
-  path::files_t Split(size_t num_files) override;
-};
-
-template<class KmerFilter>
-inline size_t DeBruijnKMerKMerSplitter<KmerFilter>::FillBufferFromKMers(kmer_iterator &kmer,
-                                                                        unsigned thread_id) {
-  size_t seqs = 0;
-  for (; kmer.good(); ++kmer) {
-    Sequence nucls(RtSeq(K_source_, *kmer));
-    seqs += 1;
-
-    bool stop = this->FillBufferFromSequence(nucls, thread_id);
-    if (add_rc_)
-      stop |= this->FillBufferFromSequence(!nucls, thread_id);
-
-    if (stop)
-      break;
-  }
-
-  return seqs;
-}
-
-template<class KmerFilter>
-path::files_t DeBruijnKMerKMerSplitter<KmerFilter>::Split(size_t num_files) {
-  unsigned nthreads = (unsigned) kmers_.size();
-
-  INFO("Splitting kmer instances into " << num_files << " buckets. This might take a while.");
-
-  path::files_t out = this->PrepareBuffers(num_files, nthreads, this->read_buffer_size_);
-
-  size_t counter = 0, n = 10;
-  std::vector<kmer_iterator> its;
-  its.reserve(nthreads);
-  for (auto it = kmers_.begin(), et = kmers_.end(); it != et; ++it)
-    its.emplace_back(*it, RtSeq::GetDataSize(K_source_));
-
-  while (std::any_of(its.begin(), its.end(),
-                     [](const kmer_iterator &it) { return it.good(); })) {
-#   pragma omp parallel for num_threads(nthreads) reduction(+ : counter)
-    for (unsigned i = 0; i < nthreads; ++i)
-      counter += FillBufferFromKMers(its[i], i);
-
-    this->DumpBuffers(out);
-
-    if (counter >> n) {
-      INFO("Processed " << counter << " kmers");
-      n += 1;
-    }
-  }
-
-  INFO("Used " << counter << " kmers.");
-
-  this->ClearBuffers();
-  
-  return out;
-}
-
-
-}
diff --git a/configs/debruijn/simplification.info.template b/src/common/utils/kmer_mph/1.cpp
similarity index 100%
copy from configs/debruijn/simplification.info.template
copy to src/common/utils/kmer_mph/1.cpp
diff --git a/src/common/utils/mph_index/CMakeLists.txt b/src/common/utils/kmer_mph/CMakeLists.txt
similarity index 88%
rename from src/common/utils/mph_index/CMakeLists.txt
rename to src/common/utils/kmer_mph/CMakeLists.txt
index cf07729..9e70962 100644
--- a/src/common/utils/mph_index/CMakeLists.txt
+++ b/src/common/utils/kmer_mph/CMakeLists.txt
@@ -7,7 +7,7 @@
 
 project(mph_index CXX)
 
-add_library(mph_index STATIC bitpair_vector.cpp)
+add_library(mph_index STATIC 1.cpp)
 
 target_link_libraries(mph_index cityhash)
 
diff --git a/src/common/utils/mph_index/kmer_index.hpp b/src/common/utils/kmer_mph/kmer_index.hpp
similarity index 80%
rename from src/common/utils/mph_index/kmer_index.hpp
rename to src/common/utils/kmer_mph/kmer_index.hpp
index 28b429d..8227f03 100644
--- a/src/common/utils/mph_index/kmer_index.hpp
+++ b/src/common/utils/kmer_mph/kmer_index.hpp
@@ -6,20 +6,22 @@
 //* See file LICENSE for details.
 //***************************************************************************
 
-#include "mphf.hpp"
-#include "base_hash.hpp"
-
 #include "kmer_index_traits.hpp"
 
+#include <boomphf/BooPHF.h>
+#include <city/city.h>
+
 #include <vector>
 #include <cmath>
 
+namespace utils {
+
 template<class Index>
 class KMerIndexBuilder;
 
 template<class traits>
 class KMerIndex {
- public:
+public:
   typedef traits kmer_index_traits;
   typedef typename traits::SeqType          KMerSeq;
   typedef typename traits::hash_function    hash_function;
@@ -27,11 +29,19 @@ class KMerIndex {
   typedef typename traits::KMerRawReference KMerRawReference;
   typedef size_t IdxType;
 
- private:
-  using KMerDataIndex = emphf::mphf<emphf::city_hasher>;
+private:
+  struct hash_function128 {
+    std::pair<uint64_t, uint64_t> operator()(const KMerSeq &k) const{
+      return CityHash128((const char *)k.data(), k.data_size() * sizeof(typename KMerSeq::DataType));
+    }
+    std::pair<uint64_t, uint64_t> operator()(const KMerRawReference k) const {
+      return CityHash128((const char *)k.data(), k.size() * sizeof(typename KMerSeq::DataType));
+    }
+  };
   typedef KMerIndex __self;
+  typedef boomphf::mphf<hash_function128> KMerDataIndex;
 
- public:
+public:
   KMerIndex(): index_(NULL), num_buckets_(0), size_(0) {}
 
   KMerIndex(const KMerIndex&) = delete;
@@ -60,7 +70,7 @@ class KMerIndex {
           return;
       size_ = 0;
       for (size_t i = 0; i < num_buckets_; i++)
-          size_ += index_[i].size();
+        size_ += index_[i].size();
   }
 
   size_t size() const {
@@ -70,15 +80,13 @@ class KMerIndex {
   size_t seq_idx(const KMerSeq &s) const {
     size_t bucket = seq_bucket(s);
 
-    return bucket_starts_[bucket] +
-            index_[bucket].lookup(s, typename traits::KMerSeqAdaptor());
+    return bucket_starts_[bucket] + index_[bucket].lookup(s);
   }
 
   size_t raw_seq_idx(const KMerRawReference data) const {
     size_t bucket = raw_seq_bucket(data);
 
-    return bucket_starts_[bucket] +
-            index_[bucket].lookup(data, typename traits::KMerRawReferenceAdaptor());
+    return bucket_starts_[bucket] + index_[bucket].lookup(data);
   }
 
   template<class Writer>
@@ -127,3 +135,4 @@ class KMerIndex {
 
   friend class KMerIndexBuilder<__self>;
 };
+}
diff --git a/src/common/utils/mph_index/kmer_index_builder.hpp b/src/common/utils/kmer_mph/kmer_index_builder.hpp
similarity index 57%
rename from src/common/utils/mph_index/kmer_index_builder.hpp
rename to src/common/utils/kmer_mph/kmer_index_builder.hpp
index 1d72db1..847e4ab 100644
--- a/src/common/utils/mph_index/kmer_index_builder.hpp
+++ b/src/common/utils/kmer_mph/kmer_index_builder.hpp
@@ -10,24 +10,20 @@
 
 #include "io/kmers/mmapped_reader.hpp"
 #include "io/kmers/mmapped_writer.hpp"
-#include "common/adt/pointer_iterator.hpp"
 #include "common/adt/kmer_vector.hpp"
 
-#include "utils/openmp_wrapper.h"
+#include "utils/parallel/openmp_wrapper.h"
 
 #include "utils/logger/logger.hpp"
-#include "utils/path_helper.hpp"
+#include "utils/filesystem/path_helper.hpp"
 
-#include "utils/memory_limit.hpp"
-#include "utils/file_limit.hpp"
+#include "utils/perf/memory_limit.hpp"
+#include "utils/filesystem/file_limit.hpp"
 
 #include "adt/iterator_range.hpp"
 #include "adt/loser_tree.hpp"
 
-#include "mphf.hpp"
-#include "base_hash.hpp"
-#include "hypergraph.hpp"
-#include "hypergraph_sorter_seq.hpp"
+#include "boomphf/BooPHF.h"
 
 #include <libcxx/sort.hpp>
 
@@ -46,151 +42,10 @@
 #include <vector>
 #include <cmath>
 
-template<class Seq>
-class KMerSplitter {
- public:
-  typedef typename Seq::hash hash_function;
-
-  KMerSplitter(const std::string &work_dir, unsigned K, uint32_t seed = 0)
-      : work_dir_(work_dir), K_(K), seed_(seed) {}
-
-  virtual ~KMerSplitter() {}
-
-  virtual path::files_t Split(size_t num_files) = 0;
-
-  size_t kmer_size() const {
-    return Seq::GetDataSize(K_) * sizeof(typename Seq::DataType);
-  }
-
-  unsigned K() const { return K_; }
-
- protected:
-  const std::string &work_dir_;
-  hash_function hash_;
-  unsigned K_;
-  uint32_t seed_;
-
-  DECL_LOGGER("K-mer Splitting");
-};
-
-template<class Seq>
-class KMerSortingSplitter : public KMerSplitter<Seq> {
- public:
-  KMerSortingSplitter(const std::string &work_dir, unsigned K, uint32_t seed = 0)
-      : KMerSplitter<Seq>(work_dir, K, seed), cell_size_(0), num_files_(0) {}
-
- protected:
-  using SeqKMerVector = KMerVector<Seq>;
-  using KMerBuffer = std::vector<SeqKMerVector>;
-
-  std::vector<KMerBuffer> kmer_buffers_;
-  size_t cell_size_;
-  size_t num_files_;
-
-  path::files_t PrepareBuffers(size_t num_files, unsigned nthreads, size_t reads_buffer_size) {
-    num_files_ = num_files;
-    
-    // Determine the set of output files
-    path::files_t out;
-    for (unsigned i = 0; i < num_files_; ++i)
-      out.push_back(this->GetRawKMersFname(i));
-
-    size_t file_limit = num_files_ + 2*nthreads;
-    size_t res = limit_file(file_limit);
-    if (res < file_limit) {
-      WARN("Failed to setup necessary limit for number of open files. The process might crash later on.");
-      WARN("Do 'ulimit -n " << file_limit << "' in the console to overcome the limit");
-    }
-
-    if (reads_buffer_size == 0) {
-      reads_buffer_size = 536870912ull;
-      size_t mem_limit =  (size_t)((double)(get_free_memory()) / (nthreads * 3));
-      INFO("Memory available for splitting buffers: " << (double)mem_limit / 1024.0 / 1024.0 / 1024.0 << " Gb");
-      reads_buffer_size = std::min(reads_buffer_size, mem_limit);
-    }
-    cell_size_ = reads_buffer_size / (num_files_ * this->kmer_size());
-    // Set sane minimum cell size
-    if (cell_size_ < 16384)
-      cell_size_ = 16384;
-
-    INFO("Using cell size of " << cell_size_);
-    kmer_buffers_.resize(nthreads);
-    for (unsigned i = 0; i < nthreads; ++i) {
-      KMerBuffer &entry = kmer_buffers_[i];
-      entry.resize(num_files_, KMerVector<Seq>(this->K_, (size_t) (1.1 * (double) cell_size_)));
-    }
-
-    return out;
-  }
-  
-  bool push_back_internal(const Seq &seq, unsigned thread_id) {
-    KMerBuffer &entry = kmer_buffers_[thread_id];
-
-    size_t idx = this->GetFileNumForSeq(seq, (unsigned)num_files_);
-    entry[idx].push_back(seq);
-    return entry[idx].size() > cell_size_;
-  }
-  
-  void DumpBuffers(const path::files_t &ostreams) {
-    VERIFY(ostreams.size() == num_files_ && kmer_buffers_[0].size() == num_files_);
+#include "kmer_splitters.hpp"
 
-#   pragma omp parallel for
-    for (unsigned k = 0; k < num_files_; ++k) {
-      // Below k is thread id!
-      
-      size_t sz = 0;
-      for (size_t i = 0; i < kmer_buffers_.size(); ++i)
-        sz += kmer_buffers_[i][k].size();
-
-      KMerVector<Seq> SortBuffer(this->K_, sz);
-      for (auto & entry : kmer_buffers_) {
-        const auto &buffer = entry[k];
-        for (size_t j = 0; j < buffer.size(); ++j)
-          SortBuffer.push_back(buffer[j]);
-      }
-      libcxx::sort(SortBuffer.begin(), SortBuffer.end(), typename KMerVector<Seq>::less2_fast());
-      auto it = std::unique(SortBuffer.begin(), SortBuffer.end(), typename KMerVector<Seq>::equal_to());
-
-#     pragma omp critical
-      {
-        size_t cnt =  it - SortBuffer.begin();
-
-        // Write k-mers
-        FILE *f = fopen(ostreams[k].c_str(), "ab");
-        VERIFY_MSG(f, "Cannot open temporary file to write");
-        fwrite(SortBuffer.data(), SortBuffer.el_data_size(), cnt, f);
-        fclose(f);
-
-        // Write index
-        f = fopen((ostreams[k] + ".idx").c_str(), "ab");
-        VERIFY_MSG(f, "Cannot open temporary file to write");
-        fwrite(&cnt, sizeof(cnt), 1, f);
-        fclose(f);
-      }
-    }
-
-    for (auto & entry : kmer_buffers_)
-      for (auto & eentry : entry)
-        eentry.clear();
-  }
-
-  void ClearBuffers() {
-    for (auto & entry : kmer_buffers_)
-      for (auto & eentry : entry) {
-        eentry.clear();
-        eentry.shrink_to_fit();
-      }
-  }
-  
-  std::string GetRawKMersFname(unsigned suffix) const {
-    return path::append_path(this->work_dir_, "kmers.raw." + std::to_string(suffix));
-  }
+namespace utils {
 
-  unsigned GetFileNumForSeq(const Seq &s, unsigned total) const {
-    return (unsigned)(this->hash_(s, this->seed_) % total);
-  }
-
-};
 
 template<class Seq, class traits = kmer_index_traits<Seq> >
 class KMerCounter {
@@ -222,7 +77,7 @@ class KMerDiskCounter : public KMerCounter<Seq> {
 public:
   KMerDiskCounter(const std::string &work_dir, KMerSplitter<Seq> &splitter)
       : work_dir_(work_dir), splitter_(splitter) {
-    std::string prefix = path::append_path(work_dir, "kmers_XXXXXX");
+    std::string prefix = fs::append_path(work_dir, "kmers_XXXXXX");
     char *tempprefix = strcpy(new char[prefix.length() + 1], prefix.c_str());
     VERIFY_MSG(-1 != (fd_ = ::mkstemp(tempprefix)), "Cannot create temporary file");
     kmer_prefix_ = tempprefix;
@@ -245,9 +100,11 @@ public:
 
   size_t Count(unsigned num_buckets, unsigned num_threads) override {
     unsigned K = splitter_.K();
+    unsigned num_files = num_buckets * num_threads;
 
     // Split k-mers into buckets.
-    path::files_t raw_kmers = splitter_.Split(num_buckets * num_threads);
+    INFO("Splitting kmer instances into " << num_files << " buckets using " << num_threads << " threads. This might take a while.");
+    fs::files_t raw_kmers = splitter_.Split(num_files, num_threads);
 
     INFO("Starting k-mer counting.");
     size_t kmers = 0;
@@ -256,6 +113,10 @@ public:
       kmers += MergeKMers(raw_kmers[iFile], GetUniqueKMersFname(iFile), K);
     }
     INFO("K-mer counting done. There are " << kmers << " kmers in total. ");
+    if (!kmers) {
+      FATAL_ERROR("No kmers were extracted from reads. Check the read lengths and k-mer length settings");
+      exit(-1);
+    }
 
     INFO("Merging temporary buckets.");
     for (unsigned i = 0; i < num_buckets; ++i) {
@@ -333,13 +194,13 @@ private:
       for (size_t sz : index) {
         auto end = std::next(beg, sz);
         ranges.push_back(adt::make_range(beg, end));
-        VERIFY(std::is_sorted(beg, end, array_less<typename Seq::DataType>()));
+        VERIFY(std::is_sorted(beg, end, adt::array_less<typename Seq::DataType>()));
         beg = end;
       }
 
       // Construct tree on top entries of runs
       adt::loser_tree<decltype(beg),
-                      array_less<typename Seq::DataType>> tree(ranges);
+              adt::array_less<typename Seq::DataType>> tree(ranges);
 
       if (tree.empty()) {
         FILE *g = fopen(ofname.c_str(), "ab");
@@ -349,14 +210,14 @@ private:
       }
 
       // Write it down!
-      KMerVector<Seq> buf(K, 1024*1024);
+      adt::KMerVector<Seq> buf(K, 1024*1024);
       auto pval = tree.pop();
       size_t total = 0;
       while (!tree.empty()) {
           buf.clear();
           for (size_t cnt = 0; cnt < buf.capacity() && !tree.empty(); ) {
               auto cval = tree.pop();
-              if (!array_equal_to<typename Seq::DataType>()(pval, cval)) {
+              if (!adt::array_equal_to<typename Seq::DataType>()(pval, cval)) {
                   buf.push_back(pval);
                   pval = cval;
                   cnt += 1;
@@ -382,11 +243,11 @@ private:
       return total;
     } else {
       // Sort the stuff
-      libcxx::sort(ins.begin(), ins.end(), array_less<typename Seq::DataType>());
+      libcxx::sort(ins.begin(), ins.end(), adt::array_less<typename Seq::DataType>());
 
       // FIXME: Use something like parallel version of unique_copy but with explicit
       // resizing.
-      auto it = std::unique(ins.begin(), ins.end(), array_equal_to<typename Seq::DataType>());
+      auto it = std::unique(ins.begin(), ins.end(), adt::array_equal_to<typename Seq::DataType>());
 
       MMappedRecordArrayWriter<typename Seq::DataType> os(ofname, Seq::GetDataSize(K));
       os.resize(it - ins.begin());
@@ -436,40 +297,16 @@ size_t KMerIndexBuilder<Index>::BuildIndex(Index &index, KMerCounter<Seq> &count
 
   INFO("Building perfect hash indices");
 
-  // Index building requires up to 40 bytes per k-mer. Limit number of threads depending on the memory limit.
-  unsigned num_threads = num_threads_;
-# ifdef SPADES_USE_JEMALLOC
-  const size_t *cmem = 0;
-  size_t clen = sizeof(cmem);
-
-  je_mallctl("stats.cactive", &cmem, &clen, NULL, 0);
-  size_t bucket_size = (36 * kmers + kmers * counter.kmer_size()) / num_buckets_;
-  num_threads = std::min<unsigned>((unsigned) ((get_memory_limit() - *cmem) / bucket_size), num_threads);
-  if (num_threads < 1)
-    num_threads = 1;
-  if (num_threads < num_threads_)
-    WARN("Number of threads was limited down to " << num_threads << " in order to fit the memory limits during the index construction");
-# endif
-
-# pragma omp parallel for shared(index) num_threads(num_threads)
+# pragma omp parallel for shared(index) num_threads(num_threads_)
   for (unsigned iFile = 0; iFile < num_buckets_; ++iFile) {
     typename KMerIndex<kmer_index_traits>::KMerDataIndex &data_index = index.index_[iFile];
     auto bucket = counter.GetBucket(iFile, !save_final);
     size_t sz = bucket->end() - bucket->begin();
     index.bucket_starts_[iFile + 1] = sz;
-    typename kmer_index_traits::KMerRawReferenceAdaptor adaptor;
-    size_t max_nodes = (size_t(std::ceil(double(sz) * 1.23)) + 2) / 3 * 3;
-    if (max_nodes >= uint64_t(1) << 32) {
-        emphf::hypergraph_sorter_seq<emphf::hypergraph<uint64_t> > sorter;
-        typename KMerIndex<kmer_index_traits>::KMerDataIndex(sorter,
-                                                             sz, emphf::range(bucket->begin(), bucket->end()),
-                                                             adaptor).swap(data_index);
-    } else {
-        emphf::hypergraph_sorter_seq<emphf::hypergraph<uint32_t> > sorter;
-        typename KMerIndex<kmer_index_traits>::KMerDataIndex(sorter,
-                                                             sz, emphf::range(bucket->begin(), bucket->end()),
-                                                             adaptor).swap(data_index);
-    }
+
+    data_index = typename Index::KMerDataIndex(sz,
+                                               boomphf::range(bucket->begin(), bucket->end()),
+                                               1, 2.0, false, false);
   }
 
   // Finally, record the sizes of buckets.
@@ -484,3 +321,4 @@ size_t KMerIndexBuilder<Index>::BuildIndex(Index &index, KMerCounter<Seq> &count
   index.count_size();
   return kmers;
 }
+}
diff --git a/src/common/utils/mph_index/kmer_index_traits.hpp b/src/common/utils/kmer_mph/kmer_index_traits.hpp
similarity index 76%
rename from src/common/utils/mph_index/kmer_index_traits.hpp
rename to src/common/utils/kmer_mph/kmer_index_traits.hpp
index 4656720..ad2f524 100644
--- a/src/common/utils/mph_index/kmer_index_traits.hpp
+++ b/src/common/utils/kmer_mph/kmer_index_traits.hpp
@@ -6,7 +6,8 @@
 //***************************************************************************
 
 #include "io/kmers/mmapped_reader.hpp"
-#include "mphf.hpp"
+
+namespace utils {
 
 template<class Seq>
 struct kmer_index_traits {
@@ -21,7 +22,7 @@ struct kmer_index_traits {
 
   struct raw_equal_to {
     bool operator()(const Seq &lhs, const KMerRawReference rhs) {
-      return (array_equal_to<typename Seq::DataType>()(lhs.data(), lhs.data_size(), rhs));
+      return (adt::array_equal_to<typename Seq::DataType>()(lhs.data(), lhs.data_size(), rhs));
     }
   };
 
@@ -35,28 +36,14 @@ struct kmer_index_traits {
   };
 
   struct hash_function {
-    uint64_t operator()(const Seq &k) const{
-      return typename Seq::hash()(k);
+    uint64_t operator()(const Seq &k, uint64_t seed = 0) const{
+        return typename Seq::hash()(k, (uint32_t)seed);
     }
-    uint64_t operator()(const KMerRawReference k) const {
-      return typename Seq::hash()(k.data(), k.size());
+    uint64_t operator()(const KMerRawReference k, uint64_t seed = 0) const {
+        return typename Seq::hash()(k.data(), k.size(), (uint32_t)seed);
     }
   };
 
-  struct KMerRawReferenceAdaptor {
-      emphf::byte_range_t operator()(const KMerRawReference k) const {
-          const uint8_t * data = (const uint8_t*)k.data();
-          return std::make_pair(data, data + k.data_size());
-      }
-  };
-
-  struct KMerSeqAdaptor {
-      emphf::byte_range_t operator()(const Seq &k) const {
-          const uint8_t * data = (const uint8_t*)k.data();
-          return std::make_pair(data, data + k.data_size() * sizeof(typename Seq::DataType));
-      }
-  };
-
   template<class Writer>
   static void raw_serialize(Writer &writer, RawKMerStorage *data) {
     size_t sz = data->data_size(), elcnt = data->elcnt();
@@ -90,3 +77,4 @@ struct kmer_index_traits {
   }
 
 };
+}
diff --git a/src/common/utils/kmer_mph/kmer_splitters.hpp b/src/common/utils/kmer_mph/kmer_splitters.hpp
new file mode 100644
index 0000000..8f053e8
--- /dev/null
+++ b/src/common/utils/kmer_mph/kmer_splitters.hpp
@@ -0,0 +1,394 @@
+//***************************************************************************
+//* Copyright (c) 2015 Saint Petersburg State University
+//* Copyright (c) 2011-2014 Saint Petersburg Academic University
+//* All Rights Reserved
+//* See file LICENSE for details.
+//***************************************************************************
+
+#pragma once
+
+#include "io/reads/io_helper.hpp"
+#include "utils/filesystem/file_limit.hpp"
+
+namespace utils {
+
+template<class Seq>
+class KMerSplitter {
+public:
+    typedef typename Seq::hash hash_function;
+
+    KMerSplitter(const std::string &work_dir, unsigned K, uint32_t seed = 0)
+            : work_dir_(work_dir), K_(K), seed_(seed) {}
+
+    virtual ~KMerSplitter() {}
+
+    virtual fs::files_t Split(size_t num_files, unsigned nthreads) = 0;
+
+    size_t kmer_size() const {
+        return Seq::GetDataSize(K_) * sizeof(typename Seq::DataType);
+    }
+
+    unsigned K() const { return K_; }
+
+protected:
+    const std::string &work_dir_;
+    hash_function hash_;
+    unsigned K_;
+    uint32_t seed_;
+
+    DECL_LOGGER("K-mer Splitting");
+};
+
+template<class Seq>
+class KMerSortingSplitter : public KMerSplitter<Seq> {
+public:
+    KMerSortingSplitter(const std::string &work_dir, unsigned K, uint32_t seed = 0)
+            : KMerSplitter<Seq>(work_dir, K, seed), cell_size_(0), num_files_(0) {}
+
+protected:
+    using SeqKMerVector = adt::KMerVector<Seq>;
+    using KMerBuffer = std::vector<SeqKMerVector>;
+
+    std::vector<KMerBuffer> kmer_buffers_;
+    size_t cell_size_;
+    size_t num_files_;
+
+    fs::files_t PrepareBuffers(size_t num_files, unsigned nthreads, size_t reads_buffer_size) {
+        num_files_ = num_files;
+
+        // Determine the set of output files
+        fs::files_t out;
+        for (unsigned i = 0; i < num_files_; ++i)
+            out.push_back(this->GetRawKMersFname(i));
+
+        size_t file_limit = num_files_ + 2*nthreads;
+        size_t res = limit_file(file_limit);
+        if (res < file_limit) {
+            WARN("Failed to setup necessary limit for number of open files. The process might crash later on.");
+            WARN("Do 'ulimit -n " << file_limit << "' in the console to overcome the limit");
+        }
+
+        if (reads_buffer_size == 0) {
+            reads_buffer_size = 536870912ull;
+            size_t mem_limit =  (size_t)((double)(utils::get_free_memory()) / (nthreads * 3));
+            INFO("Memory available for splitting buffers: " << (double)mem_limit / 1024.0 / 1024.0 / 1024.0 << " Gb");
+            reads_buffer_size = std::min(reads_buffer_size, mem_limit);
+        }
+        cell_size_ = reads_buffer_size / (num_files_ * this->kmer_size());
+        // Set sane minimum cell size
+        if (cell_size_ < 16384)
+            cell_size_ = 16384;
+
+        INFO("Using cell size of " << cell_size_);
+        kmer_buffers_.resize(nthreads);
+        for (unsigned i = 0; i < nthreads; ++i) {
+            KMerBuffer &entry = kmer_buffers_[i];
+            entry.resize(num_files_, adt::KMerVector<Seq>(this->K_, (size_t) (1.1 * (double) cell_size_)));
+        }
+
+        return out;
+    }
+
+    bool push_back_internal(const Seq &seq, unsigned thread_id) {
+        KMerBuffer &entry = kmer_buffers_[thread_id];
+
+        size_t idx = this->GetFileNumForSeq(seq, (unsigned)num_files_);
+        entry[idx].push_back(seq);
+        return entry[idx].size() > cell_size_;
+    }
+
+    void DumpBuffers(const fs::files_t &ostreams) {
+        VERIFY(ostreams.size() == num_files_ && kmer_buffers_[0].size() == num_files_);
+
+#   pragma omp parallel for
+        for (unsigned k = 0; k < num_files_; ++k) {
+            // Below k is thread id!
+
+            size_t sz = 0;
+            for (size_t i = 0; i < kmer_buffers_.size(); ++i)
+                sz += kmer_buffers_[i][k].size();
+
+            adt::KMerVector<Seq> SortBuffer(this->K_, sz);
+            for (auto & entry : kmer_buffers_) {
+                const auto &buffer = entry[k];
+                for (size_t j = 0; j < buffer.size(); ++j)
+                    SortBuffer.push_back(buffer[j]);
+            }
+            libcxx::sort(SortBuffer.begin(), SortBuffer.end(), typename adt::KMerVector<Seq>::less2_fast());
+            auto it = std::unique(SortBuffer.begin(), SortBuffer.end(), typename adt::KMerVector<Seq>::equal_to());
+
+#     pragma omp critical
+            {
+                size_t cnt =  it - SortBuffer.begin();
+
+                // Write k-mers
+                FILE *f = fopen(ostreams[k].c_str(), "ab");
+                VERIFY_MSG(f, "Cannot open temporary file to write");
+                fwrite(SortBuffer.data(), SortBuffer.el_data_size(), cnt, f);
+                fclose(f);
+
+                // Write index
+                f = fopen((ostreams[k] + ".idx").c_str(), "ab");
+                VERIFY_MSG(f, "Cannot open temporary file to write");
+                fwrite(&cnt, sizeof(cnt), 1, f);
+                fclose(f);
+            }
+        }
+
+        for (auto & entry : kmer_buffers_)
+            for (auto & eentry : entry)
+                eentry.clear();
+    }
+
+    void ClearBuffers() {
+        for (auto & entry : kmer_buffers_)
+            for (auto & eentry : entry) {
+                eentry.clear();
+                eentry.shrink_to_fit();
+            }
+    }
+
+    std::string GetRawKMersFname(unsigned suffix) const {
+        return fs::append_path(this->work_dir_, "kmers.raw." + std::to_string(suffix));
+    }
+
+    unsigned GetFileNumForSeq(const Seq &s, unsigned total) const {
+        return (unsigned)(this->hash_(s, this->seed_) % total);
+    }
+
+};
+
+using RtSeqKMerSplitter = KMerSortingSplitter<RtSeq>;
+
+template<class KmerFilter>
+class DeBruijnKMerSplitter : public RtSeqKMerSplitter {
+ private:
+  KmerFilter kmer_filter_;
+ protected:
+  size_t read_buffer_size_;
+ protected:
+  bool FillBufferFromSequence(const Sequence &seq,
+                              unsigned thread_id) {
+      if (seq.size() < this->K_)
+        return false;
+
+      RtSeq kmer = seq.start<RtSeq>(this->K_) >> 'A';
+      bool stop = false;
+      for (size_t j = this->K_ - 1; j < seq.size(); ++j) {
+        kmer <<= seq[j];
+        if (!kmer_filter_.filter(kmer))
+          continue;
+
+        stop |= this->push_back_internal(kmer, thread_id);
+      }
+
+      return stop;
+  }
+
+    bool FillBufferFromSequence(const RtSeq &seq,
+                                unsigned thread_id) {
+      if (seq.size() < this->K_)
+        return false;
+
+      RtSeq kmer = seq.start(this->K_) >> 'A';
+      bool stop = false;
+      for (size_t j = this->K_ - 1; j < seq.size(); ++j) {
+        kmer <<= seq[j];
+        if (!kmer_filter_.filter(kmer))
+          continue;
+
+        stop |= this->push_back_internal(kmer, thread_id);
+      }
+
+      return stop;
+  }
+
+ public:
+  DeBruijnKMerSplitter(const std::string &work_dir,
+                       unsigned K, KmerFilter kmer_filter, size_t read_buffer_size = 0, uint32_t seed = 0)
+      : RtSeqKMerSplitter(work_dir, K, seed), kmer_filter_(kmer_filter), read_buffer_size_(read_buffer_size) {
+  }
+ protected:
+  DECL_LOGGER("DeBruijnKMerSplitter");
+};
+
+struct ReadStatistics {
+  size_t reads_;
+  size_t max_read_length_;
+  size_t bases_;
+};
+
+template<class Read, class KmerFilter>
+class DeBruijnReadKMerSplitter : public DeBruijnKMerSplitter<KmerFilter> {
+  io::ReadStreamList<Read> &streams_;
+  io::SingleStream *contigs_;
+
+  template<class ReadStream>
+  ReadStatistics
+  FillBufferFromStream(ReadStream& stream, unsigned thread_id);
+
+  ReadStatistics rs_;
+
+ public:
+  DeBruijnReadKMerSplitter(const std::string &work_dir,
+                           unsigned K, uint32_t seed,
+                           io::ReadStreamList<Read>& streams,
+                           io::SingleStream* contigs_stream = 0,
+                           size_t read_buffer_size = 0)
+      : DeBruijnKMerSplitter<KmerFilter>(work_dir, K, KmerFilter(), read_buffer_size, seed),
+      streams_(streams), contigs_(contigs_stream), rs_({0 ,0 ,0}) {}
+
+  fs::files_t Split(size_t num_files, unsigned nthreads) override;
+
+  size_t read_length() const { return rs_.max_read_length_; }
+  ReadStatistics stats() const { return rs_; }
+};
+
+template<class Read, class KmerFilter> template<class ReadStream>
+ReadStatistics
+DeBruijnReadKMerSplitter<Read, KmerFilter>::FillBufferFromStream(ReadStream &stream,
+                                                                 unsigned thread_id) {
+  typename ReadStream::ReadT r;
+  size_t reads = 0, rl = 0, bases = 0;
+
+  while (!stream.eof()) {
+    stream >> r;
+    rl = std::max(rl, r.size());
+    reads += 1;
+    bases += r.size();
+
+    if (this->FillBufferFromSequence(r.sequence(), thread_id))
+      break;
+  }
+  return { reads, rl, bases };
+}
+
+template<class Read, class KmerFilter>
+fs::files_t DeBruijnReadKMerSplitter<Read, KmerFilter>::Split(size_t num_files, unsigned nthreads) {
+  fs::files_t out = this->PrepareBuffers(num_files, nthreads, this->read_buffer_size_);
+
+  size_t counter = 0, rl = 0, bases = 0, n = 15;
+  streams_.reset();
+  while (!streams_.eof()) {
+#   pragma omp parallel for num_threads(nthreads) reduction(+ : counter) reduction(+ : bases) shared(rl)
+    for (unsigned i = 0; i < (unsigned)streams_.size(); ++i) {
+      ReadStatistics stats = FillBufferFromStream(streams_[i], i);
+      counter += stats.reads_;
+      bases += stats.bases_;
+
+      // There is no max reduction in C/C++ OpenMP... Only in FORTRAN :(
+#     pragma omp flush(rl)
+      if (stats.max_read_length_ > rl)
+#     pragma omp critical
+      {
+        rl = std::max(rl, stats.max_read_length_);
+      }
+    }
+
+    this->DumpBuffers(out);
+
+    if (counter >> n) {
+      INFO("Processed " << counter << " reads");
+      n += 1;
+    }
+  }
+
+  if (contigs_) {
+    INFO("Adding contigs from previous K");
+    unsigned cnt = 0;
+    contigs_->reset();
+    while (!contigs_->eof()) {
+      FillBufferFromStream(*contigs_, cnt);
+      this->DumpBuffers(out);
+      if (++cnt >= nthreads)
+        cnt = 0;
+    }
+  }
+
+  this->ClearBuffers();
+
+  INFO("Used " << counter << " reads. Maximum read length " << rl);
+  INFO("Average read length " << double(bases) / double(counter));
+  rs_ = { counter, rl, bases };
+
+  return out;
+}
+
+template<class KmerFilter>
+class DeBruijnKMerKMerSplitter : public DeBruijnKMerSplitter<KmerFilter> {
+  typedef MMappedFileRecordArrayIterator<RtSeq::DataType> kmer_iterator;
+
+  unsigned K_source_;
+  std::vector<std::string> kmers_;
+  bool add_rc_;
+
+  size_t FillBufferFromKMers(kmer_iterator &kmer,
+                             unsigned thread_id);
+
+ public:
+  DeBruijnKMerKMerSplitter(const std::string &work_dir,
+                           unsigned K_target, unsigned K_source, bool add_rc, size_t read_buffer_size = 0)
+      : DeBruijnKMerSplitter<KmerFilter>(work_dir, K_target, KmerFilter(), read_buffer_size),
+        K_source_(K_source), add_rc_(add_rc) {}
+
+  void AddKMers(const std::string &file) {
+    kmers_.push_back(file);
+  }
+
+    fs::files_t Split(size_t num_files, unsigned nthreads) override;
+};
+
+template<class KmerFilter>
+inline size_t DeBruijnKMerKMerSplitter<KmerFilter>::FillBufferFromKMers(kmer_iterator &kmer,
+                                                                        unsigned thread_id) {
+  size_t seqs = 0;
+  for (; kmer.good(); ++kmer) {
+    RtSeq nucls(K_source_, *kmer);
+    seqs += 1;
+
+    bool stop = this->FillBufferFromSequence(nucls, thread_id);
+    if (add_rc_)
+      stop |= this->FillBufferFromSequence(!nucls, thread_id);
+
+    if (stop)
+      break;
+  }
+
+  return seqs;
+}
+
+template<class KmerFilter>
+fs::files_t DeBruijnKMerKMerSplitter<KmerFilter>::Split(size_t num_files, unsigned nthreads) {
+  unsigned nit = (unsigned) kmers_.size();
+
+  fs::files_t out = this->PrepareBuffers(num_files, nthreads, this->read_buffer_size_);
+
+  size_t counter = 0, n = 10;
+  std::vector<kmer_iterator> its;
+  its.reserve(nit);
+  for (auto it = kmers_.begin(), et = kmers_.end(); it != et; ++it)
+    its.emplace_back(*it, RtSeq::GetDataSize(K_source_));
+
+  while (std::any_of(its.begin(), its.end(),
+                     [](const kmer_iterator &it) { return it.good(); })) {
+#   pragma omp parallel for num_threads(nthreads) reduction(+ : counter)
+    for (unsigned i = 0; i < nit; ++i)
+      counter += FillBufferFromKMers(its[i], i);
+
+    this->DumpBuffers(out);
+
+    if (counter >> n) {
+      INFO("Processed " << counter << " kmers");
+      n += 1;
+    }
+  }
+
+  INFO("Used " << counter << " kmers.");
+
+  this->ClearBuffers();
+
+  return out;
+}
+
+
+}
diff --git a/src/common/utils/logger/log_writers.hpp b/src/common/utils/logger/log_writers.hpp
index 666c03f..587fdce 100644
--- a/src/common/utils/logger/log_writers.hpp
+++ b/src/common/utils/logger/log_writers.hpp
@@ -7,7 +7,7 @@
 
 #pragma once
 
-#include "utils/path_helper.hpp"
+#include "utils/filesystem/path_helper.hpp"
 #include "logger.hpp"
 
 #include <iostream>
@@ -15,29 +15,43 @@
 #include "config.hpp"
 
 #include <iostream>
+#include <mutex>
 
 namespace logging {
 
 struct console_writer : public writer {
-#ifdef SPADES_USE_JEMALLOC
 
     void write_msg(double time, size_t cmem, size_t max_rss, level l, const char *file, size_t line_num,
                    const char *source, const char *msg) {
-        std::cout << fmt::format("{:14s} {:>5s} / {:<5s} {:6.6s} {:24.24s} ({:26.26s}:{:4d})   {:s}",
-                                 human_readable_time(time), human_readable_memory(cmem),
-                                 human_readable_memory(max_rss), logging::level_name(l),
-                                 source, path::filename(file), int(line_num), msg)
-        << std::endl;
+        if (cmem != -1ull)
+            std::cout << fmt::format("{:14s} {:>5s} / {:<5s} {:6.6s} {:24.24s} ({:26.26s}:{:4d})   {:s}",
+                                     utils::human_readable_time(time), utils::human_readable_memory(cmem),
+                                     utils::human_readable_memory(max_rss), logging::level_name(l),
+                                     source, fs::filename(file), int(line_num), msg)
+            << std::endl;
+        else
+            std::cout << fmt::format("{:14s} {:^5s} {:6.6s} {:24.24s} ({:26.26s}:{:4d})   {:s}",
+                                     utils::human_readable_time(time), utils::human_readable_memory(max_rss),
+                                     logging::level_name(l), source, fs::filename(file), int(line_num), msg)
+                      << std::endl;
+    }
+
+};
+
+class mutex_writer : public writer {
+    std::mutex writer_mutex_;
+    std::shared_ptr<writer> writer_;
+
+public:
+
+    mutex_writer(std::shared_ptr<writer> writer) : writer_(writer) {}
+
+    void write_msg(double time, size_t cmem, size_t max_rss, level l, const char *file, size_t line_num,
+                   const char *source, const char *msg) override {
+        std::lock_guard<std::mutex> guard(writer_mutex_);
+        writer_->write_msg(time, cmem, max_rss, l, file, line_num, source, msg);
     }
 
-#else
-void write_msg(double time, size_t max_rss, level l, const char* file, size_t line_num, const char* source, const char* msg) {
-  std::cout << fmt::format("{:14s} {:^5s} {:6.6s} {:24.24s} ({:26.26s}:{:4d})   {:s}",
-                           human_readable_time(time), human_readable_memory(max_rss), logging::level_name(l),
-                           source, path::filename(file), int(line_num), msg)
-            << std::endl;
-}
-#endif
 };
 
 } // logging
diff --git a/src/common/utils/logger/logger.hpp b/src/common/utils/logger/logger.hpp
index c088aed..a4c06a9 100644
--- a/src/common/utils/logger/logger.hpp
+++ b/src/common/utils/logger/logger.hpp
@@ -6,7 +6,7 @@
 //***************************************************************************
 
 #pragma once
-#include "utils/perfcounter.hpp"
+#include "utils/perf/perfcounter.hpp"
 
 #include <vector>
 #include <unordered_map>
@@ -47,11 +47,8 @@ inline std::string level_name(level l)
 /////////////////////////////////////////////////////
 struct writer
 {
-#ifdef SPADES_USE_JEMALLOC
   virtual void write_msg(double time_in_sec, size_t cmem, size_t max_rss, level l, const char* file, size_t line_num, const char* source, const char* msg) = 0;
-#else
-    virtual void write_msg(double time_in_sec, size_t max_rss, level l, const char* file, size_t line_num, const char* source, const char* msg) = 0;
-#endif
+
   virtual ~writer(){}
 };
 
@@ -97,7 +94,7 @@ struct logger
 private:
     properties                 props_  ;
     std::vector<writer_ptr>    writers_;
-    perf_counter            timer_  ;
+    utils::perf_counter            timer_  ;
 };
 
 std::shared_ptr<logger>& __logger();
diff --git a/src/common/utils/logger/logger_impl.cpp b/src/common/utils/logger/logger_impl.cpp
index 4b8ce6b..0c29937 100644
--- a/src/common/utils/logger/logger_impl.cpp
+++ b/src/common/utils/logger/logger_impl.cpp
@@ -97,28 +97,27 @@ bool logger::need_log(level desired_level, const char* source) const {
     return desired_level >= source_level;
 }
 
-#ifdef SPADES_USE_JEMALLOC
 
 void logger::log(level desired_level, const char* file, size_t line_num, const char* source, const char* msg) {
   double time = timer_.time();
+  size_t mem = -1ull;
+  size_t max_rss;
+
+#ifdef SPADES_USE_JEMALLOC
   const size_t *cmem = 0, *cmem_max = 0;
   size_t clen = sizeof(cmem);
 
   je_mallctl("stats.cactive", &cmem, &clen, NULL, 0);
   je_mallctl("stats.cactive_max", &cmem_max, &clen, NULL, 0);
-
-  for (auto it = writers_.begin(); it != writers_.end(); ++it)
-    (*it)->write_msg(time, (*cmem) / 1024, (*cmem_max) / 1024, desired_level, file, line_num, source, msg);
-}
+  mem = (*cmem) / 1024;
+  max_rss = (*cmem_max) / 1024;
 #else
-void logger::log(level desired_level, const char* file, size_t line_num, const char* source, const char* msg) {
-  double time = timer_.time();
-  size_t max_rss = get_max_rss();
+  max_rss = get_max_rss();
+#endif
 
   for (auto it = writers_.begin(); it != writers_.end(); ++it)
-    (*it)->write_msg(time, max_rss, desired_level, file, line_num, source, msg);
+    (*it)->write_msg(time, mem, max_rss, desired_level, file, line_num, source, msg);
 }
-#endif
 
 //
 void logger::add_writer(writer_ptr ptr)
diff --git a/src/common/utils/md5.h b/src/common/utils/md5.h
index 471dc5a..bc52933 100644
--- a/src/common/utils/md5.h
+++ b/src/common/utils/md5.h
@@ -39,6 +39,8 @@
 #include <stdio.h>
 #include <string.h>
 
+namespace utils {
+
 #pragma region MD5 defines
 // Constants for MD5Transform routine.
 #define S11 7
@@ -390,4 +392,6 @@ public:
   }
 } ;
 
+}
+
 #endif
diff --git a/src/common/utils/mph_index/base_hash.hpp b/src/common/utils/mph_index/base_hash.hpp
deleted file mode 100644
index f8482ca..0000000
--- a/src/common/utils/mph_index/base_hash.hpp
+++ /dev/null
@@ -1,293 +0,0 @@
-#pragma once
-
-#include <cstdint>
-#include <tuple>
-#include <algorithm>
-#include <cstring>
-#include <city/city.h>
-#include "common.hpp"
-
-namespace emphf {
-
-    inline uint64_t unaligned_load64(uint8_t const* from)
-    {
-        uint64_t tmp;
-        memcpy(reinterpret_cast<char*>(&tmp), from, 8);
-        // XXX(ot): reverse bytes in big-endian architectures
-        return tmp;
-    }
-
-
-    struct jenkins64_hasher {
-
-        typedef uint64_t seed_t;
-        typedef uint64_t hash_t;
-        typedef std::tuple<hash_t, hash_t, hash_t> hash_triple_t;
-
-        jenkins64_hasher()
-        {}
-
-        jenkins64_hasher(uint64_t seed)
-            : m_seed(seed)
-        {}
-
-        template <typename Rng>
-        static jenkins64_hasher generate(Rng& rng)
-        {
-            return jenkins64_hasher(rng());
-        }
-
-        // Adapted from http://www.burtleburtle.net/bob/c/lookup8.c
-        hash_triple_t operator()(byte_range_t s) const
-        {
-            using std::get;
-            hash_triple_t h(m_seed, m_seed, 0x9e3779b97f4a7c13ULL);
-
-            size_t len = (size_t)(s.second - s.first);
-            uint8_t const* cur = s.first;
-            uint8_t const* end = s.second;
-
-            while (end - cur >= 24) {
-                get<0>(h) += unaligned_load64(cur);
-                cur += 8;
-                get<1>(h) += unaligned_load64(cur);
-                cur += 8;
-                get<2>(h) += unaligned_load64(cur);
-                cur += 8;
-
-                mix(h);
-            }
-
-            get<2>(h) += len;
-
-            switch (end - cur) {
-            case 23: get<2>(h) += (uint64_t(cur[22]) << 56);
-            case 22: get<2>(h) += (uint64_t(cur[21]) << 48);
-            case 21: get<2>(h) += (uint64_t(cur[20]) << 40);
-            case 20: get<2>(h) += (uint64_t(cur[19]) << 32);
-            case 19: get<2>(h) += (uint64_t(cur[18]) << 24);
-            case 18: get<2>(h) += (uint64_t(cur[17]) << 16);
-            case 17: get<2>(h) += (uint64_t(cur[16]) << 8);
-                // the first byte of c is reserved for the length
-            case 16: get<1>(h) += (uint64_t(cur[15]) << 56);
-            case 15: get<1>(h) += (uint64_t(cur[14]) << 48);
-            case 14: get<1>(h) += (uint64_t(cur[13]) << 40);
-            case 13: get<1>(h) += (uint64_t(cur[12]) << 32);
-            case 12: get<1>(h) += (uint64_t(cur[11]) << 24);
-            case 11: get<1>(h) += (uint64_t(cur[10]) << 16);
-            case 10: get<1>(h) += (uint64_t(cur[ 9]) << 8);
-            case  9: get<1>(h) += (uint64_t(cur[ 8]));
-            case  8: get<0>(h) += (uint64_t(cur[ 7]) << 56);
-            case  7: get<0>(h) += (uint64_t(cur[ 6]) << 48);
-            case  6: get<0>(h) += (uint64_t(cur[ 5]) << 40);
-            case  5: get<0>(h) += (uint64_t(cur[ 4]) << 32);
-            case  4: get<0>(h) += (uint64_t(cur[ 3]) << 24);
-            case  3: get<0>(h) += (uint64_t(cur[ 2]) << 16);
-            case  2: get<0>(h) += (uint64_t(cur[ 1]) << 8);
-            case  1: get<0>(h) += (uint64_t(cur[ 0]));
-            case 0: break; // nothing to add
-            default: assert(false);
-            }
-
-            mix(h);
-
-            return h;
-        }
-
-        // rehash a hash triple
-        hash_triple_t operator()(hash_triple_t h) const
-        {
-            std::get<0>(h) += m_seed;
-            std::get<1>(h) += m_seed;
-            std::get<2>(h) += 0x9e3779b97f4a7c13ULL;
-
-            mix(h);
-
-            return h;
-        }
-
-        void swap(jenkins64_hasher& other)
-        {
-            std::swap(m_seed, other.m_seed);
-        }
-
-        void save(std::ostream& os) const
-        {
-            os.write(reinterpret_cast<char const*>(&m_seed), sizeof(m_seed));
-        }
-
-        void load(std::istream& is)
-        {
-            is.read(reinterpret_cast<char*>(&m_seed), sizeof(m_seed));
-        }
-
-        seed_t seed() const
-        {
-            return m_seed;
-        }
-
-    protected:
-
-        static void mix(hash_triple_t& h)
-        {
-            uint64_t& a = std::get<0>(h);
-            uint64_t& b = std::get<1>(h);
-            uint64_t& c = std::get<2>(h);
-
-            a -= b; a -= c; a ^= (c >> 43);
-            b -= c; b -= a; b ^= (a << 9);
-            c -= a; c -= b; c ^= (b >> 8);
-            a -= b; a -= c; a ^= (c >> 38);
-            b -= c; b -= a; b ^= (a << 23);
-            c -= a; c -= b; c ^= (b >> 5);
-            a -= b; a -= c; a ^= (c >> 35);
-            b -= c; b -= a; b ^= (a << 49);
-            c -= a; c -= b; c ^= (b >> 11);
-            a -= b; a -= c; a ^= (c >> 12);
-            b -= c; b -= a; b ^= (a << 18);
-            c -= a; c -= b; c ^= (b >> 22);
-        }
-
-        seed_t m_seed;
-    };
-
-
-    // This is basically a wrapper to jenkins64_hasher that uses a
-    // 32-bit seed and returns 32-bit hashes by truncation
-    struct jenkins32_hasher {
-
-        typedef uint32_t seed_t;
-        typedef uint32_t hash_t;
-        typedef std::tuple<hash_t, hash_t, hash_t> hash_triple_t;
-
-        jenkins32_hasher()
-        {}
-
-        jenkins32_hasher(uint32_t seed)
-            : m_seed(seed)
-        {}
-
-        template <typename Rng>
-        static jenkins32_hasher generate(Rng& rng)
-        {
-            return jenkins32_hasher((uint32_t)rng());
-        }
-
-        hash_triple_t operator()(byte_range_t s) const
-        {
-            using std::get;
-            auto h64 = jenkins64_hasher(seed64())(s);
-            return hash_triple_t((uint32_t)get<0>(h64),
-                                 (uint32_t)get<1>(h64),
-                                 (uint32_t)get<2>(h64));
-        }
-
-        hash_triple_t operator()(hash_triple_t h) const
-        {
-            using std::get;
-            auto h64 = jenkins64_hasher::hash_triple_t(get<0>(h),
-                                                       get<1>(h),
-                                                       get<2>(h));
-            h64 = jenkins64_hasher(seed64())(h64);
-            return hash_triple_t((uint32_t)get<0>(h64),
-                                 (uint32_t)get<1>(h64),
-                                 (uint32_t)get<2>(h64));
-        }
-
-        void swap(jenkins32_hasher& other)
-        {
-            std::swap(m_seed, other.m_seed);
-        }
-
-        void save(std::ostream& os) const
-        {
-            os.write(reinterpret_cast<char const*>(&m_seed), sizeof(m_seed));
-        }
-
-        void load(std::istream& is)
-        {
-            is.read(reinterpret_cast<char*>(&m_seed), sizeof(m_seed));
-        }
-
-        seed_t seed() const
-        {
-            return m_seed;
-        }
-
-    protected:
-
-        uint64_t seed64() const
-        {
-            return (uint64_t(m_seed) << 32) | m_seed;
-        }
-
-        seed_t m_seed;
-
-    };
-
-
-    struct city_hasher {
-        typedef uint64_t seed_t;
-        typedef uint64_t hash_t;
-        typedef std::tuple<hash_t, hash_t, hash_t> hash_triple_t;
-
-        city_hasher()
-        {}
-
-        city_hasher(uint64_t seed)
-            : m_seed(seed)
-        {}
-
-        template <typename Rng>
-        static city_hasher generate(Rng& rng) {
-            return city_hasher(rng());
-        }
-
-        hash_triple_t operator()(byte_range_t s) const {
-            city_uint128 ch = CityHash128WithSeed((char*)s.first, s.second - s.first, {m_seed, 0x9e3779b97f4a7c13ULL});
-            hash_triple_t h(ch.first, 0x9e3779b97f4a7c13ULL, ch.second);
-            mix(h);
-            
-            return h;
-        }
-
-        void swap(city_hasher& other) {
-            std::swap(m_seed, other.m_seed);
-        }
-
-        void save(std::ostream& os) const {
-            os.write(reinterpret_cast<char const*>(&m_seed), sizeof(m_seed));
-        }
-
-        void load(std::istream& is) {
-            is.read(reinterpret_cast<char*>(&m_seed), sizeof(m_seed));
-        }
-
-        seed_t seed() const {
-            return m_seed;
-        }
-
-    protected:
-        seed_t m_seed;
-
-        static void mix(hash_triple_t& h) {
-            uint64_t& a = std::get<0>(h);
-            uint64_t& b = std::get<1>(h);
-            uint64_t& c = std::get<2>(h);
-
-            a -= b; a -= c; a ^= (c >> 43);
-            b -= c; b -= a; b ^= (a << 9);
-            c -= a; c -= b; c ^= (b >> 8);
-            a -= b; a -= c; a ^= (c >> 38);
-            b -= c; b -= a; b ^= (a << 23);
-            c -= a; c -= b; c ^= (b >> 5);
-            a -= b; a -= c; a ^= (c >> 35);
-            b -= c; b -= a; b ^= (a << 49);
-            c -= a; c -= b; c ^= (b >> 11);
-            a -= b; a -= c; a ^= (c >> 12);
-            b -= c; b -= a; b ^= (a << 18);
-            c -= a; c -= b; c ^= (b >> 22);
-        }
-    };
-
-}
diff --git a/src/common/utils/mph_index/bitpair_vector.cpp b/src/common/utils/mph_index/bitpair_vector.cpp
deleted file mode 100644
index de151bb..0000000
--- a/src/common/utils/mph_index/bitpair_vector.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-//
-// Created by anton on 3/22/16.
-//
-
-#include "bitpair_vector.hpp"
-
-#include <iostream>
-
-void emphf::bitpair_vector::resize(uint64_t n) {
-    // can only grow, for now
-    assert(n >= size());
-    m_size = n;
-    m_bits.resize((m_size + 31) / 32);
-}
-
-size_t emphf::bitpair_vector::size() const {
-    return m_size;
-}
-
-size_t emphf::bitpair_vector::mem_size() const {
-    return m_bits.size() * sizeof(m_bits[0]);
-}
-
-uint64_t emphf::bitpair_vector::operator[](uint64_t pos) const {
-    return (m_bits[pos / 32] >> ((pos % 32) * 2)) % 4;
-}
-
-void emphf::bitpair_vector::set(uint64_t pos, uint64_t val) {
-    assert(val < 4);
-    uint64_t word_pos = pos / 32;
-    uint64_t word_offset = (pos % 32) * 2;
-    m_bits[word_pos] &= ~(3ULL << word_offset);
-    m_bits[word_pos] |= val << word_offset;
-}
-
-uint64_t emphf::bitpair_vector::range_nonzeros(uint64_t begin, uint64_t end) const {
-    assert(begin <= end);
-    assert(end <= size());
-
-    uint64_t word_begin = begin / 32;
-    uint64_t offset_begin = (begin % 32) * 2;
-    uint64_t word_end = end / 32;
-    uint64_t offset_end = (end % 32) * 2;
-    uint64_t r = 0;
-
-    uint64_t word = (m_bits[word_begin] >> offset_begin) << offset_begin;
-    for (uint64_t w = word_begin; w < word_end; ++w) {
-        r += nonzero_pairs(word);
-        word = m_bits[w + 1];
-    }
-
-    uint64_t mask = (uint64_t(1) << offset_end) - 1;
-    r += nonzero_pairs(word & mask);
-
-    return r;
-}
-
-void emphf::bitpair_vector::swap(bitpair_vector& other) {
-            std::swap(m_size, other.m_size);
-            m_bits.swap(other.m_bits);
-        }
-
-
-void emphf::bitpair_vector::save(std::ostream& os) const {
-            os.write(reinterpret_cast<char const*>(&m_size), sizeof(m_size));
-            os.write(reinterpret_cast<char const*>(m_bits.data()), (std::streamsize)(sizeof(m_bits[0]) * m_bits.size()));
-        }
-
-void emphf::bitpair_vector::load(std::istream& is) {
-            is.read(reinterpret_cast<char*>(&m_size), sizeof(m_size));
-            m_bits.resize((m_size + 31) / 32);
-            is.read(reinterpret_cast<char*>(m_bits.data()), (std::streamsize)(sizeof(m_bits[0]) * m_bits.size()));
-        }
-
-std::vector<uint64_t> const &emphf::bitpair_vector::data() const {
-    return m_bits;
-}
diff --git a/src/common/utils/mph_index/bitpair_vector.hpp b/src/common/utils/mph_index/bitpair_vector.hpp
deleted file mode 100644
index 0ecd88e..0000000
--- a/src/common/utils/mph_index/bitpair_vector.hpp
+++ /dev/null
@@ -1,27 +0,0 @@
-#pragma once
-
-#include "common.hpp"
-#include <vector>
-
-namespace emphf {
-
-    class bitpair_vector {
-    public:
-        bitpair_vector(): m_size(0) {}
-        bitpair_vector(uint64_t n): m_size(0){resize(n);}
-        void resize(uint64_t n);
-        size_t size() const;
-        size_t mem_size() const;
-        uint64_t operator[](uint64_t pos) const;
-        void set(uint64_t pos, uint64_t val);
-        uint64_t range_nonzeros(uint64_t begin, uint64_t end) const;
-        void swap(bitpair_vector& other);
-        void save(std::ostream& os) const;
-        void load(std::istream& is);
-        std::vector<uint64_t> const & data() const;
-    protected:
-        std::vector<uint64_t> m_bits;
-        uint64_t m_size;
-    };
-
-}
diff --git a/src/common/utils/mph_index/common.hpp b/src/common/utils/mph_index/common.hpp
deleted file mode 100644
index b39e686..0000000
--- a/src/common/utils/mph_index/common.hpp
+++ /dev/null
@@ -1,66 +0,0 @@
-#pragma once
-
-#include <cstdint>
-#include <iterator>
-#include <memory>
-#include <cassert>
-
-#include "emphf_config.hpp"
-
-namespace emphf {
-
-    template <typename Iterator>
-    struct iter_range
-    {
-        iter_range(Iterator b, Iterator e)
-            : m_begin(b)
-            , m_end(e)
-        {}
-
-        Iterator begin() const
-        { return m_begin; }
-
-        Iterator end() const
-        { return m_end; }
-
-        Iterator m_begin, m_end;
-    };
-
-    typedef std::pair<uint8_t const*, uint8_t const*> byte_range_t;
-
-    struct identity_adaptor
-    {
-        byte_range_t operator()(byte_range_t s) const
-        {
-            return s;
-        }
-    };
-
-    template <typename Iterator>
-    iter_range<Iterator> range(Iterator begin, Iterator end)
-    {
-        return iter_range<Iterator>(begin, end);
-    }
-
-    inline uint64_t nonzero_pairs(uint64_t x)
-    {
-        static const uint64_t ones_step_4  = 0x1111111111111111ULL;
-        x = (x | (x >> 1)) & (0x5 * ones_step_4);
-
-#if EMPHF_USE_POPCOUNT
-        return (uint64_t)__builtin_popcountll(x);
-#else
-        static const uint64_t ones_step_8  = 0x0101010101010101ULL;
-        x = (x & 3 * ones_step_4) + ((x >> 2) & 3 * ones_step_4);
-        x = (x + (x >> 4)) & 0x0f * ones_step_8;
-        return (x * ones_step_8) >> 56;
-#endif
-    }
-
-    inline uint64_t msb(uint64_t x)
-    {
-        assert(x);
-        return 63 - __builtin_clzll(x);
-    }
-
-}
diff --git a/src/common/utils/mph_index/emphf_config.hpp b/src/common/utils/mph_index/emphf_config.hpp
deleted file mode 100644
index 9a131c0..0000000
--- a/src/common/utils/mph_index/emphf_config.hpp
+++ /dev/null
@@ -1,6 +0,0 @@
-#pragma once
-
-#define EMPHF_USE_POPCOUNT 1
-#ifndef EMPHF_USE_POPCOUNT
-#    define EMPHF_USE_POPCOUNT 0
-#endif
diff --git a/src/common/utils/mph_index/hypergraph.hpp b/src/common/utils/mph_index/hypergraph.hpp
deleted file mode 100644
index 9436a98..0000000
--- a/src/common/utils/mph_index/hypergraph.hpp
+++ /dev/null
@@ -1,137 +0,0 @@
-#pragma once
-
-#include <tuple>
-
-namespace emphf {
-
-    template <typename NodeType>
-    struct hypergraph {
-
-        typedef NodeType node_t; // last value is used as sentinel
-
-        struct hyperedge {
-            // deliberately do not initialize, to avoid polluting the
-            // page cache when initializing large mmapped arrays
-            hyperedge()
-            {}
-
-            hyperedge(NodeType v0_, NodeType v1_, NodeType v2_)
-                : v0(v0_)
-                , v1(v1_)
-                , v2(v2_)
-            {}
-
-            friend inline
-            std::ostream& operator<<(std::ostream& os, hyperedge const& t)
-            {
-                os << "("
-                   << t.v0 << ", "
-                   << t.v1 << ", "
-                   << t.v2 << ")";
-                return os;
-            }
-
-            friend inline
-            bool operator<(hyperedge const& lhs, hyperedge const& rhs)
-            {
-                return
-                    std::make_tuple(lhs.v0, lhs.v1, lhs.v2) <
-                    std::make_tuple(rhs.v0, rhs.v1, rhs.v2);
-            }
-
-            friend inline
-            bool operator==(hyperedge const& lhs, hyperedge const& rhs)
-            {
-                return
-                    lhs.v0 == rhs.v0 &&
-                    lhs.v1 == rhs.v1 &&
-                    lhs.v2 == rhs.v2;
-            }
-
-            friend inline
-            bool operator!=(hyperedge const& lhs, hyperedge const& rhs)
-            {
-                return !(lhs == rhs);
-            }
-
-            NodeType v0, v1, v2;
-        };
-
-        static hyperedge sentinel()
-        {
-            return hyperedge(-node_t(1), -node_t(1), -node_t(1));
-        }
-
-        struct xored_adj_list {
-            xored_adj_list(node_t degree_= 0, node_t v1s_ = 0, node_t v2s_ = 0)
-                : degree(degree_)
-                , v1s(v1s_)
-                , v2s(v2s_)
-            {}
-
-            void add_edge(hyperedge const& edge)
-            {
-                degree += 1;
-                xor_edge(edge);
-            }
-
-            void delete_edge(hyperedge const& edge)
-            {
-                assert(degree >= 1);
-                degree -= 1;
-                xor_edge(edge);
-            }
-
-            hyperedge edge_from(node_t v0) const
-            {
-                assert(degree == 1);
-                return hyperedge(v0, v1s, v2s);
-            }
-
-            node_t degree;
-            node_t v1s;
-            node_t v2s;
-
-        private:
-
-            void xor_edge(hyperedge const& edge)
-            {
-                assert(edge.v1 < edge.v2);
-                v1s ^= edge.v1;
-                v2s ^= edge.v2;
-            }
-
-        };
-    };
-
-    // a brief note about hyperedge orientations: throughout the
-    // code we keep the invariant that for every hyperedge (v0,
-    // v1, v2) it holds v1 < v2. This leaves only three
-    // orientations, which we index with 0, 1, and 2 depending on
-    // whether v0 is the first, second, or third smallest node. We
-    // call the 0-orientation "canonical".
-    template <typename HyperEdge>
-    static unsigned orientation(HyperEdge const& t)
-    {
-        // although it should be v0 < v1 < v2, sometimes we
-        // compare sentinel edges
-        assert(t.v1 <= t.v2);
-        return (t.v0 > t.v1) + (t.v0 > t.v2);
-    }
-
-    template <typename HyperEdge>
-    static HyperEdge canonicalize_edge(HyperEdge t)
-    {
-        assert(t.v1 <= t.v2);
-        if (t.v0 > t.v2) {
-            std::swap(t.v0, t.v2);
-        }
-
-        if (t.v0 > t.v1) {
-            std::swap(t.v0, t.v1);
-        }
-
-        assert(orientation(t) == 0);
-        return t;
-    }
-}
diff --git a/src/common/utils/mph_index/hypergraph_sorter_seq.hpp b/src/common/utils/mph_index/hypergraph_sorter_seq.hpp
deleted file mode 100644
index 9adfdc3..0000000
--- a/src/common/utils/mph_index/hypergraph_sorter_seq.hpp
+++ /dev/null
@@ -1,130 +0,0 @@
-#pragma once
-
-#include <cassert>
-#include <cstdint>
-#include <tuple>
-#include <cmath>
-#include <vector>
-#include <iterator>
-#include <algorithm>
-#include <stdexcept>
-
-#include "common.hpp"
-#include "hypergraph.hpp"
-
-#include "utils/logger/logger.hpp"
-
-namespace emphf {
-
-    template <typename HypergraphType>
-    class hypergraph_sorter_seq {
-    public:
-        typedef HypergraphType hg;
-        typedef typename hg::node_t node_t;
-        typedef typename hg::hyperedge hyperedge;
-        typedef typename hg::xored_adj_list xored_adj_list;
-
-        hypergraph_sorter_seq()
-        {}
-
-        template <typename Range, typename EdgeGenerator>
-        bool try_generate_and_sort(Range const& input_range,
-                                   EdgeGenerator const& edge_gen,
-                                   size_t n,
-                                   size_t hash_domain,
-                                   bool verbose = true)
-        {
-            using std::get;
-            std::vector<xored_adj_list> adj_lists;
-
-            size_t m = hash_domain * 3;
-
-            // do all the allocations upfront
-            m_peeling_order.clear();
-            m_peeling_order.reserve(n);
-            adj_lists.resize(m);
-
-            // generate edges
-            if (verbose) {
-                //logger() << "Generating hyperedges and populating adjacency lists"
-                //         << std::endl;
-            }
-
-            for (auto const& val: input_range) {
-                auto edge = edge_gen(val);
-                // canonical by construction
-                assert(orientation(edge) == 0);
-
-                adj_lists[edge.v0].add_edge(edge);
-
-                std::swap(edge.v0, edge.v1);
-                adj_lists[edge.v0].add_edge(edge);
-
-                std::swap(edge.v0, edge.v2);
-                adj_lists[edge.v0].add_edge(edge);
-            }
-
-            // peel
-            if (verbose) {
-                // logger() << "Peeling" << std::endl;
-            }
-
-            auto visit = [&](node_t v0) {
-                if (adj_lists[v0].degree == 1) {
-                    auto edge = adj_lists[v0].edge_from(v0);
-                    m_peeling_order.push_back(edge);
-
-                    edge = canonicalize_edge(edge);
-                    adj_lists[edge.v0].delete_edge(edge);
-
-                    std::swap(edge.v0, edge.v1);
-                    adj_lists[edge.v0].delete_edge(edge);
-
-                    std::swap(edge.v0, edge.v2);
-                    adj_lists[edge.v0].delete_edge(edge);
-                }
-            };
-
-            size_t queue_position = 0;
-            for (node_t v0 = 0; v0 < m; ++v0) {
-                visit(v0);
-
-                while (queue_position < m_peeling_order.size()) {
-                    auto const& cur_edge = m_peeling_order[queue_position];
-
-                    visit(cur_edge.v1);
-                    visit(cur_edge.v2);
-                    queue_position += 1;
-                }
-            }
-
-            if (m_peeling_order.size() < n) {
-                if (verbose) {
-                    // logger() << "Hypergraph is not peelable: "
-                    //         << (n - m_peeling_order.size()) << " edges remaining"
-                    //         << std::endl;
-                }
-                return false;
-            }
-
-            assert(m_peeling_order.size() == n);
-
-            return true;
-        }
-
-        typedef typename std::vector<hyperedge>::const_reverse_iterator
-        peeling_iterator;
-
-        std::pair<peeling_iterator, peeling_iterator>
-        get_peeling_order() const
-        {
-            return std::make_pair(m_peeling_order.crbegin(),
-                                  m_peeling_order.crend());
-        }
-
-    private:
-
-        size_t m_hash_domain;
-        std::vector<hyperedge> m_peeling_order;
-    };
-}
diff --git a/src/common/utils/mph_index/mphf.hpp b/src/common/utils/mph_index/mphf.hpp
deleted file mode 100644
index 3327fef..0000000
--- a/src/common/utils/mph_index/mphf.hpp
+++ /dev/null
@@ -1,136 +0,0 @@
-#pragma once
-
-#include <random>
-
-#include "bitpair_vector.hpp"
-#include "ranked_bitpair_vector.hpp"
-
-#include "utils/logger/logger.hpp"
-
-namespace emphf {
-
-    template <typename BaseHasher>
-    class mphf {
-    public:
-        mphf()
-        {}
-
-        template <typename HypergraphSorter, typename Range, typename Adaptor>
-        mphf(HypergraphSorter& sorter, size_t n,
-             Range const& input_range, Adaptor adaptor,
-             double gamma = 1.23)
-            : m_n(n)
-            , m_hash_domain(std::max((size_t(std::ceil(double(m_n) * gamma)) + 2) / 3, size_t(2)))
-        {
-            typedef typename HypergraphSorter::node_t node_t;
-            typedef typename HypergraphSorter::hyperedge hyperedge;
-            typedef decltype(*std::begin(input_range)) value_type;
-
-            size_t nodes_domain = m_hash_domain * 3;
-
-            if (nodes_domain >= std::numeric_limits<node_t>::max()) {
-                throw std::invalid_argument("Too many nodes for node_t");
-            }
-
-            auto edge_gen = [&](value_type s) {
-                using std::get;
-                auto hashes = m_hasher(adaptor(s));
-                return hyperedge((node_t)(get<0>(hashes) % m_hash_domain),
-                                 (node_t)(m_hash_domain +
-                                          (get<1>(hashes) % m_hash_domain)),
-                                 (node_t)(2 * m_hash_domain +
-                                          (get<2>(hashes) % m_hash_domain)));
-            };
-
-            std::mt19937_64 rng(37); // deterministic seed
-
-            for (size_t trial = 0; ; ++trial) {
-                //logger() << "Hypergraph generation: trial " << trial << std::endl;
-
-                m_hasher = BaseHasher::generate(rng);
-                if (sorter.try_generate_and_sort(input_range, edge_gen,
-                                                 m_n, m_hash_domain)) break;
-            }
-
-            auto peeling_order = sorter.get_peeling_order();
-            bitpair_vector bv(nodes_domain);
-
-            //logger() << "Assigning values" << std::endl;
-
-            for (auto edge = peeling_order.first;
-                 edge != peeling_order.second;
-                 ++edge) {
-
-                uint64_t target = orientation(*edge);
-                uint64_t assigned = bv[edge->v1] + bv[edge->v2];
-
-                // "assigned values" must be nonzeros to be ranked, so
-                // if the result is 0 we assign 3
-                bv.set(edge->v0, ((target - assigned + 9) % 3) ?: 3);
-            }
-
-            m_bv.build(std::move(bv));
-        }
-
-        uint64_t size() const
-        {
-            return m_n;
-        }
-
-        size_t mem_size() const {
-            return m_bv.mem_size();
-        }
-
-        BaseHasher const& base_hasher() const
-        {
-            return m_hasher;
-        }
-
-        template <typename T, typename Adaptor>
-        uint64_t lookup(const T &val, Adaptor adaptor)
-        {
-            using std::get;
-            auto hashes = m_hasher(adaptor(val));
-            uint64_t nodes[3] = {get<0>(hashes) % m_hash_domain,
-                                 m_hash_domain + (get<1>(hashes) % m_hash_domain),
-                                 2 * m_hash_domain + (get<2>(hashes) % m_hash_domain)};
-
-            uint64_t hidx = (m_bv[nodes[0]] + m_bv[nodes[1]] + m_bv[nodes[2]]) % 3;
-            return m_bv.rank(nodes[hidx]);
-        }
-
-        void swap(mphf& other)
-        {
-            std::swap(m_n, other.m_n);
-            std::swap(m_hash_domain, other.m_hash_domain);
-            m_hasher.swap(other.m_hasher);
-            m_bv.swap(other.m_bv);
-        }
-
-        void save(std::ostream& os) const
-        {
-            os.write(reinterpret_cast<char const*>(&m_n), sizeof(m_n));
-            os.write(reinterpret_cast<char const*>(&m_hash_domain),
-                     sizeof(m_hash_domain));
-            m_hasher.save(os);
-            m_bv.save(os);
-        }
-
-        void load(std::istream& is)
-        {
-            is.read(reinterpret_cast<char*>(&m_n), sizeof(m_n));
-            is.read(reinterpret_cast<char*>(&m_hash_domain),
-                    sizeof(m_hash_domain));
-            m_hasher.load(is);
-            m_bv.load(is);
-        }
-
-
-    private:
-
-        uint64_t m_n;
-        uint64_t m_hash_domain;
-        BaseHasher m_hasher;
-        ranked_bitpair_vector m_bv;
-    };
-}
diff --git a/src/common/utils/mph_index/ranked_bitpair_vector.hpp b/src/common/utils/mph_index/ranked_bitpair_vector.hpp
deleted file mode 100644
index 8488417..0000000
--- a/src/common/utils/mph_index/ranked_bitpair_vector.hpp
+++ /dev/null
@@ -1,91 +0,0 @@
-#pragma once
-
-#include <cstdint>
-
-#include "common.hpp"
-#include "bitpair_vector.hpp"
-
-namespace emphf {
-
-    class ranked_bitpair_vector {
-    public:
-
-        ranked_bitpair_vector()
-        {}
-
-        void build(bitpair_vector&& bv)
-        {
-            m_bv.swap(bv);
-
-            uint64_t cur_rank = 0;
-            auto const& words = m_bv.data();
-            for (size_t i = 0; i < words.size(); ++i) {
-                if (((i * 32) % pairs_per_block) == 0) {
-                    m_block_ranks.push_back(cur_rank);
-                }
-                cur_rank += nonzero_pairs(words[i]);
-            }
-        }
-
-        size_t size() const
-        {
-            return m_bv.size();
-        }
-
-        size_t mem_size() const {
-            return m_bv.mem_size() + m_block_ranks.size() * sizeof(m_block_ranks[0]);
-        }
-
-        uint64_t operator[](uint64_t pos) const
-        {
-            return m_bv[pos];
-        }
-
-        uint64_t rank(uint64_t pos) const
-        {
-            uint64_t word_idx = pos / 32;
-            uint64_t word_offset = pos % 32;
-            uint64_t block = pos / pairs_per_block;
-            uint64_t r = m_block_ranks[block];
-
-            for (uint64_t w = block * pairs_per_block / 32; w < word_idx; ++w) {
-                r += nonzero_pairs(m_bv.data()[w]);
-            }
-
-            uint64_t mask = (uint64_t(1) << (word_offset * 2)) - 1;
-            r += nonzero_pairs(m_bv.data()[word_idx] & mask);
-
-            return r;
-        }
-
-        void swap(ranked_bitpair_vector& other)
-        {
-            m_bv.swap(other.m_bv);
-            m_block_ranks.swap(other.m_block_ranks);
-        }
-
-        void save(std::ostream& os) const
-        {
-            m_bv.save(os);
-            assert(m_block_ranks.size() ==
-                   (m_bv.size() + pairs_per_block - 1) / pairs_per_block);
-            os.write(reinterpret_cast<char const*>(m_block_ranks.data()),
-                     (std::streamsize)(sizeof(m_block_ranks[0]) * m_block_ranks.size()));
-        }
-
-        void load(std::istream& is)
-        {
-            m_bv.load(is);
-            m_block_ranks.resize((m_bv.size() + pairs_per_block - 1) / pairs_per_block);
-            is.read(reinterpret_cast<char*>(m_block_ranks.data()),
-                    (std::streamsize)(sizeof(m_block_ranks[0]) * m_block_ranks.size()));
-        }
-
-    protected:
-
-        static const uint64_t pairs_per_block = 512;
-        bitpair_vector m_bv;
-        std::vector<uint64_t> m_block_ranks;
-    };
-
-}
diff --git a/src/common/utils/openmp_wrapper.h b/src/common/utils/parallel/openmp_wrapper.h
similarity index 100%
rename from src/common/utils/openmp_wrapper.h
rename to src/common/utils/parallel/openmp_wrapper.h
diff --git a/src/common/utils/parallel_wrapper.hpp b/src/common/utils/parallel/parallel_wrapper.hpp
similarity index 100%
rename from src/common/utils/parallel_wrapper.hpp
rename to src/common/utils/parallel/parallel_wrapper.hpp
diff --git a/src/common/utils/memory.hpp b/src/common/utils/perf/memory.hpp
similarity index 91%
rename from src/common/utils/memory.hpp
rename to src/common/utils/perf/memory.hpp
index b0ce5e3..062140f 100755
--- a/src/common/utils/memory.hpp
+++ b/src/common/utils/perf/memory.hpp
@@ -21,6 +21,7 @@
 #include <fstream>
 #include <string>
 
+namespace utils {
 //////////////////////////////////////////////////////////////////////////////
 //
 // process_mem_usage(unsigned long &, long &) - takes two longs by reference,
@@ -29,18 +30,17 @@
 //
 // On failure, returns 0, 0
 
-void process_mem_usage(unsigned long& vm_usage, long& resident_set)
-{
+void process_mem_usage(unsigned long &vm_usage, long &resident_set) {
    using std::ios_base;
    using std::ifstream;
    using std::string;
 
-   vm_usage     = 0;
+   vm_usage = 0;
    resident_set = 0;
 
    // 'file' stat seems to give the most reliable results
    //
-   ifstream stat_stream("/proc/self/stat",ios_base::in);
+   ifstream stat_stream("/proc/self/stat", ios_base::in);
 
    // dummy vars for leading entries in stat that we don't care about
    //
@@ -62,8 +62,9 @@ void process_mem_usage(unsigned long& vm_usage, long& resident_set)
    stat_stream.close();
 
    long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages
-   vm_usage     = vsize / 1024;
+   vm_usage = vsize / 1024;
    resident_set = rss * page_size_kb;
 }
 
+}
 #endif /* MEMORY_HPP_ */
diff --git a/src/common/utils/memory_limit.hpp b/src/common/utils/perf/memory_limit.hpp
similarity index 97%
rename from src/common/utils/memory_limit.hpp
rename to src/common/utils/perf/memory_limit.hpp
index 5aee818..d15ce45 100644
--- a/src/common/utils/memory_limit.hpp
+++ b/src/common/utils/perf/memory_limit.hpp
@@ -27,6 +27,10 @@
 
 #endif
 
+#include <common/utils/logger/logger.hpp>
+
+namespace utils {
+
 inline void limit_memory(size_t limit) {
     rlimit rl;
     if (sizeof(rlim_t) < 8) {
@@ -91,7 +95,8 @@ inline size_t get_used_memory() {
 #endif
 }
 
-
 inline size_t get_free_memory() {
     return get_memory_limit() - get_used_memory();
 }
+
+}
diff --git a/src/common/utils/perfcounter.hpp b/src/common/utils/perf/perfcounter.hpp
similarity index 64%
rename from src/common/utils/perfcounter.hpp
rename to src/common/utils/perf/perfcounter.hpp
index 3487888..660ad53 100644
--- a/src/common/utils/perfcounter.hpp
+++ b/src/common/utils/perf/perfcounter.hpp
@@ -10,28 +10,24 @@
 #include <string>
 #include <cppformat/format.h>
 
-struct perf_counter
-{
-    perf_counter()
-    {
+namespace utils {
+struct perf_counter {
+    perf_counter() {
         reset();
     }
 
-    double time() const
-    {
+    double time() const {
         struct timeval now;
         gettimeofday(&now, NULL);
 
-        return (double)(now.tv_sec - time_.tv_sec) + (double)(now.tv_usec - time_.tv_usec) * 1e-6;
+        return (double) (now.tv_sec - time_.tv_sec) + (double) (now.tv_usec - time_.tv_usec) * 1e-6;
     }
 
-    double time_ms() const
-    {
+    double time_ms() const {
         return time() * 1e3;
     }
 
-    void reset()
-    {
+    void reset() {
         gettimeofday(&time_, NULL);
     }
 
@@ -40,15 +36,14 @@ private:
 };
 
 
-inline std::string human_readable_time(double time_in_sec)
-{
+inline std::string human_readable_time(double time_in_sec) {
 //    assert(time_in_sec > 0);
 
-    size_t msec  = size_t(time_in_sec * 1000) % 1000;
-    size_t sec   = size_t(time_in_sec);
+    size_t msec = size_t(time_in_sec * 1000) % 1000;
+    size_t sec = size_t(time_in_sec);
     size_t hours = sec / 3600;
-    size_t mins  = (sec / 60) % 60;
-    sec         %= 60;
+    size_t mins = (sec / 60) % 60;
+    sec %= 60;
 
     return fmt::format("{:3d}:{:02d}:{:02d}.{:03d}", hours, mins, sec, msec);
 }
@@ -61,8 +56,7 @@ inline std::string human_readable_memory(size_t max_rss) {
     }
 }
 
-struct avg_perf_counter
-{
+struct avg_perf_counter {
     avg_perf_counter(/*const string& name*/)// : name_(name)
     {
         reset();
@@ -72,52 +66,48 @@ struct avg_perf_counter
 //        cout << "Time in counter " << name_ << ": " << human_readable_time(time()) << endl;
 //    }
 
-    int start(int ret = 0)
-    {
+    int start(int ret = 0) {
         p_cnt_.reset();
         return ret;
     }
 
-    int stop(int ret = 0)
-    {
+    int stop(int ret = 0) {
         counter_++;
         whole_time_ += p_cnt_.time();
         return ret;
     }
-    double time() const
-    {
+
+    double time() const {
         return whole_time_;
     }
-    size_t counts()
-    {
+
+    size_t counts() {
         return counter_;
     }
-    double time_ms() const
-    {
+
+    double time_ms() const {
         return time() * 1e3;
     }
 
-    double avg_time() const
-    {
-        return counter_ > 0 ? whole_time_/(double)counter_ : 0.;
+    double avg_time() const {
+        return counter_ > 0 ? whole_time_ / (double) counter_ : 0.;
     }
 
-    double avg_time_ms() const
-    {
+    double avg_time_ms() const {
         return avg_time() * 1e3;
     }
 
-    void reset()
-    {
+    void reset() {
         p_cnt_.reset();
         whole_time_ = 0;
         counter_ = 0;
     }
 
 private:
-  const std::string name_;
-  perf_counter p_cnt_;
-  double whole_time_;
-  size_t counter_;
+    const std::string name_;
+    perf_counter p_cnt_;
+    double whole_time_;
+    size_t counter_;
 
 };
+}
diff --git a/src/common/utils/indices/key_with_hash.hpp b/src/common/utils/ph_map/key_with_hash.hpp
similarity index 99%
rename from src/common/utils/indices/key_with_hash.hpp
rename to src/common/utils/ph_map/key_with_hash.hpp
index 57e5a5a..46aa86b 100644
--- a/src/common/utils/indices/key_with_hash.hpp
+++ b/src/common/utils/ph_map/key_with_hash.hpp
@@ -9,7 +9,7 @@
 
 #include "storing_traits.hpp"
 
-namespace debruijn_graph {
+namespace utils {
 
 template<typename Key, class HashFunction>
 class SimpleKeyWithHash {
diff --git a/src/common/utils/indices/perfect_hash_map.hpp b/src/common/utils/ph_map/perfect_hash_map.hpp
similarity index 97%
rename from src/common/utils/indices/perfect_hash_map.hpp
rename to src/common/utils/ph_map/perfect_hash_map.hpp
index 857efc9..86cfe61 100644
--- a/src/common/utils/indices/perfect_hash_map.hpp
+++ b/src/common/utils/ph_map/perfect_hash_map.hpp
@@ -6,11 +6,11 @@
 //* See file LICENSE for details.
 //***************************************************************************
 
-#include "utils/openmp_wrapper.h"
-#include "utils/path_helper.hpp"
+#include "utils/parallel/openmp_wrapper.h"
+#include "utils/filesystem/path_helper.hpp"
 #include "io/kmers/kmer_iterator.hpp"
 
-#include "utils/mph_index/kmer_index.hpp"
+#include "utils/kmer_mph/kmer_index.hpp"
 
 #include "key_with_hash.hpp"
 #include "values.hpp"
@@ -20,7 +20,7 @@
 #include <cstdlib>
 #include <cstdint>
 
-namespace debruijn_graph {
+namespace utils {
 
 template<class K, class traits>
 class IndexWrapper {
@@ -50,7 +50,7 @@ public:
             : index_ptr_(std::make_shared<KMerIndexT>())
             , k_((unsigned) k) {
         //fixme string literal
-        workdir_ = path::make_temp_dir(workdir, "kmeridx");
+        workdir_ = fs::make_temp_dir(workdir, "kmeridx");
     }
 
     IndexWrapper(size_t k, const std::string &workdir, std::shared_ptr<KMerIndexT> index_ptr)
@@ -59,7 +59,7 @@ public:
     }
 
     ~IndexWrapper() {
-        path::remove_dir(workdir_);
+        fs::remove_dir(workdir_);
     }
 
     void clear() {
diff --git a/src/common/utils/indices/perfect_hash_map_builder.hpp b/src/common/utils/ph_map/perfect_hash_map_builder.hpp
similarity index 87%
rename from src/common/utils/indices/perfect_hash_map_builder.hpp
rename to src/common/utils/ph_map/perfect_hash_map_builder.hpp
index c8d6972..7066387 100644
--- a/src/common/utils/indices/perfect_hash_map_builder.hpp
+++ b/src/common/utils/ph_map/perfect_hash_map_builder.hpp
@@ -5,12 +5,12 @@
 //* See file LICENSE for details.
 //***************************************************************************
 
-#include "utils/mph_index/kmer_index_builder.hpp"
+#include "utils/kmer_mph/kmer_index_builder.hpp"
+#include "utils/kmer_mph/kmer_splitters.hpp"
 
 #include "perfect_hash_map.hpp"
-#include "kmer_splitters.hpp"
 
-namespace debruijn_graph {
+namespace utils {
 
 struct PerfectHashMapBuilder {
     template<class K, class V, class traits, class StoringType, class Counter>
@@ -90,13 +90,4 @@ size_t BuildIndexFromStream(Index &index,
     return 0;
 }
 
-template<class Index, class Graph>
-void BuildIndexFromGraph(Index &index, const Graph &g, size_t read_buffer_size = 0) {
-    DeBruijnGraphKMerSplitter<Graph,
-                              StoringTypeFilter<typename Index::storing_type>>
-            splitter(index.workdir(), index.k(), g, read_buffer_size);
-    KMerDiskCounter<RtSeq> counter(index.workdir(), splitter);
-    BuildIndex(index, counter, 16, 1);
-}
-
 }
diff --git a/src/common/utils/indices/storing_traits.hpp b/src/common/utils/ph_map/storing_traits.hpp
similarity index 83%
rename from src/common/utils/indices/storing_traits.hpp
rename to src/common/utils/ph_map/storing_traits.hpp
index 0904cd4..09c06ca 100644
--- a/src/common/utils/indices/storing_traits.hpp
+++ b/src/common/utils/ph_map/storing_traits.hpp
@@ -15,8 +15,7 @@
 
 #include "values.hpp"
 
-namespace debruijn_graph {
-
+namespace utils {
 
 struct SimpleStoring {
     template<class K, class V>
@@ -78,4 +77,24 @@ struct InvertableStoring {
 
 typedef InvertableStoring DefaultStoring;
 
+template<class StoringType>
+struct StoringTypeFilter {
+};
+
+template<>
+struct StoringTypeFilter<SimpleStoring> {
+    template<class Kmer>
+    bool filter(const Kmer &/*kmer*/) const {
+        return true;
+    }
+};
+
+template<>
+struct StoringTypeFilter<InvertableStoring> {
+    template<class Kmer>
+    bool filter(const Kmer &kmer) const {
+        return kmer.IsMinimal();
+    }
+};
+
 }
diff --git a/src/common/utils/indices/values.hpp b/src/common/utils/ph_map/values.hpp
similarity index 98%
rename from src/common/utils/indices/values.hpp
rename to src/common/utils/ph_map/values.hpp
index 858f1f4..66cf750 100644
--- a/src/common/utils/indices/values.hpp
+++ b/src/common/utils/ph_map/values.hpp
@@ -13,7 +13,7 @@
  *      Author: anton
  */
 
-namespace debruijn_graph {
+namespace utils {
 
 template<class V>
 class ValueArray {
diff --git a/src/common/utils/segfault_handler.hpp b/src/common/utils/segfault_handler.hpp
index 2512ba5..f1c99e2 100644
--- a/src/common/utils/segfault_handler.hpp
+++ b/src/common/utils/segfault_handler.hpp
@@ -11,8 +11,11 @@
 #include "utils/stacktrace.hpp"
 #include "boost/noncopyable.hpp"
 
+#include <functional>
 #include <signal.h>
 
+namespace utils {
+
 struct segfault_handler : boost::noncopyable {
     typedef std::function<void()> callback_t;
 
@@ -56,3 +59,5 @@ private:
 private:
     seg_handler_t old_func_;
 };
+
+}
diff --git a/src/common/utils/simple_tools.hpp b/src/common/utils/simple_tools.hpp
deleted file mode 100644
index c47f70f..0000000
--- a/src/common/utils/simple_tools.hpp
+++ /dev/null
@@ -1,189 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-/*
- * simple_tools.hpp
- *
- *  Created on: 27.05.2011
- *      Author: vyahhi
- */
-
-#ifndef SIMPLE_TOOLS_HPP_
-#define SIMPLE_TOOLS_HPP_
-
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include "utils/verify.hpp"
-#include "io/reads/ireader.hpp"
-#include "utils/path_helper.hpp"
-#include <memory>
-#include <string>
-#include <set>
-#include <vector>
-
-/**
- * Converts anything to string (using ostringstream).
- */
-template <typename T>
-std::string ToString(const T& t) {
-    std::ostringstream ss;
-    ss << t;
-    return ss.str();
-}
-
-template <typename T>
-std::string ToString(const T& t, size_t length) {
-    std::ostringstream ss;
-    ss << t;
-    std::string result = ss.str();
-    while(result.size() < length)
-        result = "0" + result;
-    return result;
-}
-
-template <typename T>
-std::string ToString(std::vector<T>& t) {
-    std::ostringstream ss;
-    ss << "Size "<<t.size()<<": [";
-    for (auto it = t.begin(); it != t.end(); ++it)
-        ss<<*it<<", ";
-    ss<<"]";
-    return ss.str();
-}
-
-template <typename T>
-std::string ToString(std::set<T>& t) {
-    std::ostringstream ss;
-    ss << "Size "<<t.size()<<": [";
-    for (auto it = t.begin(); it != t.end(); ++it)
-        ss<<*it<<", ";
-    ss<<"]";
-    return ss.str();
-}
-
-template<typename T>
-inline const std::pair<T, T> ReversePair(std::pair<T, T> ep) {
-  return std::pair<T, T>(ep.second, ep.first);
-}
-
-template <class ContainerT1, class ContainerT2>
-void push_back_all(ContainerT1& target, const ContainerT2& to_insert) {
-    target.insert(target.end(), to_insert.begin(), to_insert.end());
-}
-
-template <class ContainerT1, class ContainerT2>
-void insert_all(ContainerT1& target, const ContainerT2& to_insert) {
-    target.insert(to_insert.begin(), to_insert.end());
-}
-
-template<class MapT>
-std::set<typename MapT::key_type> key_set(const MapT& m) {
-    std::set<typename MapT::key_type> answer;
-    for (auto it = m.begin(); it != m.end(); ++it) {
-        answer.insert(it->first);
-    }
-    return answer;
-}
-
-template<class MapT>
-std::set<typename MapT::mapped_type> value_set(const MapT& m) {
-    std::set<typename MapT::mapped_type> answer;
-    for (auto it = m.begin(); it != m.end(); ++it) {
-        answer.insert(it->second);
-    }
-    return answer;
-}
-
-template <class MapT>
-const typename MapT::mapped_type& get(const MapT& from, const typename MapT::key_type& key) {
-    auto it = from.find(key);
-    VERIFY(it != from.end());
-    return it->second;
-}
-
-template <class MapT>
-typename MapT::mapped_type& get(MapT& from, const typename MapT::key_type& key) {
-    auto it = from.find(key);
-    VERIFY(it != from.end());
-    return it->second;
-}
-
-template <class MMapT>
-const std::vector<typename MMapT::mapped_type> get_all(const MMapT& from, const typename MMapT::key_type& key) {
-    std::vector<typename MMapT::mapped_type> answer;
-    for (auto it = from.lower_bound(key); it != from.upper_bound(key); ++it) {
-        answer.push_back(it->second);
-    }
-    return answer;
-}
-
-class TmpFolderFixture
-{
-    std::string tmp_folder_;
-
-public:
-    TmpFolderFixture(std::string tmp_folder = "tmp") :
-        tmp_folder_(tmp_folder)
-    {
-        path::make_dirs(tmp_folder_);
-    }
-
-    ~TmpFolderFixture()
-    {
-        path::remove_dir(tmp_folder_);
-    }
-};
-
-namespace std
-{
-template<class T1, class T2>
-std::ostream& operator<< (std::ostream& os, std::pair<T1, T2> const& pair)
-{
-    return os << "(" << pair.first << ", " << pair.second << ")";
-}
-//}
-
-//namespace omnigraph
-//{
-template<class T>
-std::ostream& operator<< (std::ostream& os, const std::vector<T>& v)
-{
-     os << "[";
-     std::string delim = "";
-     for (auto it = v.begin(); it != v.end(); ++it) {
-         os << delim << *it;
-         delim = ", ";
-     }
-//     std::copy(v.begin(), v.end(), std::ostream_iterator<T>(os, ", "));
-     os << "]";
-     return os;
-}
-
-template<class T>
-std::ostream& operator<< (std::ostream& os, const std::set<T>& set)
-{
-    os << "{";
-    bool delim = false;
-    for (const auto& i : set) {
-        if (delim) os << ", ";
-        os << i;
-        delim = true;
-    }
-    os << "}";
-    return os;
-}
-
-}
-
-template<typename Base, typename T>
-inline bool instanceof(const T *ptr) {
-    return dynamic_cast<const Base *>(ptr) != nullptr;
-}
-
-#endif /* SIMPLE_TOOLS_HPP_ */
diff --git a/src/common/utils/stacktrace.hpp b/src/common/utils/stacktrace.hpp
index 3ab5947..ad90c97 100644
--- a/src/common/utils/stacktrace.hpp
+++ b/src/common/utils/stacktrace.hpp
@@ -16,16 +16,17 @@
 #include <execinfo.h>
 #include <iostream>
 
-inline void print_stacktrace()
-{
+namespace utils {
+
+inline void print_stacktrace() {
     std::cout << "=== Stack Trace ===" << std::endl;
 
     const size_t max_stack_size = 1000;
 
-    void* stack_pointers[max_stack_size];
+    void *stack_pointers[max_stack_size];
     int count = backtrace(stack_pointers, max_stack_size);
 
-    char** func_names = backtrace_symbols(stack_pointers, count);
+    char **func_names = backtrace_symbols(stack_pointers, count);
 
     // Print the stack trace
     for (int i = 0; i < count; ++i)
@@ -34,3 +35,5 @@ inline void print_stacktrace()
     // Free the string pointers
     free(func_names);
 }
+
+}
diff --git a/src/common/utils/standard_base.hpp b/src/common/utils/standard_base.hpp
index fac6fcf..3483667 100644
--- a/src/common/utils/standard_base.hpp
+++ b/src/common/utils/standard_base.hpp
@@ -87,16 +87,16 @@ using boost::noncopyable;
 #include "utils/stacktrace.hpp"
 
 // path manipulation instead of boost filesystem
-#include "utils/path_helper.hpp"
-using path::make_dir;
-using path::remove_dir;
+#include "filesystem/path_helper.hpp"
+using fs::make_dir;
+using fs::remove_dir;
 
 #ifndef NDEBUG
 namespace boost {
 inline void assertion_failed(char const * expr, char const * function,
                              char const * file, long line) {
   std::cerr << "Aborted by assert: " << std::endl;
-  print_stacktrace();
+  utils::print_stacktrace();
 #if __DARWIN_UNIX03
   __assert_rtn (expr, file, (int)line, function);
 #elif __DARWIN
@@ -110,7 +110,7 @@ inline void assertion_failed_msg(char const * expr, char const * msg,
                                  char const * function, char const * file,
                                  long line) {
   std::cerr << "Aborted by assert: " << msg << std::endl;
-  print_stacktrace();
+  utils::print_stacktrace();
 #if __DARWIN_UNIX03
   __assert_rtn (expr, file, (int)line, function);
 #elif __DARWIN
diff --git a/src/common/utils/stl_utils.hpp b/src/common/utils/stl_utils.hpp
new file mode 100644
index 0000000..900fbe2
--- /dev/null
+++ b/src/common/utils/stl_utils.hpp
@@ -0,0 +1,141 @@
+//***************************************************************************
+//* Copyright (c) 2015 Saint Petersburg State University
+//* Copyright (c) 2011-2014 Saint Petersburg Academic University
+//* All Rights Reserved
+//* See file LICENSE for details.
+//***************************************************************************
+
+#pragma once
+
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "utils/verify.hpp"
+#include "io/reads/ireader.hpp"
+#include "filesystem/path_helper.hpp"
+#include <memory>
+#include <string>
+#include <set>
+#include <vector>
+
+namespace utils {
+
+template<class Container>
+std::string ContainerToString(const Container &c) {
+    std::ostringstream ss;
+    ss << "Size " << c.size() << ": [";
+    for (const auto &el : c)
+        ss << el << ", ";
+    ss << "]";
+    return ss.str();
+}
+
+template<typename T>
+inline const std::pair<T, T> ReversePair(std::pair<T, T> ep) {
+    return std::pair<T, T>(ep.second, ep.first);
+}
+
+template<class ContainerT1, class ContainerT2>
+void push_back_all(ContainerT1 &target, const ContainerT2 &to_insert) {
+    target.insert(target.end(), to_insert.begin(), to_insert.end());
+}
+
+template<class ContainerT1, class ContainerT2>
+void insert_all(ContainerT1 &target, const ContainerT2 &to_insert) {
+    target.insert(to_insert.begin(), to_insert.end());
+}
+
+template<class MapT>
+std::set<typename MapT::key_type> key_set(const MapT &m) {
+    std::set<typename MapT::key_type> answer;
+    for (auto it = m.begin(); it != m.end(); ++it) {
+        answer.insert(it->first);
+    }
+    return answer;
+}
+
+template<class MapT>
+std::set<typename MapT::mapped_type> value_set(const MapT &m) {
+    std::set<typename MapT::mapped_type> answer;
+    for (auto it = m.begin(); it != m.end(); ++it) {
+        answer.insert(it->second);
+    }
+    return answer;
+}
+
+template<class MapT>
+const typename MapT::mapped_type &get(const MapT &from, const typename MapT::key_type &key) {
+    auto it = from.find(key);
+    VERIFY(it != from.end());
+    return it->second;
+}
+
+template<class MapT>
+typename MapT::mapped_type &get(MapT &from, const typename MapT::key_type &key) {
+    auto it = from.find(key);
+    VERIFY(it != from.end());
+    return it->second;
+}
+
+template<class MMapT>
+const std::vector<typename MMapT::mapped_type> get_all(const MMapT &from, const typename MMapT::key_type &key) {
+    std::vector<typename MMapT::mapped_type> answer;
+    for (auto it = from.lower_bound(key); it != from.upper_bound(key); ++it) {
+        answer.push_back(it->second);
+    }
+    return answer;
+}
+
+template<class Container, class F>
+std::string join(const Container &c,
+                 const std::string &delim = ", ",
+                 F str_f = [] (typename Container::value_type t) { return std::to_string(t); }) {
+    std::stringstream ss;
+    std::string d = "";
+    for (const auto &item : c) {
+        ss << d << str_f(item);
+        d = delim;
+    }
+    return ss.str();
+}
+
+}
+
+namespace std {
+template<class T1, class T2>
+std::ostream &operator<<(std::ostream &os, std::pair<T1, T2> const &pair) {
+    return os << "(" << pair.first << ", " << pair.second << ")";
+}
+//}
+
+//namespace omnigraph
+//{
+template<class T>
+std::ostream &operator<<(std::ostream &os, const std::vector<T> &v) {
+    os << "[";
+    std::string delim = "";
+    for (auto it = v.begin(); it != v.end(); ++it) {
+        os << delim << *it;
+        delim = ", ";
+    }
+//     std::copy(v.begin(), v.end(), std::ostream_iterator<T>(os, ", "));
+    os << "]";
+    return os;
+}
+
+template<class T>
+std::ostream &operator<<(std::ostream &os, const std::set<T> &set) {
+    os << "{";
+    bool delim = false;
+    for (const auto &i : set) {
+        if (delim) os << ", ";
+        os << i;
+        delim = true;
+    }
+    os << "}";
+    return os;
+}
+
+}
+
diff --git a/src/common/utils/verify.hpp b/src/common/utils/verify.hpp
index b677a3e..9a0c2bb 100644
--- a/src/common/utils/verify.hpp
+++ b/src/common/utils/verify.hpp
@@ -7,7 +7,6 @@
 
 #pragma once
 #include "utils/stacktrace.hpp"
-#include "boost/current_function.hpp"
 #include <sstream>
 #include <iostream>
 #include <cassert>
@@ -15,15 +14,15 @@
 #define VERIFY(expr)                                             \
     do {                                                         \
         if(!(expr))\
-            print_stacktrace();\
+            utils::print_stacktrace();\
         assert(expr);                                            \
     } while(0);
 
 #define VERIFY_MSG(expr, msg)                                           \
     if (!(expr)) {                                                      \
         std::stringstream ss;                                           \
-        print_stacktrace();\
-        ss << "Verification of expression '" << #expr << "' failed in function '" <<  BOOST_CURRENT_FUNCTION << \
+        utils::print_stacktrace();\
+        ss << "Verification of expression '" << #expr << "' failed in function '" <<  __PRETTY_FUNCTION__ << \
                 "'. In file '" << __FILE__ << "' on line " << __LINE__ << ". Message '" << msg << "'." ; \
         std::cout << ss.str() << std::endl;                             \
         std::cerr << ss.str() << std::endl;                             \
diff --git a/src/common/visualization/graph_labeler.hpp b/src/common/visualization/graph_labeler.hpp
index 8690af7..fad6595 100644
--- a/src/common/visualization/graph_labeler.hpp
+++ b/src/common/visualization/graph_labeler.hpp
@@ -7,9 +7,9 @@
 
 #pragma once
 
-#include "utils/simple_tools.hpp"
+#include "utils/stl_utils.hpp"
 #include "utils/standard_base.hpp"
-#include "common/assembly_graph/handlers/edges_position_handler.hpp"
+#include "assembly_graph/handlers/edges_position_handler.hpp"
 
 namespace visualization {
 
@@ -163,7 +163,7 @@ public:
     LengthGraphLabeler(const Graph &g) : base(g) {}
 
     /*virtual*/ std::string label(EdgeId e) const {
-        return ToString(this->graph().length(e));
+        return std::to_string(this->graph().length(e));
     }
 
 };
@@ -178,7 +178,7 @@ public:
 
     std::string label(EdgeId e) const {
         double coverage = this->graph().coverage(e);
-        return " {Cov:" + ToString(coverage) + "}";
+        return " {Cov:" + std::to_string(coverage) + "}";
     }
 };
 
@@ -287,7 +287,7 @@ public:
     }
 
     virtual std::string label(VertexId vertexId) const {
-        return ToString(vertexId.int_id());
+        return std::to_string(vertexId.int_id());
     }
 
     virtual std::string label(EdgeId edgeId) const {
@@ -296,7 +296,7 @@ public:
         ret_label += "Positions:\\n" + edges_positions_.str(edgeId);
         size_t len = g_.length(edgeId);
         double cov = g_.coverage(edgeId);
-        ret_label += "Len(cov): " + ToString(len) + "(" + ToString(cov) + ")";
+        ret_label += "Len(cov): " + std::to_string(len) + "(" + std::to_string(cov) + ")";
         return ret_label;
     }
 
diff --git a/src/common/visualization/graph_print_utils.hpp b/src/common/visualization/graph_print_utils.hpp
index 0c2f978..65cecee 100755
--- a/src/common/visualization/graph_print_utils.hpp
+++ b/src/common/visualization/graph_print_utils.hpp
@@ -247,9 +247,9 @@ private:
     string constructTableEntry(SingleVertex v/*, const string &label, const string &href*/) {
         stringstream ss;
         ss << "<TR>";
-        ss << constructPortCell(ToString(v.id_) + "_in", v.href_, v.fill_color_);
+        ss << constructPortCell(std::to_string(v.id_) + "_in", v.href_, v.fill_color_);
         ss << constructLabelCell(v.label_, v.href_, v.fill_color_);
-        ss << constructPortCell(ToString(v.id_) + "_out", v.href_, v.fill_color_);
+        ss << constructPortCell(std::to_string(v.id_) + "_out", v.href_, v.fill_color_);
         ss << "</TR>\n";
         return ss.str();
     }
@@ -257,9 +257,9 @@ private:
     string constructReverceTableEntry(SingleVertex v/*, const string &label, const string &href*/) {
         stringstream ss;
         ss << "<TR>";
-        ss << constructPortCell(ToString(v.id_) + "_out", v.href_, v.fill_color_);
+        ss << constructPortCell(std::to_string(v.id_) + "_out", v.href_, v.fill_color_);
         ss << constructLabelCell(v.label_, v.href_, v.fill_color_);
-        ss << constructPortCell(ToString(v.id_) + "_in", v.href_, v.fill_color_);
+        ss << constructPortCell(std::to_string(v.id_) + "_in", v.href_, v.fill_color_);
         ss << "</TR>\n";
         return ss.str();
     }
diff --git a/src/common/visualization/position_filler.hpp b/src/common/visualization/position_filler.hpp
index e0e61b3..dc701fc 100644
--- a/src/common/visualization/position_filler.hpp
+++ b/src/common/visualization/position_filler.hpp
@@ -7,7 +7,7 @@
 
 #pragma once
 
-#include "common/modules/alignment/sequence_mapper.hpp"
+#include "modules/alignment/sequence_mapper.hpp"
 #include "assembly_graph/handlers/edges_position_handler.hpp"
 #include "io/reads/wrapper_collection.hpp"
 #include "io/reads/io_helper.hpp"
@@ -19,44 +19,49 @@ namespace position_filler {
 template<class Graph>
 class PosFiller {
     typedef typename Graph::EdgeId EdgeId;
-    typedef std::shared_ptr<debruijn_graph::SequenceMapper < Graph>> MapperPtr;
+    typedef std::shared_ptr<debruijn_graph::SequenceMapper<Graph>> MapperPtr;
     const Graph &g_;
     MapperPtr mapper_;
     omnigraph::EdgesPositionHandler<Graph> &edge_pos_;
 
 public:
+    typedef omnigraph::MappingPath<EdgeId> MappingPath;
+
     PosFiller(const Graph &g, MapperPtr mapper,
               omnigraph::EdgesPositionHandler<Graph> &edge_pos) :
             g_(g), mapper_(mapper), edge_pos_(edge_pos) {
 
     }
 
-    void Process(const Sequence &s, string name) const {
-        //todo stupid conversion!
-        return Process(io::SingleRead(name, s.str()));
+    MappingPath Process(const string &s, const string &name) const {
+        return Process(io::SingleRead(name, s));
+    }
+
+    MappingPath Process(const Sequence &s, const string &name) const {
+        return Process(s.str(), name);
     }
 
-    void Process(const io::SingleRead &read) const {
-        omnigraph::MappingPath<EdgeId> path = mapper_->MapRead(read);
+    MappingPath Process(const io::SingleRead &read) const {
+        MappingPath path = mapper_->MapRead(read);
         const string name = read.name();
         int cur_pos = 0;
-        TRACE("Contig " << name << " mapped on " << path.size()
-                        << " fragments.");
+        TRACE("Contig " << name << " mapped on " << path.size() << " fragments.");
         for (size_t i = 0; i < path.size(); i++) {
             EdgeId ei = path[i].first;
             omnigraph::MappingRange mr = path[i].second;
             int len = (int) (mr.mapped_range.end_pos - mr.mapped_range.start_pos);
-            if (i > 0) if (path[i - 1].first != ei) if (g_.EdgeStart(ei) != g_.EdgeEnd(path[i - 1].first)) {
-                TRACE(
-                        "Contig " << name
-                                  << " mapped on not adjacent edge. Position in contig is "
-                                  << path[i - 1].second.initial_range.start_pos
-                                     + 1
-                                  << "--"
-                                  << path[i - 1].second.initial_range.end_pos
-                                  << " and "
-                                  << mr.initial_range.start_pos + 1
-                                  << "--" << mr.initial_range.end_pos);
+            if (i > 0 &&
+                path[i - 1].first != ei &&
+                g_.EdgeStart(ei) != g_.EdgeEnd(path[i - 1].first)) {
+                TRACE("Contig " << name
+                          << " mapped on not adjacent edge. Position in contig is "
+                          << path[i - 1].second.initial_range.start_pos
+                             + 1
+                          << "--"
+                          << path[i - 1].second.initial_range.end_pos
+                          << " and "
+                          << mr.initial_range.start_pos + 1
+                          << "--" << mr.initial_range.end_pos);
             }
             edge_pos_.AddEdgePosition(ei, name, mr.initial_range.start_pos,
                                       mr.initial_range.end_pos,
@@ -64,6 +69,7 @@ public:
                                       mr.mapped_range.end_pos);
             cur_pos += len;
         }
+        return path;
     }
 
     void Process(io::SingleStream &stream) const {
@@ -79,7 +85,7 @@ private:
 };
 
 template<class gp_t>
-void FillPos(gp_t &gp, const string &contig_file, string prefix, bool with_rc = false) {
+void FillPos(gp_t &gp, const string &contig_file, string prefix, bool with_rc) {
     PosFiller<typename gp_t::graph_t> pos_filler(gp.g, debruijn_graph::MapperInstance(gp), gp.edge_pos);
     auto irs = std::make_shared<io::PrefixAddingReaderWrapper>(io::EasyStream(contig_file, with_rc, false),
                                                                prefix);
@@ -87,7 +93,7 @@ void FillPos(gp_t &gp, const string &contig_file, string prefix, bool with_rc =
 }
 
 template<class gp_t>
-void FillPos(gp_t &gp, const Sequence &s, string name) {
+void FillPos(gp_t &gp, const string &s, string name) {
     PosFiller<typename gp_t::graph_t> pos_filler(gp.g, debruijn_graph::MapperInstance(gp), gp.edge_pos);
     pos_filler.Process(s, name);
 }
diff --git a/src/common/visualization/visualization_utils.hpp b/src/common/visualization/visualization_utils.hpp
index 34ec334..f749e4b 100644
--- a/src/common/visualization/visualization_utils.hpp
+++ b/src/common/visualization/visualization_utils.hpp
@@ -40,7 +40,7 @@ void DrawComponentsOfShortEdges(const Graph &g, const string &output_dir, size_t
                                 size_t sources) {
     vector<typename Graph::EdgeId> short_edges;
     std::string pics_folder_ =
-            output_dir + ToString(min_length) + "_" + ToString(sinks) + "_" + ToString(sources) + "_" +
+            output_dir + std::to_string(min_length) + "_" + std::to_string(sinks) + "_" + std::to_string(sources) + "_" +
             "pics_polymorphic/";
     make_dir(pics_folder_);
     INFO("Writing pics with components consisting of short edges to " + pics_folder_);
@@ -69,14 +69,14 @@ void DrawComponentsOfShortEdges(const Graph &g, const string &output_dir, size_t
             graph_labeler::CoverageGraphLabeler<Graph> labeler2(component.g());
             graph_labeler::CompositeLabeler<Graph> compositeLabeler(labeler, labeler2);
             WriteComponentSinksSources(component,
-                                       pics_folder_ + ToString(g.int_id(*component.vertices().begin()))
+                                       pics_folder_ + std::to_string(g.int_id(*component.vertices().begin()))
                                        + ".dot", visualization::graph_colorer::DefaultColorer(g),
                                        compositeLabeler);
-            INFO("Component is written to " + ToString(g.int_id(*component.vertices().begin())) + ".dot");
+            INFO("Component is written to " + std::to_string(g.int_id(*component.vertices().begin())) + ".dot");
 
             //            PrintComponent(component,
 //                                pics_folder_ + "ShortComponents/"
-//                                        + ToString(gp.g.int_id(component.vertices_[0]))
+//                                        + std::to_string(gp.g.int_id(component.vertices_[0]))
 //                                         + ".dot");
         }
     }
@@ -197,7 +197,7 @@ public:
         shared_ptr<graph_colorer::GraphColorer<Graph>> resulting_colorer = make_shared<graph_colorer::CompositeGraphColorer<Graph>>(
                 colorer_, edge_colorer);
 
-        string fn = output_folder_ + "/edge_" + ToString(g_.int_id(e)) + add_label + ".dot";
+        string fn = output_folder_ + "/edge_" + std::to_string(g_.int_id(e)) + add_label + ".dot";
         visualization::visualization_utils::WriteComponent(omnigraph::EdgeNeighborhood<Graph>(g_, e, 50, 250), fn, resulting_colorer,
                                       labeler_);
     }
diff --git a/src/projects/cap/assembly_compare.hpp b/src/projects/cap/assembly_compare.hpp
index ec86be5..0fc6f23 100644
--- a/src/projects/cap/assembly_compare.hpp
+++ b/src/projects/cap/assembly_compare.hpp
@@ -9,7 +9,7 @@
 
 #include "pipeline/graph_pack.hpp"
 #include "pipeline/graphio.hpp"
-#include "utils/simple_tools.hpp"
+#include "utils/stl_utils.hpp"
 #include "modules/simplification/cleaner.hpp"
 #include "io/reads/splitting_wrapper.hpp"
 #include "io/reads/multifile_reader.hpp"
diff --git a/src/projects/cap/assembly_problem_detection.hpp b/src/projects/cap/assembly_problem_detection.hpp
index 6ad4075..99a1b3c 100644
--- a/src/projects/cap/assembly_problem_detection.hpp
+++ b/src/projects/cap/assembly_problem_detection.hpp
@@ -189,8 +189,8 @@
 //
 //    vector<EdgeId> IncidentEdges(VertexId v) {
 //        vector<EdgeId> ans;
-//        push_back_all(ans, gp_.g.IncomingEdges(v));
-//        push_back_all(ans, gp_.g.OutgoingEdges(v));
+//        utils::push_back_all(ans, gp_.g.IncomingEdges(v));
+//        utils::push_back_all(ans, gp_.g.OutgoingEdges(v));
 //        return ans;
 //    }
 //
@@ -407,7 +407,7 @@
 //        CompositeLabeler<Graph> labeler(basic_labeler, pos_labeler);
 //        GraphComponent<Graph> component = omnigraph::EdgeNeighborhood(g_, e);
 //        auto colorer = coloring_.ConstructColorer(component);
-//        visualization::visualization_utils::WriteComponent(component, folder + ToString(g_.int_id(e)) + "_loc.dot", colorer, labeler);
+//        visualization::visualization_utils::WriteComponent(component, folder + std::to_string(g_.int_id(e)) + "_loc.dot", colorer, labeler);
 //    }
 //
 ////    bool CheckEdges(const vector<EdgeId>& edges) {
diff --git a/src/projects/cap/cap_commands.hpp b/src/projects/cap/cap_commands.hpp
index 1c0c945..686cf83 100644
--- a/src/projects/cap/cap_commands.hpp
+++ b/src/projects/cap/cap_commands.hpp
@@ -11,7 +11,7 @@
 #include "cap_environment_manager.hpp"
 #include "mosaic.hpp"
 #include "io/reads/sequence_reader.hpp"
-#include "utils/path_helper.hpp"
+#include "utils/path/path_helper.hpp"
 
 namespace online_visualization {
 
@@ -508,10 +508,10 @@ class BlocksToGRIMMFormat : public LocalCommand<CapEnvironment> {
       std::string file_from = args[1],
                   file_to = args[2];
 
-      path::make_full_path(file_from);
-      path::make_full_path(file_to);
+      file_from = fs::make_full_path(file_from);
+      file_to = fs::make_full_path(file_to);
 
-      std::string dir = path::parent_path(file_to);
+      std::string dir = fs::parent_path(file_to);
       cap::utils::MakeDirPath(dir);
 
       BlockPrinter<Graph>::ConvertBlocksToGRIMM(file_from, file_to);
diff --git a/src/projects/cap/cap_environment_manager.hpp b/src/projects/cap/cap_environment_manager.hpp
index 33a39f3..f748c1e 100644
--- a/src/projects/cap/cap_environment_manager.hpp
+++ b/src/projects/cap/cap_environment_manager.hpp
@@ -119,7 +119,7 @@ class CapEnvironmentManager {
   template <class gp_t>
   void SaveCurrentStreams(const gp_t &/* gp */, const std::string &dir) const {
         for (size_t i = 0; i < env_->genomes_.size(); ++i) {
-      std::string output_filename = dir + path::filename(env_->init_genomes_paths_[i]);
+      std::string output_filename = dir + fs::filename(env_->init_genomes_paths_[i]);
             if (!output_filename.empty()) {
                 Contig contig;
                 io::osequencestream out_stream(output_filename);
diff --git a/src/projects/cap/cap_kmer_index.hpp b/src/projects/cap/cap_kmer_index.hpp
index 5b7414b..de73589 100644
--- a/src/projects/cap/cap_kmer_index.hpp
+++ b/src/projects/cap/cap_kmer_index.hpp
@@ -10,8 +10,8 @@
 #include "compare_standard.hpp"
 #include "longseq.hpp"
 #include "polynomial_hash.hpp"
-#include "common/adt/kmer_map.hpp"
-#include "utils/indices/edge_position_index.hpp"
+#include "adt/kmer_map.hpp"
+#include "assembly_graph/index/edge_position_index.hpp"
 
 #include "io/reads/sequence_reader.hpp"
 #include "utils/mph_index/base_hash.hpp"
diff --git a/src/projects/cap/compare_standard.hpp b/src/projects/cap/compare_standard.hpp
index 426b3f5..620274b 100644
--- a/src/projects/cap/compare_standard.hpp
+++ b/src/projects/cap/compare_standard.hpp
@@ -14,9 +14,9 @@
 
 // utils
 #include "utils/cpp_utils.hpp"
-#include "utils/path_helper.hpp"
+#include "utils/path/path_helper.hpp"
 
-#include "utils/simple_tools.hpp"
+#include "utils/stl_utils.hpp"
 
 // longseq
 #include "longseq.hpp"
diff --git a/src/projects/cap/comparison_utils.hpp b/src/projects/cap/comparison_utils.hpp
index 2dddb7a..5fdeacf 100644
--- a/src/projects/cap/comparison_utils.hpp
+++ b/src/projects/cap/comparison_utils.hpp
@@ -8,7 +8,7 @@
 #pragma once
 
 #include "pipeline/graphio.hpp"
-#include "utils/simple_tools.hpp"
+#include "utils/stl_utils.hpp"
 #include "assembly_graph/core/graph.hpp"
 #include "coordinates_handler.hpp"
 #include "math/xmath.h"
@@ -70,7 +70,7 @@ void ConstructGraph(Graph& g, Index& index,
 */
 
 inline Sequence ReadGenome(const string& filename) {
-    path::CheckFileExistenceFATAL(filename);
+    fs::CheckFileExistenceFATAL(filename);
     io::FileReadStream genome_stream(filename);
     return ReadSequence(genome_stream);
 }
@@ -84,7 +84,7 @@ void WriteGenome(const Sequence& genome, const string& filename) {
 inline vector<io::SingleRead> MakeReads(const vector<Sequence>& ss) {
     vector<io::SingleRead> ans;
     for (size_t i = 0; i < ss.size(); ++i) {
-        ans.push_back(io::SingleRead("read_" + ToString(i), ss[i].str()));
+        ans.push_back(io::SingleRead("read_" + std::to_string(i), ss[i].str()));
     }
     return ans;
 }
@@ -109,7 +109,7 @@ inline vector<Sequence> AllSequences(ContigStream& stream) {
 }
 
 inline vector<Sequence> ReadContigs(const string& filename) {
-    path::CheckFileExistenceFATAL(filename);
+    fs::CheckFileExistenceFATAL(filename);
     io::FileReadStream genome_stream(filename);
     return AllSequences(genome_stream);
 }
diff --git a/src/projects/cap/deprecated/tools_deprecated.cpp b/src/projects/cap/deprecated/tools_deprecated.cpp
index 6f13424..99f7a3d 100644
--- a/src/projects/cap/deprecated/tools_deprecated.cpp
+++ b/src/projects/cap/deprecated/tools_deprecated.cpp
@@ -52,7 +52,7 @@
 
 //     string ref = "/home/snurk/Dropbox/lab/mrsa/USA300_FPR3757.fasta";
 // //    string ref = "assembly_comp/gingi_diff_mask/tdc60.fasta";
-//     string output_folder = "assembly_comp/s60_usa300_" + ToString(K) + "/";
+//     string output_folder = "assembly_comp/s60_usa300_" + std::to_string(K) + "/";
 //     remove_dir(output_folder);
 //     make_dir(output_folder);
 
diff --git a/src/projects/cap/diff_masking.hpp b/src/projects/cap/diff_masking.hpp
index 67ef45e..0932d4d 100644
--- a/src/projects/cap/diff_masking.hpp
+++ b/src/projects/cap/diff_masking.hpp
@@ -273,7 +273,7 @@ inline void PerformIterativeRefinement(ContigStreams& streams,
     size_t current_k = k_values.back();
     k_values.pop_back();
 
-    string root = out_root + ToString(current_k) + "/";
+    string root = out_root + std::to_string(current_k) + "/";
 
     if (utils::NeedToUseLongSeq(current_k)) {
         omp_set_num_threads(1);
diff --git a/src/projects/cap/gene_analysis.hpp b/src/projects/cap/gene_analysis.hpp
index 07f99fe..4c2d974 100644
--- a/src/projects/cap/gene_analysis.hpp
+++ b/src/projects/cap/gene_analysis.hpp
@@ -8,7 +8,7 @@
 #pragma once
 
 #include "utils/standard_base.hpp"
-#include "utils/simple_tools.hpp"
+#include "utils/stl_utils.hpp"
 #include "comparison_utils.hpp"
 #include "boost/tokenizer.hpp"
 #include "coloring.hpp"
@@ -183,7 +183,7 @@ struct GeneCollection {
     size_t id = 0;
     for (string name : genome_names) {
     string filename = genomes_folder + name;
-    path::CheckFileExistenceFATAL(filename);
+    fs::CheckFileExistenceFATAL(filename);
     genomes.insert(
         make_pair(
             id,
@@ -199,7 +199,7 @@ GenomeId genome_id(const string& name) const {
 
 void LoadGenomes(const string& file_with_genomes,
     const string& genomes_folder) {
-  path::CheckFileExistenceFATAL(file_with_genomes);
+  fs::CheckFileExistenceFATAL(file_with_genomes);
   ifstream stream(file_with_genomes);
   set<string> genome_names;
   string name;
@@ -230,7 +230,7 @@ void SaveGeneInfo(const string& filename) const {
 }
 
 set<int> LoadGeneIDs(const string& file_with_ids) {
-  path::CheckFileExistenceFATAL(file_with_ids);
+  fs::CheckFileExistenceFATAL(file_with_ids);
   ifstream stream(file_with_ids);
   set<int> gene_ids;
   int id;
@@ -252,7 +252,7 @@ void AddGeneInfo(const GeneId& gene_id, const GenomeId& genome_id, const Range&
 void LoadGeneInfo(const string& filename, set<int> gene_ids) {
   using boost::tokenizer;
   using boost::escaped_list_separator;
-  path::CheckFileExistenceFATAL(filename);
+  fs::CheckFileExistenceFATAL(filename);
   ifstream stream(filename);
   string line;
   while (!stream.eof()) {
@@ -334,7 +334,7 @@ DECL_LOGGER("GeneCollection")
 //                       const ColorHandler<typename gp_t::graph_t>& coloring) {
 //  for (auto it = gene_collection.genes.begin();
 //      it != gene_collection.genes.end(); ++it) {
-////        make_dir(folder + ToString(it->first));
+////        make_dir(folder + std::to_string(it->first));
 //    const GenePositions& gene_poss = it->second.gene_positions;
 //
 //    //todo improve later
@@ -345,7 +345,7 @@ DECL_LOGGER("GeneCollection")
 //        total_gene_sequence = total_gene_sequence + genome.Subseq(pos.first.start_pos, pos.first.end_pos);
 //      }
 //    }
-//    WriteComponentsAlongSequence(gp, folder + ToString(it->first) + "/",
+//    WriteComponentsAlongSequence(gp, folder + std::to_string(it->first) + "/",
 //                                 100000, 50, total_gene_sequence, coloring);
 //  }
 //}
diff --git a/src/projects/cap/genome_correction.hpp b/src/projects/cap/genome_correction.hpp
index 52ba5c4..4b32b99 100644
--- a/src/projects/cap/genome_correction.hpp
+++ b/src/projects/cap/genome_correction.hpp
@@ -12,7 +12,7 @@
 #include <vector>
 #include <map>
 #include <common/visualization/graph_labeler.hpp>
-#include "common/adt/bag.hpp"
+#include "adt/bag.hpp"
 
 namespace cap {
 
@@ -378,14 +378,14 @@ class SimpleInDelCorrector {
     void GenPicAlongPath(const vector<EdgeId> path, size_t cnt) {
     utils::MakeDirPath("ref_correction");
         WriteComponentsAlongPath(g_, visualization::graph_labeler::StrGraphLabeler<Graph>(g_),
-                "ref_correction/" + ToString(cnt) + ".dot", 100000, 10,
+                "ref_correction/" + std::to_string(cnt) + ".dot", 100000, 10,
                 TrivialMappingPath(g_, path), *ConstructColorer(coloring_));
     }
 
     void GenPicAroundEdge(EdgeId e, size_t cnt) {
         utils::MakeDirPath("ref_correction");
         GraphComponent<Graph> component = omnigraph::EdgeNeighborhood(g_, e, 10, 100000);
-        visualization::visualization_utils::WriteComponent(g_, "ref_correction/" + ToString(cnt) + ".dot", component, coloring_.GetInstance(),
+        visualization::visualization_utils::WriteComponent(g_, "ref_correction/" + std::to_string(cnt) + ".dot", component, coloring_.GetInstance(),
                                       visualization::graph_labeler::StrGraphLabeler<Graph>(g_));
     }
 
@@ -428,7 +428,7 @@ class SimpleInDelCorrector {
 //
 //        CompositeLabeler<Graph> labeler(basic_labeler, pos_labeler);
 //
-//        string alt_path_folder = folder_ + ToString(g_.int_id(e)) + "/";
+//        string alt_path_folder = folder_ + std::to_string(g_.int_id(e)) + "/";
 //        make_dir(alt_path_folder);
 //        WriteComponentsAlongPath(g_, labeler, alt_path_folder + "path.dot", /*split_length*/
 //        1000, /*vertex_number*/15, TrivialMappingPath(g_, genome_path),
diff --git a/src/projects/cap/longseq.hpp b/src/projects/cap/longseq.hpp
index 7a454ed..dd18673 100644
--- a/src/projects/cap/longseq.hpp
+++ b/src/projects/cap/longseq.hpp
@@ -10,9 +10,9 @@
 #include <cstdlib>
 #include <cstdint>
 #include "polynomial_hash.hpp"
-#include "utils/log.hpp"
+#include "math/log.hpp"
 #include "sequence/sequence.hpp"
-#include "utils/openmp_wrapper.h"
+#include "utils/parallel/openmp_wrapper.h"
 
 namespace cap {
 
diff --git a/src/projects/cap/main.cpp b/src/projects/cap/main.cpp
index 2b646c2..9043703 100644
--- a/src/projects/cap/main.cpp
+++ b/src/projects/cap/main.cpp
@@ -12,11 +12,11 @@
 #include "utils/segfault_handler.hpp"
 #include "utils/stacktrace.hpp"
 #include "pipeline/config_struct.hpp"
-#include "utils/simple_tools.hpp"
+#include "utils/stl_utils.hpp"
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
-#include "utils/memory_limit.hpp"
+#include "utils/mem/memory_limit.hpp"
 #include "io/dataset_support/read_converter.hpp"
 
 #include "cap_online_visualizer.hpp"
diff --git a/src/projects/cap/mosaic.hpp b/src/projects/cap/mosaic.hpp
index ac75fbb..b6a479d 100644
--- a/src/projects/cap/mosaic.hpp
+++ b/src/projects/cap/mosaic.hpp
@@ -9,7 +9,7 @@
 #include "io/reads/rc_reader_wrapper.hpp"
 #include "io/reads/sequence_reader.hpp"
 #include "diff_masking.hpp"
-#include "common/adt/bag.hpp"
+#include "adt/bag.hpp"
 #include "io/reads/vector_reader.hpp"
 #include "visualization/graph_colorer.hpp"
 
@@ -911,7 +911,7 @@ const vector<io::SingleRead> MakeReads(const vector<string>& reads, const vector
 vector<string> mosaic_names(size_t n) {
     vector<string> ans;
     for (size_t i = 0; i < n; ++i) {
-        ans.push_back("mosaic_" + ToString(i));
+        ans.push_back("mosaic_" + std::to_string(i));
     }
     return ans;
 }
@@ -968,8 +968,8 @@ void DrawGraph(const vector<StrandRange>& all_ranges,
             50
             /*numeric_limits<size_t>::max()*/);
 
-    path::remove_if_exists("mosaic_pics");
-    path::make_dir("mosaic_pics");
+    fs::remove_if_exists("mosaic_pics");
+    fs::make_dir("mosaic_pics");
     INFO("Writing components");
     visualization::visualization_utils::WriteComponents(gp.g, "mosaic_pics/", splitter,
             visualization::graph_colorer::DefaultColorer(gp.g), labeler);
diff --git a/src/projects/cap/repeat_masking.hpp b/src/projects/cap/repeat_masking.hpp
index ad1e19c..b396e9d 100644
--- a/src/projects/cap/repeat_masking.hpp
+++ b/src/projects/cap/repeat_masking.hpp
@@ -9,7 +9,7 @@
 
 #include "sequence/nucl.hpp"
 #include "io/reads/modifying_reader_wrapper.hpp"
-#include "common/adt/bag.hpp"
+#include "adt/bag.hpp"
 #include <boost/random/mersenne_twister.hpp>
 #include <boost/random/uniform_01.hpp>
 #include <boost/random/uniform_int.hpp>
@@ -280,7 +280,7 @@ inline bool MaskRepeatsIteration(size_t k, const string& input_dir, const vector
 //    size_t iter = 0;
 //    bool no_repeats = false;
 //    while (iter <= max_iter_count) {
-//        string out_dir = input_dir + ToString(iter) + "/";
+//        string out_dir = input_dir + std::to_string(iter) + "/";
 //        make_dir(out_dir);
 //        no_repeats = MaskRepeatsIteration(input_dir, suffixes, out_dir);
 //        if (no_repeats) {
@@ -292,7 +292,7 @@ inline bool MaskRepeatsIteration(size_t k, const string& input_dir, const vector
 //        string out_dir = input_dir + "masked/";
 //        make_dir(out_dir);
 //        ModifyAndSave(make_shared<io::TrivialModifier>(),
-//                      OpenStreams(input_dir + "/" + ToString(iter) + "/", suffixes,
+//                      OpenStreams(input_dir + "/" + std::to_string(iter) + "/", suffixes,
 //                                  out_dir));
 //    } else {
 //        WARN("Failed to mask repeats in " << max_iter_count << " iterations");
@@ -310,7 +310,7 @@ inline bool MaskRepeats(size_t k, ContigStreams input_streams, const vector<stri
     while (iter <= max_iter_count) {
         INFO("------------------------");
         INFO("Iteration " << iter);
-        string out_dir = work_dir + ToString(iter) + "/";
+        string out_dir = work_dir + std::to_string(iter) + "/";
         make_dir(out_dir);
         no_repeats = MaskRepeatsIteration(k, input_dir, suffixes, out_dir, rand_nucl);
         if (no_repeats) {
diff --git a/src/projects/cap/simple_inversion_finder.hpp b/src/projects/cap/simple_inversion_finder.hpp
index d29a272..f7fb62e 100644
--- a/src/projects/cap/simple_inversion_finder.hpp
+++ b/src/projects/cap/simple_inversion_finder.hpp
@@ -145,9 +145,9 @@ class SimpleInversionFinder {
     DEBUG("cycle found: " << v_list_str.str());
 
     const std::string edge_pic_name = base_pic_file_name_ + "_" +
-        ToString(num_cycles_found_) + ".dot";
+        std::to_string(num_cycles_found_) + ".dot";
     const std::string path_pic_name = base_pic_file_name_ + "_path_" +
-        ToString(num_cycles_found_) + ".dot";
+        std::to_string(num_cycles_found_) + ".dot";
 
     /*
     PrintColoredGraphAroundEdge(g_, coloring_, edge, gp_.edge_pos,
diff --git a/src/projects/cap/stats.hpp b/src/projects/cap/stats.hpp
index abb916e..3d61ab5 100644
--- a/src/projects/cap/stats.hpp
+++ b/src/projects/cap/stats.hpp
@@ -11,7 +11,7 @@
 #include "assembly_graph/components/graph_component.hpp"
 #include "assembly_graph/components/splitters.hpp"
 #include "utils.hpp"
-#include "utils/simple_tools.hpp"
+#include "utils/stl_utils.hpp"
 #include "comparison_utils.hpp"
 #include "assembly_graph/graph_support/basic_graph_stats.hpp"
 #include "coloring.hpp"
@@ -493,8 +493,8 @@ class TrivialBreakpointFinder: public AbstractFilter<
 
         size_t MaxRedBlueIncLength(VertexId v) {
             vector<EdgeId> edges;
-            push_back_all(edges, g_.IncomingEdges(v));
-            push_back_all(edges, g_.OutgoingEdges(v));
+            utils::push_back_all(edges, g_.IncomingEdges(v));
+            utils::push_back_all(edges, g_.OutgoingEdges(v));
             return MaxRedBlueLength(edges);
         }
 
@@ -533,7 +533,7 @@ class TrivialBreakpointFinder: public AbstractFilter<
         GraphComponent<Graph> component = omnigraph::EdgeNeighborhood(g_, e);
         visualization::visualization_utils::WriteComponent(
                 component,
-                folder + prefix + ToString(g_.int_id(v)) + "_loc.dot",
+                folder + prefix + std::to_string(g_.int_id(v)) + "_loc.dot",
                 coloring_.ConstructColorer(component), labeler);
     }
 
@@ -586,8 +586,8 @@ public:
             ReportBreakpoint(
                     breakpoints[i],
                     folder,
-                    ToString(i) + "_"
-                            + ToString(comp.MaxRedBlueIncLength(breakpoints[i]))
+                    std::to_string(i) + "_"
+                            + std::to_string(comp.MaxRedBlueIncLength(breakpoints[i]))
                             + "_");
         }
     }
@@ -700,7 +700,7 @@ class SimpleInDelAnalyzer {
 
         visualization::graph_labeler::CompositeLabeler<Graph> labeler(basic_labeler, pos_labeler);
 
-        string alt_path_folder = folder_ + ToString(g_.int_id(e)) + "/";
+        string alt_path_folder = folder_ + std::to_string(g_.int_id(e)) + "/";
         make_dir(alt_path_folder);
         WriteComponentsAlongPath(g_, labeler, alt_path_folder + "path.dot", /*split_length*/
                 1000, /*vertex_number*/15, TrivialMappingPath(g_, genome_path),
@@ -984,16 +984,16 @@ public:
 //        for (auto it = genome_path_.begin(); it != genome_path_.end(); ++it) {
 //            if (answer.find(*it) == answer.end()) {
 //                curr++;
-//                answer[*it] = ToString(curr);
-//                answer[g_.conjugate(*it)] = ToString(-curr);
+//                answer[*it] = std::to_string(curr);
+//                answer[g_.conjugate(*it)] = std::to_string(-curr);
 //            }
 //        }
 //        curr = 1000000;
 //        for (auto it = g_.SmartEdgeBegin(); !it.IsEnd(); ++it) {
 //            if (answer.find(*it) == answer.end()) {
 //                curr++;
-//                answer[*it] = ToString(curr);
-//                answer[g_.conjugate(*it)] = ToString(-curr);
+//                answer[*it] = std::to_string(curr);
+//                answer[g_.conjugate(*it)] = std::to_string(-curr);
 //            }
 //        }
 //        return answer;
@@ -1496,7 +1496,7 @@ public:
             if (location.first) {
                 locality = !locality;
             }
-            ReportLocality(locality, output_dir_ + ToString(i) + ".dot");
+            ReportLocality(locality, output_dir_ + std::to_string(i) + ".dot");
         }
     }
 };}
diff --git a/src/projects/cclean/CMakeLists.txt b/src/projects/cclean/CMakeLists.txt
deleted file mode 100644
index 24ce7b9..0000000
--- a/src/projects/cclean/CMakeLists.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-############################################################################
-# Copyright (c) 2015 Saint Petersburg State University
-# Copyright (c) 2011-2014 Saint-Petersburg Academic University
-# All Rights Reserved
-# See file LICENSE for details.
-############################################################################
-
-project(cclean CXX)
-aux_source_directory(. SRC_LIST)
-include_directories(${CMAKE_CURRENT_SOURCE_DIR})
-file(GLOB ${CMAKE_CURRENT_SOURCE_DIR}
-    "*.hh"
-    "*.h"
-    "*.hpp"
-    "*.cpp"
-)
-add_executable(${PROJECT_NAME} ${SRC_LIST})
-
-target_link_libraries(cclean ssw input cityhash ${COMMON_LIBRARIES})
-
-if (SPADES_STATIC_BUILD)
-  set_target_properties(cclean PROPERTIES LINK_SEARCH_END_STATIC 1)
-endif()
-
-install(TARGETS cclean
-        DESTINATION bin
-        COMPONENT runtime)
-install(DIRECTORY "${SPADES_CFG_DIR}/cclean"
-        DESTINATION share/spades/configs
-        FILES_MATCHING PATTERN "*.info")
diff --git a/src/projects/cclean/adapter_index.cpp b/src/projects/cclean/adapter_index.cpp
deleted file mode 100644
index 29d7f3a..0000000
--- a/src/projects/cclean/adapter_index.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#include "adapter_index.hpp"
-#include "io/read_processor.hpp"
-#include "valid_kmer_generator.hpp"
-
-#include "io/ireadstream.hpp"
-#include "config_struct_cclean.hpp"
-
-#include <libcxx/sort.hpp>
-
-using namespace cclean;
-
-void AdapterIndexBuilder::FillAdapterIndex(const std::string &db, AdapterIndex &data) {
-  data.clear();
-
-  INFO("Reading adapter database from " << db);
-  ireadstream irs(db);
-  while (!irs.eof()) {
-    Read r;
-    irs >> r;
-    const std::string &seq = r.getSequenceString();
-
-    data.seqs_.push_back(seq);
-    data.seqs_.push_back(ReverseComplement(seq));
-  }
-
-  INFO("Filling adapter index");
-  for (size_t i = 0, e = data.seqs_.size(); i !=e; ++i) {
-    const std::string &seq = data.seqs_[i];
-    ValidKMerGenerator<cclean::K> gen(seq.c_str(), NULL, seq.size());
-
-    while (gen.HasMore()) {
-      KMer kmer = gen.kmer();
-
-      auto& entry = data.index_[kmer];
-      entry.insert(i);
-
-      gen.Next();
-    }
-  }
-
-  INFO("Done. Total " << data.seqs_.size() << " adapters processed. Total "
-                      << data.index_.size() << " unique k-mers.");
-}
diff --git a/src/projects/cclean/adapter_index.hpp b/src/projects/cclean/adapter_index.hpp
deleted file mode 100644
index 1bcc21f..0000000
--- a/src/projects/cclean/adapter_index.hpp
+++ /dev/null
@@ -1,61 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#ifndef CCLEAN_ADAPTERINDEX_HPP
-#define CCLEAN_ADAPTERINDEX_HPP
-
-#include "sequence/seq.hpp"
-#include "utils/mph_index/kmer_index.hpp"
-
-#include <string>
-#include <set>
-#include <unordered_map>
-
-namespace cclean {
-const unsigned K = 10;
-typedef Seq<K> KMer;
-
-class AdapterIndex {
-  typedef std::set<std::size_t> IndexValueType;
-  std::unordered_map<KMer, IndexValueType, KMer::hash> index_;
-
- public:
-  AdapterIndex() {}
-
-  void clear() {
-    index_.clear();
-    seqs_.clear();
-  }
-  IndexValueType& operator[](cclean::KMer s) { return index_[s]; }
-  auto find(cclean::KMer s) const -> decltype(index_.find(s)) { return index_.find(s); }
-  auto end() const -> decltype(index_.end()) { return index_.end(); }
-
-  bool contains(cclean::KMer s) const {
-    return index_.find(s) != index_.end();
-  }
-  const std::string& seq(size_t idx) const { return seqs_[idx]; }
-
- private:
-  std::vector<std::string> seqs_;
-
-  friend class AdapterIndexBuilder;
-};
-
-class AdapterIndexBuilder {
- public:
-  AdapterIndexBuilder() {}
-
-  void FillAdapterIndex(const std::string &db, AdapterIndex &index);
-
- private:
-  DECL_LOGGER("Index Building");
-};
-
-  // end of namespace
-}
-
-#endif // __CCLEAN__ADAPTERINDEX_HPP__
diff --git a/src/projects/cclean/additional.cpp b/src/projects/cclean/additional.cpp
deleted file mode 100644
index ed0065f..0000000
--- a/src/projects/cclean/additional.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#ifndef ADDITIONAL_CPP
-#define ADDITIONAL_CPP
-
-#include "output.hpp"
-#include "config_struct_cclean.hpp"
-#include "io/read_processor.hpp"
-
-  enum WorkModeType {
-    NONE = 0,
-    SINGLE_END = 1,
-    SINGLE_END_Q = 2,
-    BRUTE_SIMPLE = 3,
-    BRUTE_WITH_Q = 4
-  };
-
-  constexpr double MatchScore = 0.6;
-  constexpr double MismatchScore = 100;
-
-  class AbstractCclean {
-      // Abstract base class for cclean functors
-    public:
-      AbstractCclean(std::ostream &aligned_output, std::ostream &bed,
-                     const std::string &db,
-                     const WorkModeType &mode,
-                     const unsigned mlen,
-                     const bool full_inform = false)
-                :aligned_(0), full_inform_(full_inform), read_mlen_(mlen),
-                 mismatch_threshold_(cfg::get().mismatch_threshold),
-                 score_threshold_(cfg::get().score_treshold),
-                 aligned_part_fraction_(cfg::get().aligned_part_fraction),
-                 db_name_(db), mode_(mode), aligned_output_stream_(aligned_output),
-                 bad_stream_(bed)  {}
-      virtual Read operator()(const Read &read, bool *ok) = 0;
-      inline size_t aligned() { return aligned_; }
-      virtual ~AbstractCclean() {}
-
-    protected:
-      size_t aligned_;
-
-      const bool full_inform_;
-      const uint read_mlen_;
-      const double mismatch_threshold_;  // for nonquality mode
-      const double score_threshold_;  // for quality mode
-
-      const double aligned_part_fraction_;
-      const std::string &db_name_;
-      const WorkModeType mode_;
-
-      std::ostream &aligned_output_stream_;
-      std::ostream &bad_stream_;
-      // Abstract for clean functors
-      class AbstractCleanFunctor {
-        public:
-          inline virtual bool operator()(const Read &r,
-                          const StripedSmithWaterman::Alignment &a,
-                          double aligned_part, const std::string &adapter,
-                          double *best_score) = 0;
-          virtual ~AbstractCleanFunctor() {}
-      };
-  };
-
-#endif // ADDITIONAL_CPP
diff --git a/src/projects/cclean/brute_force_clean.cpp b/src/projects/cclean/brute_force_clean.cpp
deleted file mode 100644
index de35bb3..0000000
--- a/src/projects/cclean/brute_force_clean.cpp
+++ /dev/null
@@ -1,97 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#include "brute_force_clean.hpp"
-
-#include <string>
-#include <vector>
-#include <iostream>
-
-#include "adapter_index.hpp"
-#include <ssw/ssw_cpp.h> // Striped Smith-Waterman aligner
-#include "additional.cpp"
-#include "output.hpp"
-
-using std::string;
-using std::vector;
-using StripedSmithWaterman::Filter;
-using StripedSmithWaterman::Aligner;
-using StripedSmithWaterman::Alignment;
-using cclean_output::print_alignment;
-using cclean_output::print_bad;
-using cclean_output::print_match;
-using cclean_output::print_read;
-
-static inline bool is_alignment_good(const StripedSmithWaterman::Alignment& a,
-                              const std::string& sequence,
-                              const std::string& query,
-                              double aligned_part_fraction) {
-  // Сheck that query adjoins or even overlaps the sequence edge
-  return (std::min(a.query_end - a.query_begin + 1, a.ref_end - a.ref_begin + 1)
-         / (double) query.size() > aligned_part_fraction) &&
-         (a.ref_begin == 0 || a.ref_end == sequence.size() - 1);
-}
-
-Read BruteForceClean::operator()(const Read &read, bool *ok) {
-  const string &read_name = read.getName();
-  const string &seq_string = read.getSequenceString();
-  Filter filter; // SSW filter
-  Aligner aligner; // SSW aligner
-  aligner.SetReferenceSequence(seq_string.c_str(),
-                               static_cast<int>(seq_string.size()));
-  Alignment alignment;
-
-  //  It can be many alignment adaps, so we searching the most probable
-  double best_score;
-  if (mode_ == BRUTE_SIMPLE)  // so in both mode first overlap will initialize as best
-    best_score = mismatch_threshold_;
-  if (mode_ == BRUTE_WITH_Q)
-    best_score = score_threshold_;
-  std::string best_adapter = "";
-
-  //  For each adapter align read and adapter
-  for (std::string adapt_string: adap_seqs_) {
-
-    aligner.Align(adapt_string.c_str(), filter, &alignment);
-    if((*checker)(read, alignment, aligned_part_fraction_, adapt_string,
-                  &best_score)) {
-      best_adapter = adapt_string;
-    }
-  }
-
-  if (!best_adapter.empty())  {
-      aligner.Align(best_adapter.c_str(), filter, &alignment);
-      aligned_ += 1;
-      Read cuted_read = cclean_utils::CutRead(read, alignment.ref_begin,
-                                              alignment.ref_end);
-      if (full_inform_)  // If user want full output
-#       pragma omp critical
-        print_alignment(aligned_output_stream_, alignment, seq_string,
-                        best_adapter, read_name, db_name_);
-
-      // Cuted read must be >= minimum lenght specified by arg
-      if (cuted_read.getSequenceString().size() >= read_mlen_) {
-        if (full_inform_)  // If user want full output
-#         pragma omp critical
-          print_bad(bad_stream_, read_name, alignment.ref_begin, alignment.ref_end);
-        (*ok) = true;
-        return cuted_read;
-      }
-      else {
-        if (full_inform_)
-#         pragma omp critical
-          print_bad(bad_stream_, read_name, 0, alignment.ref_end);
-        (*ok) = false;
-        return cuted_read;
-      }
-    }
-  else {
-    // Read was not aligned with any adapter
-    (*ok) = true;
-    return read;
-  }
-}
diff --git a/src/projects/cclean/brute_force_clean.hpp b/src/projects/cclean/brute_force_clean.hpp
deleted file mode 100644
index daeabe5..0000000
--- a/src/projects/cclean/brute_force_clean.hpp
+++ /dev/null
@@ -1,72 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#ifndef BRUTE_FORCE_CLEAN_HPP
-#define BRUTE_FORCE_CLEAN_HPP
-
-#include "utils.hpp"
-#include "additional.cpp"
-
-class BruteForceClean: public AbstractCclean {
-  // Class that get read with oper() and clean it, if that possible
-  public:
-    BruteForceClean(std::ostream& aligned_output,
-                    std::ostream& bed,const std::string &db,
-                    const WorkModeType &mode,
-                    const uint mlen,
-                    const std::vector<std::string> &gen,
-                    const bool full_inform = false)
-      : AbstractCclean(aligned_output, bed, db, mode, mlen, full_inform),
-        adap_seqs_(gen)  {
-      if(mode == BRUTE_SIMPLE) checker = new BruteCleanFunctor;
-      if(mode == BRUTE_WITH_Q) checker = new BruteQualityCleanFunctor;
-    }
-    virtual ~BruteForceClean() { delete checker; }
-    // ReadProcessor class put each read in this operator
-    virtual Read operator()(const Read &read, bool *ok);
-
-  private:
-    const std::vector<std::string> &adap_seqs_;
-    std::string best_adapter_;
-    AbstractCleanFunctor *checker; // Checks is adapter in read
-
-    // Here goes functors for clean in different modes
-    class BruteCleanFunctor: public AbstractCleanFunctor {
-        virtual inline bool operator()(const Read &r,
-                                       const StripedSmithWaterman::Alignment &a,
-                                       double aligned_part, const std::string &adapter,
-                                       double *best_score) {
-          double cur_score = cclean_utils::
-                             GetMismatches(r.getSequenceString(), adapter, a);
-          if (cur_score < (*best_score) &&
-              cclean_utils::is_alignment_good(a, r.getSequenceString(), adapter,
-                                              aligned_part)) {
-            (*best_score) = cur_score;
-            return true;
-          }
-          return false;
-        }
-    };
-    class BruteQualityCleanFunctor: public AbstractCleanFunctor {
-        virtual inline bool operator()(const Read &r,
-                                       const StripedSmithWaterman::Alignment &a,
-                                       double aligned_part, const std::string &adapter,
-                                       double *best_score) {
-          double cur_score = cclean_utils::
-                             GetScoreWithQuality(a, r.getQuality().str());
-          if (cur_score >= (*best_score) &&
-              cclean_utils::is_alignment_good(a, r.getSequenceString(), adapter,
-                                              aligned_part)) {
-            (*best_score) = cur_score;
-            return true;
-          }
-          return false;
-        }
-    };
-};
-
-#endif // BRUTE_FORCE_CLEAN_HPP
diff --git a/src/projects/cclean/comparator.hpp b/src/projects/cclean/comparator.hpp
deleted file mode 100644
index 355431e..0000000
--- a/src/projects/cclean/comparator.hpp
+++ /dev/null
@@ -1,18 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#ifndef COMPARATOR_H_
-#define COMPARATOR_H_
-
-class Compare {
-   public:
-      bool operator() (std::string * lhs, std::string * rhs) const {
-          return *lhs < *rhs;
-      }
-};
-
-#endif /* COMPARATOR_H_ */
diff --git a/src/projects/cclean/config_struct_cclean.cpp b/src/projects/cclean/config_struct_cclean.cpp
deleted file mode 100644
index c9e9eda..0000000
--- a/src/projects/cclean/config_struct_cclean.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#include "config_struct_cclean.hpp"
-#include "pipeline/config_common.hpp"
-#include "utils/openmp_wrapper.h"
-
-void load(cclean_config& cfg, const std::string &filename) {
-  boost::property_tree::ptree pt;
-  boost::property_tree::read_info(filename, pt);
-
-  load(cfg, pt);
-}
-
-void load(cclean_config& cfg, boost::property_tree::ptree const& pt) {
-  using config_common::load;
-  load(cfg.use_quality, pt, "use_quality");
-  load(cfg.use_bruteforce, pt, "use_bruteforce");
-  load(cfg.debug_information, pt, "debug_information");
-
-  load(cfg.score_treshold, pt, "score_treshold");
-  load(cfg.mismatch_threshold, pt, "mismatch_threshold");
-  load(cfg.minimum_lenght, pt, "minimum_lenght");
-  load(cfg.nthreads, pt, "nthreads");
-  load(cfg.aligned_part_fraction, pt, "aligned_part_fraction");
-  load(cfg.buffer_size, pt, "buffer_size");
-
-  load(cfg.dataset_file_name, pt, "dataset");
-  load(cfg.database, pt, "database");
-  load(cfg.input_working_dir, pt, "input_working_dir");
-  load(cfg.output_working_dir, pt, "output_working_dir");
-
-  std::string file_name = cfg.dataset_file_name;
-  cfg.dataset.load(file_name);
-
-  // Fix number of threads according to OMP capabilities.
-  cfg.nthreads = std::min(cfg.nthreads, (unsigned)omp_get_max_threads());
-  // Inform OpenMP runtime about this :)
-  omp_set_num_threads(cfg.nthreads);
-}
diff --git a/src/projects/cclean/config_struct_cclean.hpp b/src/projects/cclean/config_struct_cclean.hpp
deleted file mode 100644
index e56cc92..0000000
--- a/src/projects/cclean/config_struct_cclean.hpp
+++ /dev/null
@@ -1,42 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#ifndef CONFIG_STRUCT_CCLEAN_HPP
-#define CONFIG_STRUCT_CCLEAN_HPP
-
-#include "pipeline/config_singl.hpp"
-#include <boost/property_tree/ptree_fwd.hpp>
-#include "pipeline/library.hpp"
-
-struct cclean_config {
-
-  bool use_quality;
-  bool use_bruteforce;
-  bool debug_information;
-
-  unsigned score_treshold;
-  unsigned mismatch_threshold;
-  unsigned minimum_lenght;
-  unsigned nthreads;
-  unsigned buffer_size;
-  double aligned_part_fraction;
-
-  std::string dataset_file_name;
-  std::string database;
-  std::string input_working_dir;
-  std::string output_working_dir;
-
-  io::DataSet<> dataset;
-};
-
-// main config load function
-void load(cclean_config& cfg, const std::string &filename);
-void load(cclean_config& cfg, boost::property_tree::ptree const& pt);
-
-typedef config_common::config<cclean_config> cfg;
-
-#endif
diff --git a/src/projects/cclean/job_wrappers.cpp b/src/projects/cclean/job_wrappers.cpp
deleted file mode 100644
index 3ea37c3..0000000
--- a/src/projects/cclean/job_wrappers.cpp
+++ /dev/null
@@ -1,97 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#include <set>
-
-#include "job_wrappers.hpp"
-#include "utils/logger/log_writers.hpp"
-#include "adapter_index.hpp"
-#include "valid_kmer_generator.hpp"
-#include "adapter_index.hpp"
-#include "output.hpp"
-#include "ssw/ssw_cpp.h"
-#include "utils.hpp"
-
-using cclean_output::print_alignment;
-using cclean_output::print_bad;
-using cclean_output::print_match;
-using cclean_output::print_read;
-
-Read SimpleClean::operator()(const Read &read, bool *ok)
-{
-  const std::string& name = read.getName();
-  const std::string& sequence = read.getSequenceString();
-
-  std::set<size_t> to_check;
-  ValidKMerGenerator<cclean::K> gen(sequence.c_str(), NULL, sequence.size());
-  while (gen.HasMore()) {
-    cclean::KMer kmer = gen.kmer();
-
-    auto it = index_.find(kmer);
-    if (it != index_.end())
-      to_check.insert(it->second.begin(), it->second.end());
-
-    gen.Next();
-  }
-
-  //  Try to align the artifacts for corresponding kmers
-  StripedSmithWaterman::Aligner aligner;
-  StripedSmithWaterman::Filter filter;
-  StripedSmithWaterman::Alignment alignment; //  why it was in for loop?
-  aligner.SetReferenceSequence(sequence.c_str(), sequence.size());
-
-  //  Pointer on best match adapter
-  const std::string *best_adapter = nullptr;
-  double best_score;
-  if (mode_ == SINGLE_END)  // so in both mode first overlap will initialize as best
-    best_score = mismatch_threshold_;
-  if (mode_ == SINGLE_END_Q)
-    best_score = score_threshold_;
-  best_adapter = nullptr;
-
-  for (auto it = to_check.begin(), et = to_check.end(); it != et; ++it) {
-    const std::string &query = index_.seq(*it);
-    aligner.Align(query.c_str(), filter, &alignment);
-    // Check is this apapter better then previous best
-    if((*checker)(read, alignment, aligned_part_fraction_, query,
-                  &best_score)) {
-      best_adapter = &query;
-    }
-  }
-
-  if (best_adapter != nullptr)  {
-      aligner.Align(best_adapter->c_str(), filter, &alignment);
-      aligned_ += 1;
-      Read cuted_read = cclean_utils::CutRead(read, alignment.ref_begin,
-                                              alignment.ref_end);
-      if (full_inform_)  // If user want full output
-#       pragma omp critical
-        print_alignment(aligned_output_stream_, alignment, sequence,
-                        *best_adapter,name, db_name_);
-
-      // Cuted read must be >= minimum lenght specified by arg
-      if (cuted_read.getSequenceString().size() >= read_mlen_) {
-        if (full_inform_)
-#         pragma omp critical
-          print_bad(bad_stream_, name, alignment.ref_begin, alignment.ref_end);
-        (*ok) = true;
-        return cuted_read;
-      }
-      else {
-        if (full_inform_)
-#         pragma omp critical
-          print_bad(bad_stream_, name, 0, alignment.ref_end);
-        (*ok) = false;
-        return cuted_read;
-      }
-  }
-  else {
-    // Read was not aligned with any adapter
-    (*ok) = true;
-    return read;
-  }
-}
diff --git a/src/projects/cclean/job_wrappers.hpp b/src/projects/cclean/job_wrappers.hpp
deleted file mode 100644
index 7adccb1..0000000
--- a/src/projects/cclean/job_wrappers.hpp
+++ /dev/null
@@ -1,73 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#ifndef JOB_WRAPERS_HPP
-#define JOB_WRAPERS_HPP
-
-#include "additional.cpp"
-#include "utils.hpp"
-
-namespace cclean {
-  class AdapterIndex;
-}
-
-class SimpleClean: public AbstractCclean {
-  public:
-    SimpleClean(std::ostream &aligned_output,
-                std::ostream &bed, const std::string &db,
-                const WorkModeType &mode,
-                const unsigned mlen,
-                const cclean::AdapterIndex &index,
-                const bool full_inform = false)
-      : AbstractCclean(aligned_output, bed, db, mode, mlen, full_inform),
-        index_(index)  {
-      if(mode_ == SINGLE_END) checker = new SimpleCleanFunctor;
-      if(mode_ == SINGLE_END_Q) checker = new SimpleQualityCleanFunctor;
-    }
-    virtual ~SimpleClean() { delete checker; }
-    virtual Read operator()(const Read &read, bool *ok);
-
-  private:
-    const cclean::AdapterIndex &index_;
-    AbstractCleanFunctor *checker; // Checks is adapter in read
-
-    // Here goes functors for clean in different modes
-    class SimpleCleanFunctor: public AbstractCleanFunctor {
-        virtual inline bool operator()(const Read &r,
-                                       const StripedSmithWaterman::Alignment &a,
-                                       double aligned_part, const std::string &adapter,
-                                       double *best_score) {
-          double cur_score = cclean_utils::
-                             GetMismatches(r.getSequenceString(), adapter, a);
-          if (cur_score < (*best_score) &&
-              cclean_utils::is_alignment_good(a, r.getSequenceString(), adapter,
-                                aligned_part)) {
-              (*best_score) = cur_score;
-              return true;
-          }
-          return false;
-        }
-    };
-    class SimpleQualityCleanFunctor: public AbstractCleanFunctor {
-        virtual inline bool operator()(const Read &r,
-                                       const StripedSmithWaterman::Alignment &a,
-                                       double aligned_part, const std::string &adapter,
-                                       double *best_score) {
-          double cur_score = cclean_utils::
-                             GetScoreWithQuality(a, r.getQuality().str());
-          if (cur_score >= (*best_score) &&
-              cclean_utils::is_alignment_good(a, r.getSequenceString(), adapter,
-                                aligned_part)) {
-              (*best_score) = cur_score;
-              return true;
-          }
-          return false;
-        }
-    };
-};
-
-#endif /* JOBWRAPPERS_H_ */
diff --git a/src/projects/cclean/main.cpp b/src/projects/cclean/main.cpp
deleted file mode 100644
index 4d50785..0000000
--- a/src/projects/cclean/main.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#include <iostream>
-#include <string>
-#include <map>
-#include <exception>
-
-#include "sequence/seq.hpp"
-#include "utils/logger/log_writers.hpp"
-#include "utils/memory_limit.hpp"
-#include "running_modes.hpp"
-#include "config_struct_cclean.hpp"
-#include "utils/simple_tools.hpp"
-#include "adapter_index.hpp"
-#include "utils.hpp"
-
-#include "valid_kmer_generator.hpp"
-#include "io/read_processor.hpp"
-#include "modules/ssw_cpp.h"
-#include "additional.cpp"
-
-#include "job_wrappers.hpp"
-#include "brute_force_clean.hpp"
-
-using logging::logger;
-using logging::create_logger;
-using logging::console_writer;
-using std::string;
-
-constexpr int CONFIG_FILE_ARG = 1;
-
-void usage() {
-  std::cout << "usage: cclean [program config file]" << std::endl;
-}
-
-void create_console_logger() {
-  logger *lg = create_logger("");
-  lg->add_writer(std::make_shared<console_writer>());
-  attach_logger(lg);
-}
-
-int main(int argc, char *argv[]) {
-
-  create_console_logger();
-
-  if (argc < 2) {
-    usage();
-    return EXIT_FAILURE;
-  }
-
-  std::string config_file = argv[CONFIG_FILE_ARG];
-  INFO("Loading config from " << config_file.c_str());
-  if (!path::FileExists(config_file)) {
-      ERROR("File " + config_file + " doesn't exists.");
-      return EXIT_FAILURE;
-  }
-  cfg::create_instance(config_file);
-
-  const std::string &database = cfg::get().database;
-  if (!path::FileExists(database)) {
-      ERROR("File " + database + " doesn't exists.");
-      return EXIT_FAILURE;
-  }
-  const std::string &dataset = cfg::get().dataset_file_name;
-  if (!path::FileExists(dataset)) {
-      ERROR("File " + dataset + " doesn't exists.");
-      return EXIT_FAILURE;
-  }
-
-  clock_t start = clock();
-
-  Cleaner::ProcessDataset();  // Main work here
-
-  INFO("DONE");
-  clock_t ends = clock();
-  INFO("Processor Time Spent: " << (double) (ends - start) / CLOCKS_PER_SEC
-       << " seconds.");
-  INFO("Goodbye!");
-
-  return EXIT_SUCCESS;
-}
diff --git a/src/projects/cclean/output.cpp b/src/projects/cclean/output.cpp
deleted file mode 100644
index ff85f99..0000000
--- a/src/projects/cclean/output.cpp
+++ /dev/null
@@ -1,82 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#include <iostream>
-#include <algorithm>
-#include <iostream>
-#include <fstream>
-#include "output.hpp"
-#include "utils.hpp"
-
-namespace cclean_output {
-
-void print_n_times(std::ostream& output, char c, int n) {
-  for (int i = 0; i < n; ++i) {
-    output << c;
-  }
-}
-
-void print_alignment(std::ostream& output, const StripedSmithWaterman::Alignment &data,
-        const std::string& ref, const std::string& query,
-        const std::string& name, const std::string& database_name) {
-
-  output << "Alignment: input sequence (first line) " << name << " alignes "
-         << std::endl
-         << "sequence from database (last line) " << database_name << std::endl;
-
-  std::string aligned_query, aligned_ref;
-  cclean_utils::RestoreFromCigar(ref, query, aligned_ref, aligned_query, data);
-
-  // case when pattern's start pos is less than text one
-  int text_offset = data.ref_begin - data.query_begin < 0 ? data.query_begin
-                                                            - data.ref_begin : 0;
-
-  // ref = read
-  print_n_times(output, ' ', text_offset);
-  output << ref << std::endl;
-  print_n_times(output, ' ', text_offset + data.ref_begin);
-  output << aligned_ref << std::endl;
-
-  // vertical dashes
-  print_n_times(output, ' ', text_offset + data.ref_begin);
-  for (int i = 0; i < (int)std::min(aligned_query.length(), aligned_ref.length()); ++i) {
-   aligned_query.at(i) == aligned_ref.at(i) ? output << "|" : output << "*";
-  }
-  output << std::endl;
-
-  // query = contamination
-  print_n_times(output, ' ', text_offset + data.ref_begin);
-  output << aligned_query << std::endl;
-  print_n_times(output, ' ', data.ref_begin - data.query_begin);
-  output << query << std::endl;
-  output << std::endl;
- }
-
-void print_match(std::ostream& output, std::ostream& bed, std::map<std::string*,
-                  std::vector<int>, Compare>& res, const std::string& name,
-                  const std::string& seq, const std::string &db_name) {
-  for (auto it = res.begin(); it != res.end(); ++it) {
-   for (auto it_pos = it->second.begin(); it_pos != it->second.end(); ++it_pos) {
-
-    output << "Match: input sequence (first line) " << name << " matches "
-           << std::endl
-           << "sequence from database (2nd line) " << db_name << std::endl;
-
-    output << seq << std::endl;
-    print_n_times(output, ' ', *it_pos);
-    print_n_times(output, '|', it->first->length());
-    output << std::endl;
-    print_n_times(output, ' ', *it_pos);
-    output << *(it->first) << std::endl;
-    output << std::endl;
-
-    print_bad(bed, name, *it_pos, *it_pos + it->first->size());
-   }
-  }
-}
-//end of namespace
-}
diff --git a/src/projects/cclean/output.hpp b/src/projects/cclean/output.hpp
deleted file mode 100644
index 8266a45..0000000
--- a/src/projects/cclean/output.hpp
+++ /dev/null
@@ -1,49 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#ifndef OUTPUT_HPP
-#define OUTPUT_HPP
-
-#include <string>
-#include <vector>
-#include <map>
-#include <io/read.hpp>
-#include <ostream>
-#include "comparator.hpp"
-#include "modules/ssw_cpp.h"
-
-namespace cclean_output {
-
-void print_n_times(std::ostream& output, char c, int n);
-
-void print_alignment(std::ostream& output,
-                     const StripedSmithWaterman::Alignment & data,
-                     const std::string& ref,
-                     const std::string& query, const std::string& name,
-                     const std::string& database_name);
-
-void print_match(std::ostream& output, std::ostream& bed, std::map<std::string*,
-                 std::vector<int>, Compare>& res, const std::string& name,
-                 const std::string& seq, const std::string &db_name);
-
-void print_bad(std::ostream& output, const std::string & name,
-               int start, int stop);
-
-inline void print_read(std::ostream& output, const Read &read) {
-    std::ofstream &stream =
-    reinterpret_cast<std::ofstream&>(output);
-    read.print(stream, Read::PHRED_OFFSET);
-}
-
-inline void print_bad(std::ostream& output, const std::string & name,
-                      int start, int stop) {
-         output << name << "\t" << start << "\t" << stop << std::endl;
-}
-
-// end of namespace
-}
-#endif /* OUTPUT_H_ */
diff --git a/src/projects/cclean/running_modes.cpp b/src/projects/cclean/running_modes.cpp
deleted file mode 100644
index 73dcdfb..0000000
--- a/src/projects/cclean/running_modes.cpp
+++ /dev/null
@@ -1,268 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#include "running_modes.hpp"
-
-#include <string>
-#include <unordered_map>
-#include <algorithm>
-
-#include "adapter_index.hpp"
-#include "output.hpp"
-#include "io/read_processor.hpp"
-#include "pipeline/library.hpp"
-#include "utils/logger/log_writers.hpp"
-#include "job_wrappers.hpp"
-#include "brute_force_clean.hpp"
-
-AbstractCclean *Cleaner::getCleaner(std::ofstream *outf_alig_debug,
-                                    std::ofstream *outf_bad_deb,
-                                    const std::string &db, WorkModeType mode,
-                                    unsigned mlen,
-                                    const cclean::AdapterIndex &index,
-                                    bool deb_info) {
-  AbstractCclean *cleaner;  // Creating cleaner for reads
-  if (mode == SINGLE_END || mode == SINGLE_END_Q)
-    cleaner = new SimpleClean(*outf_alig_debug, *outf_bad_deb, db,
-                              mode, mlen, index, deb_info);
-  if (mode == BRUTE_SIMPLE || mode == BRUTE_WITH_Q)
-    cleaner = new BruteForceClean(*outf_alig_debug, *outf_bad_deb, db,
-                                  mode, mlen, index.GetSeqs(), deb_info);
-  return cleaner;
-}
-
-void Cleaner::ProcessDataset() {
-  // Options proceed
-  const std::string db = cfg::get().database;
-  const WorkModeType mode = getMode();
-
-  cclean::AdapterIndex index;
-  cclean::AdapterIndexBuilder().FillAdapterIndex(db, index);
-
-  const io::DataSet<> &dataset = cfg::get().dataset;
-  io::DataSet<> outdataset;
-  // Proccessing dataset. Iterating through libraries
-  for (auto it = dataset.library_begin(), et = dataset.library_end(); it != et; ++it) {
-    const io::SequencingLibrary<> &lib = *it;
-    io::SequencingLibrary<> outlib = lib;
-    outlib.clear();
-    // Iterating through paired reads in current library lib
-    for (auto I = lib.paired_begin(), E = lib.paired_end(); I != E; ++I) {
-      INFO("Correcting pair reads from " << I->first << " and " << I->second);
-
-      const std::string &file_name_l = I->first;
-      const std::string &file_name_r = I->second;
-      const std::string outcorl = getReadsFilename(cfg::get().output_working_dir,
-                                             file_name_l, "correct_l");
-      const std::string outcorr = getReadsFilename(cfg::get().output_working_dir,
-                                             file_name_r, "correct_r");
-      const std::string unpaired = getPureFilename(file_name_l) + "_" +
-                                   getPureFilename(file_name_r);
-      const std::string outcoru = getReadsFilename(cfg::get().output_working_dir,
-                                             unpaired, "correct_u");
-      const std::string outbadl = getReadsFilename(cfg::get().output_working_dir,
-                                                   file_name_l, "bad");
-      const std::string outbadr = getReadsFilename(cfg::get().output_working_dir,
-                                                   file_name_r, "bad");
-
-      std::ofstream ofcorl(outcorl.c_str());
-      std::ofstream ofbadl(outbadl.c_str());
-      std::ofstream ofcorr(outcorr.c_str());
-      std::ofstream ofbadr(outbadr.c_str());
-      std::ofstream ofunp (outcoru.c_str());
-
-      CorrectPairedReadFiles(index, file_name_l, file_name_r, &ofbadl, &ofcorl,
-                             &ofbadr, &ofcorr, &ofunp, mode);
-      outlib.push_back_paired(outcorl, outcorr);
-      outlib.push_back_single(outcoru);
-    }
-
-    for (auto I = lib.single_begin(), E = lib.single_end(); I != E; ++I) {
-      INFO("Correcting single reads from " << *I);
-
-      const std::string reads_file_name = *I;
-      const std::string outcor = getReadsFilename(cfg::get().output_working_dir,
-                                                  reads_file_name, "correct");
-      const std::string outbad = getReadsFilename(cfg::get().output_working_dir,
-                                                  reads_file_name, "bad");
-
-      std::ofstream ofgood(outcor.c_str());
-      std::ofstream ofbad(outbad.c_str());
-
-      CorrectReadFile(index, reads_file_name, &ofgood, &ofbad, mode);
-      outlib.push_back_single(outcor);
-    }
-    outdataset.push_back(outlib);
-  }
-
-  cfg::get_writable().dataset = outdataset;
-}
-
-void Cleaner::CorrectReadFile(const cclean::AdapterIndex &index,
-                              const std::string &fname, std::ofstream *outf_good,
-                              std::ofstream *outf_bad, WorkModeType mode) {
-  const unsigned nthreads = cfg::get().nthreads;
-  const std::string db = cfg::get().database;
-  const unsigned mlen = cfg::get().minimum_lenght;
-  const size_t read_buffer_size = nthreads * cfg::get().buffer_size;
-  std::vector<Read> reads(read_buffer_size);
-  std::vector<bool> res(read_buffer_size, false);
-
-  const bool deb_info = cfg::get().debug_information;
-  std::string bad_out_debug = "";
-  std::string aligned_out_debug = "";
-  if (deb_info) {
-    // Else ofstreams will be not used, so there is no sense to create empty files
-    // So ofstreams will be created with empty strings
-    bad_out_debug = getReadsFilename(cfg::get().output_working_dir,
-                                     fname, "debug.bad");
-    aligned_out_debug = getReadsFilename(cfg::get().output_working_dir,
-                                       fname, "debug.alig");
-  }
-  std::ofstream ofbad_deb(bad_out_debug.c_str());
-  std::ofstream ofalig_deb(aligned_out_debug.c_str());
-
-  unsigned buffer_no = 0;
-  unsigned count_bad = 0;
-  unsigned count_total = 0;
-
-  ireadstream irs(fname);
-  VERIFY(irs.is_open());
-
-  AbstractCclean *cleaner = getCleaner(&ofalig_deb, &ofbad_deb, db, mode, mlen,
-                                       index, deb_info);
-
-  while (!irs.eof()) {
-    unsigned buf_size = 0;
-    for (; buf_size < read_buffer_size && !irs.eof(); ++buf_size) {
-      irs >> reads[buf_size];
-    }
-    if(deb_info) INFO("Prepared batch " << buffer_no << " of "
-                      << buf_size << " reads.");
-    count_bad += CorrectReadsBatch(cleaner, &res, &reads, buf_size, nthreads);
-    count_total += buf_size;
-    if (deb_info) INFO("Processed batch " << buffer_no);
-    for (size_t i = 0; i < buf_size; ++i) { // Here output reads in files
-      reads[i].print(*(res[i] ? outf_good : outf_bad), Read::PHRED_OFFSET);
-    }
-    if(deb_info) INFO("Written batch " << buffer_no);
-    ++buffer_no;
-  }
-
-  delete cleaner;
-  // Process info about results
-  const double percent_val = static_cast<double>(count_total) / 100.0;
-  std::ostringstream percent_bad;
-  percent_bad << std::fixed << std::setprecision(2) <<
-                   (static_cast<double>(count_bad) / percent_val);
-  INFO("Total proceed " + std::to_string(count_total) + ", " +
-       std::to_string(count_bad) + " reads (" + percent_bad.str() +
-       " percents of total) is bad.");
-}
-
-void Cleaner::CorrectPairedReadFiles(const cclean::AdapterIndex &index,
-                                     const std::string &fnamel,
-                                     const std::string &fnamer, std::ofstream *ofbadl,
-                                     std::ofstream *ofcorl, std::ofstream *ofbadr,
-                                     std::ofstream *ofcorr, std::ofstream *ofunp,
-                                     WorkModeType mode) {
-  const unsigned nthreads = cfg::get().nthreads;
-  const std::string db = cfg::get().database;
-  const unsigned mlen = cfg::get().minimum_lenght;
-  const size_t read_buffer_size = nthreads * cfg::get().buffer_size;
-
-  std::vector<Read> left_reads(read_buffer_size);
-  std::vector<Read> right_reads(read_buffer_size);
-  std::vector<bool> left_res(read_buffer_size, false);
-  std::vector<bool> right_res(read_buffer_size, false);
-
-  ireadstream irsl(fnamel);
-  ireadstream irsr(fnamer);
-  VERIFY(irsl.is_open());
-  VERIFY(irsr.is_open());
-
-  const bool deb_info = cfg::get().debug_information;
-  std::string bad_out_deb_l = "";
-  std::string aligned_out_deb_l = "";
-  std::string bad_out_deb_r = "";
-  std::string aligned_out_deb_r = "";
-  if (deb_info) {
-    // Else ofstreams will be not used, so there is no sense to create empty files
-    // So ofstreams will be created with empty strings
-    bad_out_deb_l = getReadsFilename(cfg::get().output_working_dir,
-                                     fnamel, "debug.bad");
-    aligned_out_deb_l = getReadsFilename(cfg::get().output_working_dir,
-                                       fnamel, "debug.alig");
-    bad_out_deb_r = getReadsFilename(cfg::get().output_working_dir,
-                                     fnamer, "debug.bad");
-    aligned_out_deb_r = getReadsFilename(cfg::get().output_working_dir,
-                                       fnamer, "debug.alig");
-  }
-  std::ofstream ofbad_deb_l(bad_out_deb_l.c_str());
-  std::ofstream ofalig_deb_l(aligned_out_deb_l.c_str());
-  std::ofstream ofbad_deb_r(bad_out_deb_r.c_str());
-  std::ofstream ofalig_deb_r(aligned_out_deb_r.c_str());
-
-  AbstractCclean *cleaner_l = getCleaner(&ofalig_deb_l, &ofbad_deb_l, db, mode,
-                                         mlen, index, deb_info);
-  AbstractCclean *cleaner_r = getCleaner(&ofalig_deb_r, &ofbad_deb_r, db, mode,
-                                         mlen, index, deb_info);
-  unsigned buffer_no = 0;
-  unsigned count_bad_l = 0;
-  unsigned count_bad_r = 0;
-  unsigned count_total = 0;
-
-  while (!irsl.eof() && !irsr.eof()) {
-    unsigned buf_size = 0;
-    for (; buf_size < read_buffer_size && !irsl.eof() &&
-         !irsr.eof(); ++buf_size) {
-      irsl >> left_reads[buf_size];
-      irsr >> right_reads[buf_size];
-    }
-    if(deb_info) INFO("Prepared batch " << buffer_no << " of " << buf_size
-                       << " reads.");
-
-    count_bad_l += CorrectReadsBatch(cleaner_l, &left_res, &left_reads,
-                                     buf_size, nthreads);
-    count_bad_r += CorrectReadsBatch(cleaner_r, &right_res, &right_reads,
-                                     buf_size, nthreads);
-    count_total += buf_size;
-
-    if(deb_info) INFO("Processed batch " << buffer_no);
-    for (size_t i = 0; i < buf_size; ++i) {
-      if (left_res[i] && right_res[i]) {
-        left_reads[i].print(*ofcorl, Read::PHRED_OFFSET);
-        right_reads[i].print(*ofcorr, Read::PHRED_OFFSET);
-      }
-      else {
-        left_reads[i].print(*(left_res[i] ? ofunp : ofbadl),
-                            Read::PHRED_OFFSET);
-        right_reads[i].print(*(right_res[i] ? ofunp : ofbadr),
-                             Read::PHRED_OFFSET);
-      }
-    }
-    if(deb_info) INFO("Written batch " << buffer_no);
-    ++buffer_no;
-  }
-
-  delete cleaner_l;
-  delete cleaner_r;
-
-  // Process info abouts results
-  const double percent_val = static_cast<double>(count_total) / 100.0;
-  std::ostringstream percent_bad_l;
-  std::ostringstream percent_bad_r;
-  percent_bad_l << std::fixed << std::setprecision(2) <<
-                   (static_cast<double>(count_bad_l) / percent_val);
-  percent_bad_r << std::fixed << std::setprecision(2) <<
-                   (static_cast<double>(count_bad_r) / percent_val);
-  INFO("Total proceed " + std::to_string(count_total) + ", " +
-       std::to_string(count_bad_l) + " left reads (" +
-       percent_bad_l.str() + " percents of total) is bad" + ", " +
-       std::to_string(count_bad_r) + " right reads (" +
-       percent_bad_r.str() + " percents of total) is bad.");
-}
diff --git a/src/projects/cclean/running_modes.hpp b/src/projects/cclean/running_modes.hpp
deleted file mode 100644
index c2709db..0000000
--- a/src/projects/cclean/running_modes.hpp
+++ /dev/null
@@ -1,93 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#ifndef RUNNING_MODES_HPP
-#define RUNNING_MODES_HPP
-
-#include <unordered_map>
-#include <string>
-#include <iostream>
-#include <iomanip>
-#include "additional.cpp"
-#include "adapter_index.hpp"
-
-class Cleaner {
-
-  public:
-    static void ProcessDataset();
-    // Correct reads in a given file
-    static void CorrectReadFile(const cclean::AdapterIndex &index,
-                                const std::string &fname,
-                                std::ofstream *outf_good, std::ofstream *outf_bad,
-                                WorkModeType mode);
-    // Correct reads in a given pair of files
-    static void CorrectPairedReadFiles(const cclean::AdapterIndex &index,
-                                       const std::string &fnamel,
-                                       const std::string &fnamer,
-                                       std::ofstream *ofbadl,
-                                       std::ofstream *ofcorl,
-                                       std::ofstream *ofbadr,
-                                       std::ofstream *ofcorr,
-                                       std::ofstream *ofunp,
-                                       WorkModeType mode);
-    // Parallel correction of batch of reads
-    static inline unsigned CorrectReadsBatch(AbstractCclean *cleaner,
-                                             std::vector<bool> *results,
-                                             std::vector<Read> *reads,
-                                             size_t buf_size, unsigned nthreads) {
-      unsigned bad = 0;
-#     pragma omp parallel for shared(reads, results) num_threads(nthreads)
-      for (size_t i = 0; i < buf_size; ++i) {
-        bool ok;
-        (*reads)[i] = (*cleaner)((*reads)[i], &ok);
-        (*results)[i] = ok;
-        if (!ok) ++bad;
-      }
-      return bad;
-    }
-    // Get pure file name without extension
-    inline static std::string getPureFilename(const std::string &fname) {
-      std::string tmp = path::filename(fname);
-      std::string pure_file_name = "";
-      size_t pos = tmp.find(".fastq");
-      if (pos == std::string::npos)
-        pure_file_name = tmp;
-      else
-        pure_file_name = tmp.substr(0, pos);
-      return pure_file_name;
-    }
-    // Get filename for reads
-    inline static std::string getReadsFilename(const std::string &dirprefix,
-                                               const std::string &fname,
-                                               const std::string &suffix) {
-      const std::string &pure_file_name = getPureFilename(fname);
-      return (dirprefix + "/" + pure_file_name + "." + suffix + ".fastq");
-    }
-    // Define mode depends on config file data
-    inline static WorkModeType getMode() {
-        WorkModeType mode;
-        if (cfg::get().use_bruteforce) {
-          if (cfg::get().use_quality) mode = BRUTE_WITH_Q;
-          else                        mode = BRUTE_SIMPLE;
-        }
-        else {
-          if (cfg::get().use_quality) mode = SINGLE_END_Q;
-          else                        mode = SINGLE_END;
-        }
-        return mode;
-    }
-    // Create and return cleaner depends on mode
-    inline static AbstractCclean* getCleaner(std::ofstream *outf_alig_debug,
-                                             std::ofstream *outf_bad_deb,
-                                             const std::string &db,
-                                             WorkModeType mode, unsigned mlen,
-                                             const cclean::AdapterIndex &index,
-                                             bool deb_info);
-
-};
-
-#endif /* RUNNING_MODES_H_ */
diff --git a/src/projects/cclean/utils.cpp b/src/projects/cclean/utils.cpp
deleted file mode 100644
index a5f0fc1..0000000
--- a/src/projects/cclean/utils.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#include <iostream>
-#include <algorithm>
-#include <map>
-#include <string>
-#include <vector>
-
-#include "utils.hpp"
-#include <ssw/ssw_cpp.h>
-#include <ssw/ssw_cpp.h> // Striped Smith-Waterman aligner
-#include <io/read.hpp>
-#include "additional.cpp"
-
-namespace cclean_utils {
-
-inline std::string ReverseComplement(const std::string& read) {
-  std::map<char, char> reverse;
-  reverse['C'] = 'G';
-  reverse['G'] = 'C';
-  reverse['T'] = 'A';
-  reverse['A'] = 'T';
-  reverse['N'] = 'N';
-
-  std::vector<char> res;
-  for(int i = 0; i < (int) read.length(); ++i) {
-   res.push_back(reverse[read[i]]);
-  }
-
-  std::reverse(res.begin(), res.end());
-  return std::string(res.begin(), res.end());
-}
-
-double GetScoreWithQuality(const StripedSmithWaterman::Alignment &a,
-                                            const Quality &qual)
-{ // Try to get more realistic align score depend on read quality
-  // Mathes and mismatches get from cigar alignment string below
-  double score = 0.0;
-  int ref_pos = 0, query_pos = 0;
-  for (std::vector<uint32_t>::const_iterator it = a.cigar.begin();
-       it != a.cigar.end(); ++it) {
-
-    int num = (*it & 0xFFFFFFF0) >> 4;
-    int op_code = *it & 0x0000000F;
-
-    switch (op_code) {
-      case 0: { //match
-        for (int i = 0; i < num; ++i, ++ref_pos, ++query_pos)
-          score += MatchScore;
-        break;
-      }
-      case 1: { //insert
-        for (int i = 0; i < num; ++i, ++query_pos)
-          score -= (double)qual[query_pos] / MismatchScore;
-        break;
-      }
-      case 2: { //del
-        for (int i = 0; i < num; ++i, ++ref_pos)
-          score -= (double)qual[query_pos] / MismatchScore;
-        break;
-      }
-      default:
-        break;
-    }
-  }
-  return score;
-}
-
-Read CutRead(const Read &r, int start_pos, int end_pos) {
-  if(start_pos > end_pos)  return r;
-  //  Step 1: cutting read sequence
-  Read read = r;
-  std::string read_seq = read.getSequenceString();
-  std::string cuted_read_seq(std::string(read_seq, 0, start_pos) +
-                             std::string(read_seq, end_pos + 1));
-  read.setSequence(cuted_read_seq.c_str());
-
-  //  Step 2: cutting read quality string
-  std::string qual_string = read.getQuality().str();
-  if(qual_string.empty())  return read;
-  std::string cuted_qual_string(std::string(qual_string, 0, start_pos) +
-                                std::string(qual_string, end_pos + 1));
-  read.setQuality(cuted_qual_string.c_str(), 0);
-  return read;
-}
-
-void RestoreFromCigar(const std::string& ref, const std::string& query,
-                      std::string& out_ref, std::string& out_query,
-                      const StripedSmithWaterman::Alignment& a) {
-
-  std::vector<char> aligned_ref, aligned_query;
-  int ref_pos = 0, query_pos = 0;
-  for (std::vector<uint32_t>::const_iterator it = a.cigar.begin();
-       it != a.cigar.end(); ++it) {
-    int num = (*it & 0xFFFFFFF0) >> 4;
-    int op_code = *it & 0x0000000F;
-
-    switch (op_code) {
-      case 0: { //match
-        for (int i = 0; i < num; ++i) {
-          aligned_ref.push_back(ref[a.ref_begin + ref_pos++]);
-          aligned_query.push_back(query[a.query_begin + query_pos++]);
-        }
-        break;
-      }
-      case 1: { //insert
-        for (int i = 0; i < num; ++i) {
-          aligned_ref.push_back('-');
-          aligned_query.push_back(query[a.query_begin + query_pos++]);
-        }
-        break;
-      }
-      case 2: { //del
-        for (int i = 0; i < num; ++i) {
-          aligned_ref.push_back(ref[a.ref_begin + ref_pos++]);
-          aligned_query.push_back('-');
-        }
-        break;
-     }
-      default:
-        break;
-    }
-
-  }
-
-  out_ref = std::string(aligned_ref.begin(), aligned_ref.end());
-  out_query = std::string(aligned_query.begin(), aligned_query.end());
-}
-
-  // end of namespace cclean_utils
-}
diff --git a/src/projects/cclean/utils.hpp b/src/projects/cclean/utils.hpp
deleted file mode 100644
index a71a200..0000000
--- a/src/projects/cclean/utils.hpp
+++ /dev/null
@@ -1,58 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#ifndef UTILS_HPP
-#define UTILS_HPP
-
-#include <ssw/ssw_cpp.h> // Striped Smith-Waterman aligner
-#include <io/read.hpp>
-#include "additional.cpp"
-#include "running_modes.hpp"
-#include "adapter_index.hpp"
-
-namespace cclean_utils {
-
-std::string ReverseComplement(const std::string& read);
-
-std::unordered_map<std::string, std::string> ProcessArgs(int argc, char *argv[],
-                                                         bool *ok, std::string *error);
-
-double GetScoreWithQuality(const StripedSmithWaterman::Alignment &a,
-                                            const Quality &qual);
-
-inline bool is_alignment_good(const StripedSmithWaterman::Alignment& a,
-                              const std::string& sequence,
-                              const std::string& query,
-                              double aligned_part_fraction) {
-  //  Сheck that query adjoins or even overlaps the sequence edge
-  return (std::min(a.query_end - a.query_begin + 1, a.ref_end - a.ref_begin + 1)
-         / (double) query.size() > aligned_part_fraction) /*&&
-         (a.ref_begin == 0 || a.ref_end == sequence.size() - 1)*/;
-}
-
-// Cut read from start to end position of best aligment with adapter
-Read CutRead(const Read &r, int start_pos, int end_pos);
-void RestoreFromCigar(const std::string& ref, const std::string& query,
-                      std::string& out_ref, std::string& out_query,
-                      const StripedSmithWaterman::Alignment& a);
-
-inline double GetMismatches(const std::string &read, const std::string &adapter,
-                         const StripedSmithWaterman::Alignment &a)  {
-  std::string aligned_read;
-  std::string aligned_adapter;
-  RestoreFromCigar(read, adapter, aligned_read, aligned_adapter, a);
-  int size = (int)std::min(aligned_read.length(), aligned_adapter.length());
-  int mismatched_score = 0;
-  for (int i = 0; i < size; ++i)  {
-    if (aligned_read[i] != aligned_adapter[i])
-      ++mismatched_score;
-  }
-  return static_cast<double>(mismatched_score);
-}
-// end of namespace
-}
-#endif /* UTILS_HPP */
diff --git a/src/projects/cclean/valid_kmer_generator.hpp b/src/projects/cclean/valid_kmer_generator.hpp
deleted file mode 100644
index a03a9b3..0000000
--- a/src/projects/cclean/valid_kmer_generator.hpp
+++ /dev/null
@@ -1,198 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#ifndef HAMMER_VALIDKMERGENERATOR_HPP_
-#define HAMMER_VALIDKMERGENERATOR_HPP_
-
-#include "io/read.hpp"
-#include "sequence/seq.hpp"
-
-#include <string>
-#include <vector>
-
-#include <cstdint>
-#include <cmath>
-
-/**
- * This class is designed to iterate through valid k-mers in read.
- * @example
- *   ValidKMerGenerator<2> gen(read, 4);
- *   while (gen.HasMore()) {
- *     MyTrickyFunction(gen.kmer());
- *     gen.Next();
- *   }
- *   or
- *   for (ValidKMerGenerator<2> gen(read, 2); gen.HasMore; gen.Next() {
- *     MyTrickyFunction(gen.kmer(), gen.pos(), gen.correct_probability());
- *   }
- * @param kK k-mer length.
- */
-template<uint32_t kK>
-class ValidKMerGenerator {
- public:
-  /**
-   * @param read Read to generate k-mers from.
-   * @param bad_quality_threshold  This class virtually cuts
-   * nucleotides with quality lower the threshold from the ends of the
-   * read.
-   */
-  explicit ValidKMerGenerator(const Read &read,
-                              uint32_t bad_quality_threshold = 2) {
-    Reset(read.getSequenceString().data(),
-          read.getQualityString().data(),
-          read.getSequenceString().size(),
-          bad_quality_threshold);
-  }
-  /**
-   * @param seq sequence to generate k-mers from.
-   * @param qual quality string
-   * @param bad_quality_threshold  This class virtually cuts
-   * nucleotides with quality lower the threshold from the ends of the
-   * read.
-   */
-  explicit ValidKMerGenerator(const char *seq, const char *qual,
-                              size_t len,
-                              uint32_t bad_quality_threshold = 2) {
-    Reset(seq, qual, len, bad_quality_threshold);
-  }
-
-  ValidKMerGenerator()
-      : kmer_(), seq_(0), qual_(0),
-        pos_(-1), end_(-1), len_(0),
-        correct_probability_(1), bad_quality_threshold_(2),
-        has_more_(false), first(true) {}
-
-  void Reset(const char *seq, const char *qual,
-             size_t len,
-             uint32_t bad_quality_threshold = 2) {
-    kmer_ = Seq<kK>();
-    seq_ = seq;
-    qual_ = qual;
-    pos_ = -1;
-    end_ = -1;
-    len_ = len;
-    correct_probability_ = 1.0;
-    bad_quality_threshold_ = bad_quality_threshold;
-    has_more_ = true;
-    first = true;
-
-    TrimBadQuality();
-    Next();
-  }
-
-  /**
-   * @result true if Next() succeed while generating new k-mer, false
-   * otherwise.
-   */
-  bool HasMore() const {
-    return has_more_;
-  }
-  /**
-   * @result last k-mer generated by Next().
-   */
-  const Seq<kK>& kmer() const {
-    return kmer_;
-  }
-  /**
-   * @result last k-mer position in initial read.
-   */
-  int pos() const {
-    return pos_;
-  }
-  /**
-   * @result probability that last generated k-mer is correct.
-   */
-  double correct_probability() const {
-    return correct_probability_;
-  }
-  /**
-   * This functions reads next k-mer from the read and sets hasmore to
-   * if succeeded. You can access k-mer read with kmer().
-   */
-  void Next();
- private:
-  void TrimBadQuality();
-  double Prob(uint8_t qual) {
-    return 1 - (qual < 3 ? 0.75 : pow(10.0, -(int)qual / 10.0));
-  }
-  uint32_t GetQual(uint32_t pos) {
-    if (pos >= len_) {
-      return 2;
-    } else {
-      return qual_[pos];
-    }
-  }
-  Seq<kK> kmer_;
-  const char* seq_;
-  const char* qual_;
-  size_t pos_;
-  size_t end_;
-  size_t len_;
-  double correct_probability_;
-  uint32_t bad_quality_threshold_;
-  bool has_more_;
-  bool first;
-
-  // Disallow copy and assign
-  ValidKMerGenerator(const ValidKMerGenerator&) = delete;
-  void operator=(const ValidKMerGenerator&) = delete;
-};
-
-template<uint32_t kK>
-void ValidKMerGenerator<kK>::TrimBadQuality() {
-  pos_ = 0;
-  if (qual_)
-    for (; pos_ < len_; ++pos_) {
-      if (GetQual(pos_) >= bad_quality_threshold_)
-        break;
-    }
-  end_ = len_;
-  if (qual_)
-    for (; end_ > pos_; --end_) {
-      if (GetQual(end_ - 1) >= bad_quality_threshold_)
-        break;
-  }
-}
-
-template<uint32_t kK>
-void ValidKMerGenerator<kK>::Next() {
-  if (pos_ + kK > end_) {
-    has_more_ = false;
-  } else if (first || !is_nucl(seq_[pos_ + kK - 1])) {
-    // in this case we have to look for new k-mer
-    correct_probability_ = 1.0;
-    uint32_t start_hypothesis = pos_;
-    uint32_t i = pos_;
-    for (; i < len_; ++i) {
-      if (i == kK + start_hypothesis) {
-        break;
-      }
-      if (qual_)
-        correct_probability_ *= Prob(GetQual(i));
-      if (!is_nucl(seq_[i])) {
-        start_hypothesis = i + 1;
-        correct_probability_ = 1.0;
-      }
-    }
-    if (i == kK + start_hypothesis) {
-      kmer_ = Seq<kK>(seq_ + start_hypothesis, 0, kK, /* raw */ true);
-      pos_ = start_hypothesis + 1;
-    } else {
-      has_more_ = false;
-    }
-  } else {
-    // good case we can just shift our previous answer
-    kmer_ = kmer_ << seq_[pos_ + kK - 1];
-    if (qual_) {
-      correct_probability_ *= Prob(GetQual(pos_ + kK - 1));
-      correct_probability_ /= Prob(GetQual(pos_ - 1));
-    }
-    ++pos_;
-  }
-  first = false;
-}
-#endif  // HAMMER_VALIDKMERGENERATOR_HPP__
diff --git a/src/projects/corrector/config_struct.cpp b/src/projects/corrector/config_struct.cpp
index 594bae8..1464ff1 100644
--- a/src/projects/corrector/config_struct.cpp
+++ b/src/projects/corrector/config_struct.cpp
@@ -7,7 +7,7 @@
 
 #include "config_struct.hpp"
 
-#include "utils/openmp_wrapper.h"
+#include "utils/parallel/openmp_wrapper.h"
 
 #include "llvm/Support/YAMLParser.h"
 #include "llvm/Support/YAMLTraits.h"
@@ -53,6 +53,7 @@ struct MappingTraits<corrector::corrector_config> {
         io.mapOptional("max_nthreads", cfg.max_nthreads, 1u);
         io.mapRequired("strategy", cfg.strat);
         io.mapOptional("bwa", cfg.bwa, std::string("."));
+        io.mapOptional("log_filename", cfg.log_filename, std::string("."));
     }
 };
 }}
diff --git a/src/projects/corrector/config_struct.hpp b/src/projects/corrector/config_struct.hpp
index e1fcf19..a831190 100644
--- a/src/projects/corrector/config_struct.hpp
+++ b/src/projects/corrector/config_struct.hpp
@@ -25,6 +25,7 @@ struct corrector_config {
     unsigned max_nthreads;
     Strategy strat;
     std::string bwa;
+    std::string log_filename;
 };
 
 void load(corrector::corrector_config& cfg, const std::string &filename);
diff --git a/src/projects/corrector/contig_processor.cpp b/src/projects/corrector/contig_processor.cpp
index 8564d17..ce5e9b5 100644
--- a/src/projects/corrector/contig_processor.cpp
+++ b/src/projects/corrector/contig_processor.cpp
@@ -13,7 +13,7 @@
 #include "io/reads/osequencestream.hpp"
 #include "io/reads/file_reader.hpp"
 #include "io/reads/single_read.hpp"
-#include "utils/path_helper.hpp"
+#include "utils/filesystem/path_helper.hpp"
 
 #include <boost/algorithm/string.hpp>
 
@@ -34,7 +34,7 @@ void ContigProcessor::ReadContig() {
     contig_name_ = cur_read.name();
     contig_ = cur_read.GetSequenceString();
 
-    output_contig_file_ = path::append_path(path::parent_path(contig_file_), path::basename(contig_file_) + ".ref.fasta");
+    output_contig_file_ = fs::append_path(fs::parent_path(contig_file_), fs::basename(contig_file_) + ".ref.fasta");
     charts_.resize(contig_.length());
 }
 
@@ -74,8 +74,13 @@ size_t ContigProcessor::UpdateOneBase(size_t i, stringstream &ss, const unordere
         if (maxj != maxi) {
             DEBUG("Interesting positions differ with majority!");
             DEBUG("On position " << i << "  old: " << old << " majority: " << pos_to_var[maxi] << "interesting: " << pos_to_var[maxj]);
-            if (strat != Strategy::MajorityOnly)
-                maxi = maxj;
+            if (strat != Strategy::MajorityOnly) {
+                if (charts_[i].votes[maxj] > interesting_weight_cutoff)
+                    maxi = maxj;
+                else
+                    DEBUG(" alternative interesting position with weight " << charts_[i].votes[maxj] <<
+                          " fails weight cutoff");
+            }
         }
     }
     if (old != pos_to_var[maxi]) {
@@ -257,7 +262,19 @@ size_t ContigProcessor::ProcessMultipleSamFiles() {
         }
         sm.close();
     }
-
+    size_t total_coverage = 0;
+    for (const auto &pos: charts_)
+        total_coverage += pos.TotalMapped();
+    size_t average_coverage = total_coverage / contig_.length();
+    size_t different_cov = 0;
+    for (const auto &pos: charts_)
+        if ((pos.TotalMapped() < average_coverage / 2) || (pos.TotalMapped() > (average_coverage * 3) / 2))
+            different_cov++;
+    if (different_cov < contig_.length() * 3/ 10) {
+        interesting_weight_cutoff = int (average_coverage / 2);
+        DEBUG ("coverage is relatively uniform, average coverage is " << average_coverage
+               << " setting interesting positions heuristics to " << interesting_weight_cutoff);
+    }
     ipp_.FillInterestingPositions(charts_);
     for (const auto &sf : sam_files_) {
         MappedSamStream sm(sf.first);
@@ -285,7 +302,7 @@ size_t ContigProcessor::ProcessMultipleSamFiles() {
     }
     vector<string> contig_name_splitted;
     boost::split(contig_name_splitted, contig_name_, boost::is_any_of("_"));
-    io::osequencestream_simple oss(output_contig_file_);
+    io::OutputSequenceStream oss(output_contig_file_);
     for(size_t i = 0; i < contig_name_splitted.size(); i++) {
         if (contig_name_splitted[i] == "length" && i + 1 < contig_name_splitted.size()) {
             contig_name_splitted[i + 1] = std::to_string(int(s_new_contig.str().length()));
@@ -296,8 +313,7 @@ size_t ContigProcessor::ProcessMultipleSamFiles() {
     for(size_t i = 1; i < contig_name_splitted.size(); i++) {
         new_header += "_" + contig_name_splitted[i];
     }
-    oss.set_header(new_header);
-    oss << s_new_contig.str();
+    oss << io::SingleRead(new_header, s_new_contig.str());
 
     return total_changes;
 }
diff --git a/src/projects/corrector/contig_processor.hpp b/src/projects/corrector/contig_processor.hpp
index a35db3b..2e36548 100644
--- a/src/projects/corrector/contig_processor.hpp
+++ b/src/projects/corrector/contig_processor.hpp
@@ -15,7 +15,7 @@
 #pragma once
 #include "interesting_pos_processor.hpp"
 #include "positional_read.hpp"
-#include "utils/openmp_wrapper.h"
+#include "utils/parallel/openmp_wrapper.h"
 
 #include <io/sam/sam_reader.hpp>
 #include <io/sam/read.hpp>
@@ -41,12 +41,16 @@ class ContigProcessor {
     std::vector<int> error_counts_;
 
     const size_t kMaxErrorNum = 20;
-
+    int interesting_weight_cutoff;
+protected:
+    DECL_LOGGER("ContigProcessor")
 public:
     ContigProcessor(const sam_files_type &sam_files, const std::string &contig_file)
             : sam_files_(sam_files), contig_file_(contig_file) {
         ReadContig();
         ipp_.set_contig(contig_);
+//At least three reads to believe in inexact repeats heuristics.
+        interesting_weight_cutoff = 2;
     }
     size_t ProcessMultipleSamFiles();
 private:
diff --git a/src/projects/corrector/dataset_processor.cpp b/src/projects/corrector/dataset_processor.cpp
index 20f3e1e..4147f91 100644
--- a/src/projects/corrector/dataset_processor.cpp
+++ b/src/projects/corrector/dataset_processor.cpp
@@ -11,9 +11,9 @@
 #include "config_struct.hpp"
 
 #include "io/reads/file_reader.hpp"
-#include "utils/path_helper.hpp"
+#include "utils/filesystem/path_helper.hpp"
 #include "io/reads/osequencestream.hpp"
-#include "utils/openmp_wrapper.h"
+#include "utils/parallel/openmp_wrapper.h"
 
 #include <boost/algorithm/string.hpp>
 
@@ -26,7 +26,7 @@ namespace corrector {
 std::string DatasetProcessor::GetLibDir(const size_t lib_count) {
     if (lib_dirs_.find(lib_count) != lib_dirs_.end())
         return lib_dirs_[lib_count];
-    std::string res = path::make_temp_dir(corr_cfg::get().work_dir, "lib" + to_string(lib_count));
+    std::string res = fs::make_temp_dir(corr_cfg::get().work_dir, "lib" + to_string(lib_count));
     lib_dirs_[lib_count] = res;
     return res;
 }
@@ -42,13 +42,13 @@ void DatasetProcessor::SplitGenome(const string &genome_splitted_dir) {
         if (all_contigs_.find(contig_name) != all_contigs_.end()) {
             WARN("Duplicated contig names! Multiple contigs with name" << contig_name);
         }
-        string full_path = path::append_path(genome_splitted_dir, contig_name + ".fasta");
-        string out_full_path = path::append_path(genome_splitted_dir, contig_name + ".ref.fasta");
-        string sam_filename = path::append_path(genome_splitted_dir, contig_name + ".pair.sam");
+        string full_path = fs::append_path(genome_splitted_dir, contig_name + ".fasta");
+        string out_full_path = fs::append_path(genome_splitted_dir, contig_name + ".ref.fasta");
+        string sam_filename = fs::append_path(genome_splitted_dir, contig_name + ".pair.sam");
         all_contigs_[contig_name] = {full_path, out_full_path, contig_seq.length(), sam_files_type(), sam_filename, cur_id};
         cur_id ++;
         buffered_reads_[contig_name].clear();
-        io::osequencestream oss(full_path);
+        io::OutputSequenceStream oss(full_path);
         oss << io::SingleRead(contig_name, contig_seq);
         DEBUG("full_path " + full_path)
     }
@@ -133,9 +133,9 @@ void DatasetProcessor::SplitPairedLibrary(const string &all_reads_filename, cons
 string DatasetProcessor::RunPairedBwa(const string &left, const string &right, const size_t lib)  {
     string cur_dir = GetLibDir(lib);
     int run_res = 0;
-    string tmp_sam_filename = path::append_path(cur_dir, "tmp.sam");
-    string bwa_string = path::screen_whitespaces(path::screen_whitespaces(corr_cfg::get().bwa));
-    string genome_screened = path::screen_whitespaces(genome_file_);
+    string tmp_sam_filename = fs::append_path(cur_dir, "tmp.sam");
+    string bwa_string = fs::screen_whitespaces(fs::screen_whitespaces(corr_cfg::get().bwa));
+    string genome_screened = fs::screen_whitespaces(genome_file_);
     string index_line = bwa_string + string(" index ") + "-a " + "is " + genome_screened ;
     INFO("Running bwa index ...: " << index_line);
     run_res = system(index_line.c_str());
@@ -144,8 +144,8 @@ string DatasetProcessor::RunPairedBwa(const string &left, const string &right, c
         return "";
     }
     string nthreads_str = to_string(nthreads_);
-    string last_line = bwa_string + string(" mem ") + " -v 1 -t " + nthreads_str + " "+ genome_screened + " " + path::screen_whitespaces(left) + " " + path::screen_whitespaces(right)  + "  > "
-            + path::screen_whitespaces(tmp_sam_filename) ;
+    string last_line = bwa_string + string(" mem ") + " -v 1 -t " + nthreads_str + " "+ genome_screened + " " + fs::screen_whitespaces(left) + " " + fs::screen_whitespaces(right)  + "  > "
+            + fs::screen_whitespaces(tmp_sam_filename) ;
     INFO("Running bwa mem ...:" << last_line);
     run_res = system(last_line.c_str());
     if (run_res != 0) {
@@ -158,9 +158,9 @@ string DatasetProcessor::RunPairedBwa(const string &left, const string &right, c
 string DatasetProcessor::RunSingleBwa(const string &single, const size_t lib)  {
     int run_res = 0;
     string cur_dir = GetLibDir(lib);
-    string tmp_sam_filename = path::append_path(cur_dir, "tmp.sam");
-    string bwa_string = path::screen_whitespaces(path::screen_whitespaces(corr_cfg::get().bwa));
-    string genome_screened = path::screen_whitespaces(genome_file_);
+    string tmp_sam_filename = fs::append_path(cur_dir, "tmp.sam");
+    string bwa_string = fs::screen_whitespaces(fs::screen_whitespaces(corr_cfg::get().bwa));
+    string genome_screened = fs::screen_whitespaces(genome_file_);
     string index_line = bwa_string + string(" index ") + "-a " + "is " + genome_screened ;
     INFO("Running bwa index ...: " << index_line);
     run_res = system(index_line.c_str());
@@ -169,7 +169,7 @@ string DatasetProcessor::RunSingleBwa(const string &single, const size_t lib)  {
         return "";
     }
     string nthreads_str = to_string(nthreads_);
-    string last_line = bwa_string + " mem "+ " -v 1 -t " + nthreads_str + " " + genome_screened + " "  + path::screen_whitespaces(single)  + "  > " + path::screen_whitespaces(tmp_sam_filename);
+    string last_line = bwa_string + " mem "+ " -v 1 -t " + nthreads_str + " " + genome_screened + " "  + fs::screen_whitespaces(single)  + "  > " + fs::screen_whitespaces(tmp_sam_filename);
     INFO("Running bwa mem ...:" << last_line);
     run_res = system(last_line.c_str());
     if (run_res != 0) {
@@ -183,7 +183,7 @@ void DatasetProcessor::PrepareContigDirs(const size_t lib_count) {
     string out_dir = GetLibDir(lib_count);
     for (auto &ac : all_contigs_) {
         auto contig_name = ac.first;
-        string out_name = path::append_path(out_dir, contig_name + ".sam");
+        string out_name = fs::append_path(out_dir, contig_name + ".sam");
         ac.second.sam_filenames.push_back(make_pair(out_name, unsplitted_sam_files_[lib_count].second));
         BufferedOutputRead("@SQ\tSN:" + contig_name + "\tLN:" + to_string(all_contigs_[contig_name].contig_length), contig_name, lib_count);
     }
diff --git a/src/projects/corrector/dataset_processor.hpp b/src/projects/corrector/dataset_processor.hpp
index 2edf657..44c9c7c 100644
--- a/src/projects/corrector/dataset_processor.hpp
+++ b/src/projects/corrector/dataset_processor.hpp
@@ -7,10 +7,9 @@
 
 #pragma once
 
-#include "utils/path_helper.hpp"
+#include "utils/filesystem/path_helper.hpp"
 
 #include "io/reads/file_reader.hpp"
-#include "utils/path_helper.hpp"
 
 #include "pipeline/library.hpp"
 
@@ -46,10 +45,14 @@ class DatasetProcessor {
     std::unordered_map<size_t, std::string> lib_dirs_;
     const size_t kBuffSize = 100000;
     const size_t kMinContigLengthForInfo = 20000;
+
+protected:
+    DECL_LOGGER("DatasetProcessor")
+
 public:
     DatasetProcessor(const std::string &genome_file, const std::string &work_dir, const std::string &output_dir, const size_t &thread_num)
             : genome_file_(genome_file), work_dir_(work_dir), nthreads_(thread_num) {
-        output_contig_file_ = path::append_path(output_dir, "corrected_contigs.fasta");
+        output_contig_file_ = fs::append_path(output_dir, "corrected_contigs.fasta");
         buffered_count_ = 0;
     }
 
diff --git a/src/projects/corrector/interesting_pos_processor.hpp b/src/projects/corrector/interesting_pos_processor.hpp
index 6e1cc62..16dcf43 100644
--- a/src/projects/corrector/interesting_pos_processor.hpp
+++ b/src/projects/corrector/interesting_pos_processor.hpp
@@ -11,6 +11,8 @@
 #include <vector>
 #include <string>
 #include <unordered_map>
+#include "utils/logger/log_writers.hpp"
+
 
 namespace corrector {
 typedef std::vector<WeightedPositionalRead> WeightedReadStorage;
@@ -29,6 +31,9 @@ class InterestingPositionProcessor {
     static const size_t kMaxErrorCount = 6;
     const int error_weight[kMaxErrorCount] = { 100, 10, 8, 5, 2, 1 };
 
+private:
+    DECL_LOGGER("InterestingPositionProcessor")
+
 public:
     InterestingPositionProcessor() {}
     void set_contig(const std::string &ctg);
diff --git a/src/projects/corrector/main.cpp b/src/projects/corrector/main.cpp
index ff6afa8..affd938 100644
--- a/src/projects/corrector/main.cpp
+++ b/src/projects/corrector/main.cpp
@@ -19,22 +19,27 @@
 #include <string>
 
 using namespace std;
-void create_console_logger() {
+
+void create_console_logger(const string& dir) {
     using namespace logging;
 
-    logger *lg = create_logger("");
+    string log_props_file = corr_cfg::get().log_filename;
+
+    if (!fs::FileExists(log_props_file))
+        log_props_file = fs::append_path(dir, corr_cfg::get().log_filename);
+    cout << log_props_file;
+    logger *lg = create_logger(fs::FileExists(log_props_file) ? log_props_file : "");
     lg->add_writer(std::make_shared<console_writer>());
+    //lg->add_writer(std::make_shared<mutex_writer>(std::make_shared<console_writer>()));
     attach_logger(lg);
 }
 
 int main(int argc, char** argv) {
-    perf_counter pc;
+    utils::perf_counter pc;
 
     srand(42);
     srandom(42);
     try {
-        create_console_logger();
-
         if (argc != 3) {
             WARN("Wrong argument number");
             return 1;
@@ -42,11 +47,13 @@ int main(int argc, char** argv) {
         string contig_name(argv[2]);
         string cfg_file(argv[1]);
         corr_cfg::create_instance(cfg_file);
+        string cfg_dir = fs::parent_path(cfg_file);
+        create_console_logger(cfg_dir);
         string work_dir = corr_cfg::get().work_dir;
-        if (!path::check_existence(corr_cfg::get().output_dir))
-            path::make_dir(corr_cfg::get().output_dir);
-        if (!path::check_existence(corr_cfg::get().work_dir))
-            path::make_dir(corr_cfg::get().work_dir);
+        if (!fs::check_existence(corr_cfg::get().output_dir))
+            fs::make_dir(corr_cfg::get().output_dir);
+        if (!fs::check_existence(corr_cfg::get().work_dir))
+            fs::make_dir(corr_cfg::get().work_dir);
 
         INFO("Starting MismatchCorrector, built from " SPADES_GIT_REFSPEC ", git revision " SPADES_GIT_SHA1);
 
diff --git a/src/projects/corrector/positional_read.hpp b/src/projects/corrector/positional_read.hpp
index 5f0ce0b..7f16e7a 100644
--- a/src/projects/corrector/positional_read.hpp
+++ b/src/projects/corrector/positional_read.hpp
@@ -42,7 +42,12 @@ struct position_description {
         return maxi;
     }
 
-
+    size_t TotalMapped() const {
+        size_t res = 0;
+        for (size_t i = 0; i < MAX_VARIANTS; i++)
+            res += votes[i];
+        return res;
+    }
     std::string str() const;
     void clear() ;
 };
diff --git a/src/projects/dipspades/consensus_contigs_constructor/consensus_contigs_constructor.hpp b/src/projects/dipspades/consensus_contigs_constructor/consensus_contigs_constructor.hpp
index 445641f..9a7762f 100644
--- a/src/projects/dipspades/consensus_contigs_constructor/consensus_contigs_constructor.hpp
+++ b/src/projects/dipspades/consensus_contigs_constructor/consensus_contigs_constructor.hpp
@@ -82,7 +82,7 @@ class ConsensusContigsConstructor {
             }
         }
         if(zero_paths != 0)
-            INFO(ToString(zero_paths) + " contigs with total length " << total_length_unmapped <<
+            INFO(std::to_string(zero_paths) + " contigs with total length " << total_length_unmapped <<
                     " have mapped path with zero length");
         return map_paths;
     }
@@ -213,13 +213,13 @@ class ConsensusContigsConstructor {
                 double_contigs->Add(contig);
         }
         WriteContigsToFile(double_contigs,
-                path::append_path(dsp_cfg::get().io.output_dir, "paired_consensus_contigs.fasta").c_str());
+                fs::append_path(dsp_cfg::get().io.output_dir, "paired_consensus_contigs.fasta").c_str());
         WriteContigsToFile(single_contigs,
-                path::append_path(dsp_cfg::get().io.output_dir, "unpaired_consensus_contigs.fasta").c_str());
+                fs::append_path(dsp_cfg::get().io.output_dir, "unpaired_consensus_contigs.fasta").c_str());
     }
 
     void WriteAlignedHaplocontigs(){
-        string fname = path::append_path(dsp_cfg::get().io.output_dir, "haplocontigs_alignment");
+        string fname = fs::append_path(dsp_cfg::get().io.output_dir, "haplocontigs_alignment");
         ofstream out(fname.c_str());
         INFO("Writing haplocontigs alignment to " << fname);
 
@@ -310,7 +310,7 @@ public:
 
         composite_storage_ = DefineOverlappingContigs(processed_storage);
 
-        string consensus_fname(path::append_path(dsp_cfg::get().io.output_dir, "consensus_contigs.fasta").c_str());
+        string consensus_fname(fs::append_path(dsp_cfg::get().io.output_dir, "consensus_contigs.fasta").c_str());
         WriteContigsToFile(composite_storage_, consensus_fname);
         WritePairedAndUnpairedContigs(composite_storage_);
 
diff --git a/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/close_gaps_corrector.hpp b/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/close_gaps_corrector.hpp
index 46b3080..1e68e71 100644
--- a/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/close_gaps_corrector.hpp
+++ b/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/close_gaps_corrector.hpp
@@ -75,7 +75,7 @@ class CloseGapsCorrector : public AbstractContigCorrector{
             if(cur_process_perc > processed_perc) {
                 while(processed_perc + step <= cur_process_perc)
                     processed_perc += step;
-                INFO(ToString(processed_perc * 100.0) << "% contigs were processed");
+                INFO(std::to_string(processed_perc * 100.0) << "% contigs were processed");
                 processed_perc += step;
             }
         }
@@ -91,16 +91,16 @@ public:
 
     virtual ContigStoragePtr Correct(ContigStoragePtr storage){
 
-        INFO(ToString(CountContigsWithGaps(storage)) << " contigs from " <<
-                ToString(storage->Size()) << " have gaps before correction");
+        INFO(std::to_string(CountContigsWithGaps(storage)) << " contigs from " <<
+                std::to_string(storage->Size()) << " have gaps before correction");
 
         ProcessContigs(storage);
 
-        INFO(ToString(num_corr) << " contigs from " <<
-                ToString(storage->Size()) << " with total length " << ToString(connected_length_) + " are correct");
-        INFO(ToString(storage->Size() - num_corr) << " contigs from "
-                << ToString(storage->Size()) << " with total length " <<
-                ToString(disconnected_length_) + " have gaps after correction");
+        INFO(std::to_string(num_corr) << " contigs from " <<
+                std::to_string(storage->Size()) << " with total length " << std::to_string(connected_length_) + " are correct");
+        INFO(std::to_string(storage->Size() - num_corr) << " contigs from "
+                << std::to_string(storage->Size()) << " with total length " <<
+                std::to_string(disconnected_length_) + " have gaps after correction");
 
         storage->DeleteByIDs(incorr_contigs);
         return storage;
diff --git a/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/equal_path_deletion_correction.hpp b/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/equal_path_deletion_correction.hpp
index fd68341..b4ab165 100644
--- a/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/equal_path_deletion_correction.hpp
+++ b/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/equal_path_deletion_correction.hpp
@@ -62,7 +62,7 @@ public:
         }
         RedundancyMapCondenser<size_t> condenser;
         res_.redundancy_map = condenser.Condense(res_.redundancy_map);
-        INFO(ToString(ids_for_deletion.size()) + " contigs from " << contigs->Size() << " are redundant");
+        INFO(std::to_string(ids_for_deletion.size()) + " contigs from " << contigs->Size() << " are redundant");
         contigs->DeleteByIDs(ids_for_deletion);
 
         INFO("Computing redundant equal contigs ends");
diff --git a/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/incorrect_contig_remover.hpp b/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/incorrect_contig_remover.hpp
index 70b0757..19bf61e 100644
--- a/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/incorrect_contig_remover.hpp
+++ b/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/incorrect_contig_remover.hpp
@@ -28,7 +28,7 @@ public:
                 contigs_for_deletion.insert((*storage)[i]->id());
             }
         }
-        INFO(ToString(contigs_for_deletion.size()) +  " contigs from " <<
+        INFO(std::to_string(contigs_for_deletion.size()) +  " contigs from " <<
                 storage->Size() << " were deleted");
         storage->DeleteByIDs(contigs_for_deletion);
         return storage;
diff --git a/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/iterative_redundant_contigs_remover.hpp b/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/iterative_redundant_contigs_remover.hpp
index bd12357..ce37b82 100644
--- a/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/iterative_redundant_contigs_remover.hpp
+++ b/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/iterative_redundant_contigs_remover.hpp
@@ -40,14 +40,14 @@ public:
             contigs = equal_path_remover.Correct(contigs);
             res.redundancy_map = equal_path_remover.Result().redundancy_map;
             index_.Clear();
-            INFO(ToString(contigs->Size()) + " contigs will be used further");
+            INFO(std::to_string(contigs->Size()) + " contigs will be used further");
         }
 
         INFO("Iterative loop corrector starts");
         {
             INFO("Only exact match iteration with parameters:");
-            INFO("\tMaximal loop length - " + ToString(max_loop_len_));
-            INFO("\tMinimal lcs length - " + ToString(min_lcs_length_));
+            INFO("\tMaximal loop length - " + std::to_string(max_loop_len_));
+            INFO("\tMinimal lcs length - " + std::to_string(min_lcs_length_));
             INFO("\tMaximal tail length - 0");
 
             index_.Initialize(contigs);
@@ -59,14 +59,14 @@ public:
             RedundancyMapMerger<size_t> map_merger;
             res.redundancy_map = map_merger.MergeTwoMaps(old_map, new_map);
             index_.Clear();
-            INFO(ToString(contigs->Size()) + " contigs will be used further");
+            INFO(std::to_string(contigs->Size()) + " contigs will be used further");
         }
 
         {
             INFO("Tails allowing match iteration with parameters:");
-            INFO("\tMaximal loop length - " + ToString(max_loop_len_));
-            INFO("\tMinimal lcs length - " + ToString(min_lcs_length_));
-            INFO("\tMaximal tail length - " + ToString(max_tail_length_));
+            INFO("\tMaximal loop length - " + std::to_string(max_loop_len_));
+            INFO("\tMinimal lcs length - " + std::to_string(min_lcs_length_));
+            INFO("\tMaximal tail length - " + std::to_string(max_tail_length_));
             index_.Initialize(contigs);
             LoopBulgeDeletionCorrector loop_corr(g_, k_value_,
                     max_loop_len_, max_tail_length_, min_lcs_length_, index_);
@@ -76,7 +76,7 @@ public:
             RedundancyMapMerger<size_t> map_merger;
             res.redundancy_map = map_merger.MergeTwoMaps(old_map, new_map);
             index_.Clear();
-            INFO(ToString(contigs->Size()) + " contigs will be used further");
+            INFO(std::to_string(contigs->Size()) + " contigs will be used further");
         }
         INFO("Iterative loop corrector ends");
         return contigs;
diff --git a/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/overlap_searcher.hpp b/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/overlap_searcher.hpp
index 6e8c49a..ee535ee 100644
--- a/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/overlap_searcher.hpp
+++ b/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/overlap_searcher.hpp
@@ -473,7 +473,7 @@ public:
             }
         }
 
-        TRACE("Overlapped contigs map. Size - " << ToString(overlap_map.Size()) << endl <<
+        TRACE("Overlapped contigs map. Size - " << std::to_string(overlap_map.Size()) << endl <<
                 overlap_map);
 
         FillOverlapGraphByMap(overlap_map, og);
@@ -481,16 +481,16 @@ public:
         string fname = dsp_cfg::get().io.output_dir + "default_overlap_graph.dot";
         OverlapgraphToDot(fname, og, contigs);
 
-        INFO("Overlap graph with " + ToString(og.Vertices().size()) + " vertices and " +
-                ToString(og.Edges().size()) + " edges constructed");
+        INFO("Overlap graph with " + std::to_string(og.Vertices().size()) + " vertices and " +
+                std::to_string(og.Edges().size()) + " edges constructed");
 
         auto og_vertices = og.Vertices();
         auto edges = og.Edges();
 
         SimplifyOverlapGraph(og, 10, 5);
 
-        INFO("Simplified overlap graph contains " + ToString(og.Vertices().size()) + " vertices and " +
-                ToString(og.Edges().size()) + " edges");
+        INFO("Simplified overlap graph contains " + std::to_string(og.Vertices().size()) + " vertices and " +
+                std::to_string(og.Edges().size()) + " edges");
 
         fname = dsp_cfg::get().io.output_dir + "simplified_overlap_graph.dot";
         OverlapgraphToDot(fname, og, contigs);
diff --git a/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/redundant_contig_remover.hpp b/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/redundant_contig_remover.hpp
index f8fac9d..14bc4b2 100644
--- a/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/redundant_contig_remover.hpp
+++ b/src/projects/dipspades/consensus_contigs_constructor/contig_correctors/redundant_contig_remover.hpp
@@ -856,7 +856,7 @@ public:
             if(cur_process_perc > processed_perc) {
                 while(processed_perc + processed_step<= cur_process_perc)
                     processed_perc += processed_step;
-                INFO(ToString(processed_perc * 100.0) << "% contigs were processed");
+                INFO(std::to_string(processed_perc * 100.0) << "% contigs were processed");
                 processed_perc += processed_step;
             }
         }
@@ -865,7 +865,7 @@ public:
         RedundancyMapCondenser<size_t> condenser;
         condenser.Condense(res.redundancy_map);
 
-        INFO(ToString(redundant_contigs.size()) + " contigs from " + ToString(contigs->Size()) + " are redundant");
+        INFO(std::to_string(redundant_contigs.size()) + " contigs from " + std::to_string(contigs->Size()) + " are redundant");
 
         contigs->DeleteByIDs(redundant_contigs);
 
diff --git a/src/projects/dipspades/consensus_contigs_constructor/overlap_graph.hpp b/src/projects/dipspades/consensus_contigs_constructor/overlap_graph.hpp
index 923f4ec..b9663f9 100644
--- a/src/projects/dipspades/consensus_contigs_constructor/overlap_graph.hpp
+++ b/src/projects/dipspades/consensus_contigs_constructor/overlap_graph.hpp
@@ -919,7 +919,7 @@ public:
         map<size_t, vector<size_t> > sh_paths = dijkstra.Paths();
 
         if(sh_paths.find(v2) == sh_paths.end()){
-//            INFO("Path from " + ToString(v1) + " to " + ToString(v2) + " isn't found");
+//            INFO("Path from " + ToString(v1) + " to " + ContainerToString(v2) + " isn't found");
             return paths;
         }
         else{
@@ -1104,7 +1104,7 @@ void SimplifyOverlapGraph(OverlapGraph &overlap_graph, size_t tc_num_iter, size_
         TransitiveReductionCorrector tr_corr;
         tr_res = tr_corr.Correct(overlap_graph);
 
-        INFO(ToString(tc_res) + " tips and " + ToString(tr_res) + " transitive edges were deleted in overlap graph");
+        INFO(std::to_string(tc_res) + " tips and " + std::to_string(tr_res) + " transitive edges were deleted in overlap graph");
     }
 
     INFO("Bulge remover starts");
@@ -1112,7 +1112,7 @@ void SimplifyOverlapGraph(OverlapGraph &overlap_graph, size_t tc_num_iter, size_
     size_t num_bulges  = 1;
     for(size_t i = 0; (i < br_num_iter && num_bulges > 0); i++){
         num_bulges = br_corr.Correct(overlap_graph);
-        INFO(ToString(num_bulges) + " bulges were deleted in overlap graph");
+        INFO(std::to_string(num_bulges) + " bulges were deleted in overlap graph");
     }
 }
 
diff --git a/src/projects/dipspades/dipspades.hpp b/src/projects/dipspades/dipspades.hpp
index 2163350..376d148 100644
--- a/src/projects/dipspades/dipspades.hpp
+++ b/src/projects/dipspades/dipspades.hpp
@@ -84,7 +84,7 @@ public:
     void load(debruijn_graph::conj_graph_pack& gp,
             const std::string &load_from,
             const char* prefix) {
-        std::string p = path::append_path(load_from, prefix == NULL ? id() : prefix);
+        std::string p = fs::append_path(load_from, prefix == NULL ? id() : prefix);
         INFO("Loading current state from " << p);
         debruijn_graph::graphio::ScanAll(p, gp, false);
 
@@ -93,7 +93,7 @@ public:
     void save(const debruijn_graph::conj_graph_pack& gp,
             const std::string & save_to,
             const char* prefix) const {
-        std::string p = path::append_path(save_to, prefix == NULL ? id() : prefix);
+        std::string p = fs::append_path(save_to, prefix == NULL ? id() : prefix);
         INFO("Saving current state to " << p);
         debruijn_graph::graphio::PrintAll(p, gp);
     }
@@ -116,7 +116,7 @@ public:
     void load(debruijn_graph::conj_graph_pack& gp,
             const std::string &load_from,
             const char* prefix) {
-        std::string p = path::append_path(load_from, prefix == NULL ? id() : prefix);
+        std::string p = fs::append_path(load_from, prefix == NULL ? id() : prefix);
         INFO("Loading current state from " << p);
         debruijn_graph::graphio::ScanAll(p, gp, false);
         INFO("Loading histogram of bulge length");
@@ -127,7 +127,7 @@ public:
     void save(const debruijn_graph::conj_graph_pack& gp,
             const std::string & save_to,
             const char* prefix) const {
-        std::string p = path::append_path(save_to, prefix == NULL ? id() : prefix);
+        std::string p = fs::append_path(save_to, prefix == NULL ? id() : prefix);
         INFO("Saving current state to " << p);
         debruijn_graph::graphio::PrintAll(p, gp);
         storage().bulge_len_histogram.SaveToFile(p + ".hist");
@@ -150,7 +150,7 @@ public:
     void load(debruijn_graph::conj_graph_pack& gp,
               const std::string &load_from,
               const char* prefix) {
-        std::string p = path::append_path(load_from, prefix == NULL ? id() : prefix);
+        std::string p = fs::append_path(load_from, prefix == NULL ? id() : prefix);
         INFO("Loading current state from " << p);
         debruijn_graph::graphio::ScanAll(p, gp, false);
         INFO("Loading histogram of bulge length");
@@ -161,7 +161,7 @@ public:
     void save(const debruijn_graph::conj_graph_pack& gp,
               const std::string & save_to,
               const char* prefix) const {
-        std::string p = path::append_path(save_to, prefix == NULL ? id() : prefix);
+        std::string p = fs::append_path(save_to, prefix == NULL ? id() : prefix);
         INFO("Saving current state to " << p);
         debruijn_graph::graphio::PrintAll(p, gp);
         storage().bulge_len_histogram.SaveToFile(p + ".hist");
@@ -188,7 +188,7 @@ public:
     void load(debruijn_graph::conj_graph_pack& gp,
             const std::string &load_from,
             const char* prefix) {
-        std::string p = path::append_path(load_from, prefix == NULL ? id() : prefix);
+        std::string p = fs::append_path(load_from, prefix == NULL ? id() : prefix);
         INFO("Loading current state from " << p);
         debruijn_graph::graphio::ScanAll(p, gp, false);
     }
@@ -196,7 +196,7 @@ public:
     void save(const debruijn_graph::conj_graph_pack& gp,
             const std::string & save_to,
             const char* prefix) const {
-        std::string p = path::append_path(save_to, prefix == NULL ? id() : prefix);
+        std::string p = fs::append_path(save_to, prefix == NULL ? id() : prefix);
         INFO("Saving current state to " << p);
         debruijn_graph::graphio::PrintAll(p, gp);
         storage().bulge_len_histogram.SaveToFile(p + ".hist");
@@ -217,7 +217,7 @@ public:
             return;
         INFO("Diploid graph construction");
         conj_graph_pack double_graph_pack(graph_pack.k_value, dsp_cfg::get().io.tmp_dir,
-                                          dsp_cfg::get().io.num_libraries, "");
+                                          dsp_cfg::get().io.num_libraries, std::vector<std::string>());
         construct_graph_from_contigs(double_graph_pack);
         HaplotypeAssembler(graph_pack, double_graph_pack, storage().default_storage,
                            storage().composite_storage, storage().redundancy_map).Run();
@@ -240,7 +240,7 @@ void run_dipspades() {
             dsp_cfg::get().bp.K,
             dsp_cfg::get().io.tmp_dir,
             dsp_cfg::get().io.num_libraries,
-            "", // reference genome
+            std::vector<std::string>(), // reference genome
             1); // flanking range
 
     conj_gp.kmer_mapper.Attach();
diff --git a/src/projects/dipspades/dipspades_config.cpp b/src/projects/dipspades/dipspades_config.cpp
index deafb99..f8cccb4 100644
--- a/src/projects/dipspades/dipspades_config.cpp
+++ b/src/projects/dipspades/dipspades_config.cpp
@@ -8,7 +8,7 @@
 #include "dipspades_config.hpp"
 #include "pipeline/config_common.hpp"
 #include "utils/files_utils.hpp"
-#include "utils/path_helper.hpp"
+#include "utils/filesystem/path_helper.hpp"
 
 using namespace dipspades;
 
@@ -58,7 +58,7 @@ void edit_io_params(bool developer_mode, dipspades_config::io_params &io){
         io.dataset_name = io.output_dir.substr(0, io.output_dir.length() - 1);
         io.output_dir = io.output_base + io.output_dir + "/";
         io.output_root = io.output_dir;
-        io.output_suffix = path::MakeLaunchTimeDirName() + "/";
+        io.output_suffix = fs::MakeLaunchTimeDirName() + "/";
         io.output_dir = io.output_root + io.output_suffix;
         io.output_saves = io.output_dir + "saves/";
 //        io.load_from = io.output_root + io.load_from;
diff --git a/src/projects/dipspades/haplotype_assembly/conservative_regions_searcher.hpp b/src/projects/dipspades/haplotype_assembly/conservative_regions_searcher.hpp
index f064ede..312224d 100644
--- a/src/projects/dipspades/haplotype_assembly/conservative_regions_searcher.hpp
+++ b/src/projects/dipspades/haplotype_assembly/conservative_regions_searcher.hpp
@@ -148,7 +148,7 @@ public:
         size_t cons_regions_length = ComputeSummaryLengthOfRegionInStorage(cons_reg_storage_.cons_regions_begin(),
                 cons_reg_storage_.cons_regions_end());
         if(cons_regions_length > 0){
-            string cons_regions_fname(path::append_path(dsp_cfg::get().io.output_dir,
+            string cons_regions_fname(fs::append_path(dsp_cfg::get().io.output_dir,
                     "conservative_regions.fasta").c_str());
             WriteConservativeRegionsStorageToFile(cons_regions_fname, cons_reg_storage_.cons_regions_begin(),
                 cons_reg_storage_.cons_regions_end());
@@ -159,7 +159,7 @@ public:
         size_t poss_cons_regions_length = ComputeSummaryLengthOfRegionInStorage(cons_reg_storage_.poss_cons_regions_begin(),
                 cons_reg_storage_.poss_cons_regions_end());
         if(poss_cons_regions_length > 0){
-            string poss_cons_regions_fname(path::append_path(dsp_cfg::get().io.output_dir,
+            string poss_cons_regions_fname(fs::append_path(dsp_cfg::get().io.output_dir,
                 "possibly_conservative_regions.fasta").c_str());
 //            INFO("Possibly conservative regions written in file " << poss_cons_regions_fname);
             WriteConservativeRegionsStorageToFile(poss_cons_regions_fname, cons_reg_storage_.poss_cons_regions_begin(),
diff --git a/src/projects/dipspades/haplotype_assembly/haplotype_assembler.hpp b/src/projects/dipspades/haplotype_assembly/haplotype_assembler.hpp
index 8b72abf..0b3b9ef 100644
--- a/src/projects/dipspades/haplotype_assembly/haplotype_assembler.hpp
+++ b/src/projects/dipspades/haplotype_assembly/haplotype_assembler.hpp
@@ -42,7 +42,7 @@ public:
         INFO("Consensus contigs number: " << composite_storage_->Size());
         separator.SeparateContigs();
         SignedLabels signed_labels = separator.GetSignedLabels();
-        string hapl_output(path::append_path(dsp_cfg::get().io.output_dir, "haplotype_assembly.out").c_str());
+        string hapl_output(fs::append_path(dsp_cfg::get().io.output_dir, "haplotype_assembly.out").c_str());
         signed_labels.WriteToFile(hapl_output, default_storage_);
         INFO("Result of haplotype assembly written in file " << hapl_output);
         INFO("Contigs separation ends");
diff --git a/src/projects/dipspades/main.cpp b/src/projects/dipspades/main.cpp
index 018283f..b26a539 100644
--- a/src/projects/dipspades/main.cpp
+++ b/src/projects/dipspades/main.cpp
@@ -11,8 +11,8 @@
 #include "utils/logger/log_writers.hpp"
 
 #include "utils/segfault_handler.hpp"
-#include "utils/memory_limit.hpp"
-#include "utils/copy_file.hpp"
+#include "utils/perf/memory_limit.hpp"
+#include "utils/filesystem/copy_file.hpp"
 
 #include "pipeline/graph_pack.hpp"
 #include "stages/construction.hpp"
@@ -35,11 +35,11 @@ void copy_configs(string cfg_filename, string to) {
   if (!make_dir(to)) {
     WARN("Could not create files use in /tmp directory");
   }
-  path::copy_files_by_ext(path::parent_path(cfg_filename), to, ".info", true);
+  fs::copy_files_by_ext(fs::parent_path(cfg_filename), to, ".info", true);
 }
 
 void load_config(string cfg_filename) {
-  path::CheckFileExistenceFATAL(cfg_filename);
+  fs::CheckFileExistenceFATAL(cfg_filename);
   dsp_cfg::create_instance(cfg_filename);
 //  string path_to_copy = path::append_path(dsp_cfg::get().io.output_dir, "configs");
 //  copy_configs(cfg_filename, path_to_copy);
@@ -50,23 +50,23 @@ void create_console_logger(string cfg_filename) {
 
   string log_props_file = dsp_cfg::get().io.log_filename;
 
-  if (!path::FileExists(log_props_file)){
-    log_props_file = path::append_path(path::parent_path(cfg_filename), dsp_cfg::get().io.log_filename);
+  if (!fs::FileExists(log_props_file)){
+    log_props_file = fs::append_path(fs::parent_path(cfg_filename), dsp_cfg::get().io.log_filename);
   }
 
-  logger *lg = create_logger(path::FileExists(log_props_file) ? log_props_file : "");
+  logger *lg = create_logger(fs::FileExists(log_props_file) ? log_props_file : "");
   lg->add_writer(std::make_shared<console_writer>());
   attach_logger(lg);
 }
 
 int main(int /*argc*/, char** argv) {
-  perf_counter pc;
+  utils::perf_counter pc;
   const size_t GB = 1 << 30;
 
   srand(42);
   srandom(42);
 
-  segfault_handler sh;
+  utils::segfault_handler sh;
 
   try {
     using namespace debruijn_graph;
@@ -74,7 +74,7 @@ int main(int /*argc*/, char** argv) {
     load_config          (cfg_filename);
     make_dirs();
     if(dsp_cfg::get().rp.developer_mode)
-        copy_configs(cfg_filename, path::append_path(dsp_cfg::get().io.output_dir, "configs"));
+        copy_configs(cfg_filename, fs::append_path(dsp_cfg::get().io.output_dir, "configs"));
     create_console_logger(cfg_filename);
 
     INFO("Loaded config from " << cfg_filename);
@@ -82,7 +82,7 @@ int main(int /*argc*/, char** argv) {
     VERIFY(dsp_cfg::get().bp.K >= runtime_k::MIN_K && dsp_cfg::get().bp.K < runtime_k::MAX_K);
     VERIFY(dsp_cfg::get().bp.K % 2 != 0);
 
-    limit_memory(dsp_cfg::get().bp.max_memory * GB);
+    utils::limit_memory(dsp_cfg::get().bp.max_memory * GB);
 
     INFO("Starting dipSPAdes, built from " SPADES_GIT_REFSPEC ", git revision " SPADES_GIT_SHA1);
     INFO("Assembling dataset (" << dsp_cfg::get().io.dataset_name << ") with K=" << dsp_cfg::get().bp.K);
diff --git a/src/projects/dipspades/polymorphic_bulge_remover/polymorphic_bulge_remover.hpp b/src/projects/dipspades/polymorphic_bulge_remover/polymorphic_bulge_remover.hpp
index ccdb009..15cd2ac 100644
--- a/src/projects/dipspades/polymorphic_bulge_remover/polymorphic_bulge_remover.hpp
+++ b/src/projects/dipspades/polymorphic_bulge_remover/polymorphic_bulge_remover.hpp
@@ -45,8 +45,8 @@ class PolymorphicBulgeRemover {
         size_t num_glued_bulges = 1;
         for(size_t num_iter = 1; num_glued_bulges > 0; num_iter++){
             num_glued_bulges = spath_br.Run();
-            CompressAllVertices(graph_pack_.g, false);
-            INFO(ToString(num_iter) + " iteration: " + ToString(num_glued_bulges) + " simple bulges were glued");
+            CompressAllVertices(graph_pack_.g, 1, false);
+            INFO(std::to_string(num_iter) + " iteration: " + std::to_string(num_glued_bulges) + " simple bulges were glued");
         }
         INFO("Simple polymorphic bulge remover ends");
     }
@@ -63,8 +63,8 @@ class PolymorphicBulgeRemover {
         size_t num_glued_bulges = 1;
         for(size_t i = 0; (i < num_iters) && (num_glued_bulges != 0); i++){
             num_glued_bulges = br.Run();
-            CompressAllVertices(graph_pack_.g, false);
-            INFO(ToString(i + 1) + " iteration: " + ToString(num_glued_bulges) + " complex bulges were glued");
+            CompressAllVertices(graph_pack_.g, 1, false);
+            INFO(std::to_string(i + 1) + " iteration: " + std::to_string(num_glued_bulges) + " complex bulges were glued");
         }
         INFO(bulge_remover_name + " ends");
     }
diff --git a/src/projects/dipspades/utils/bulge_utils.hpp b/src/projects/dipspades/utils/bulge_utils.hpp
index 0471891..2c407b8 100644
--- a/src/projects/dipspades/utils/bulge_utils.hpp
+++ b/src/projects/dipspades/utils/bulge_utils.hpp
@@ -105,7 +105,7 @@ class Bulge : public BaseBulge{
     string GetPathStr(vector<EdgeId> path) {
         string s1 = "";
         for(auto edge = path.begin(); edge != path.end(); edge++)
-            s1 = ToString(graph_.int_id(*edge)) + "-";
+            s1 = std::to_string(graph_.int_id(*edge)) + "-";
         return s1.substr(0, s1.size() - 1);
     }
 
diff --git a/src/projects/hammer/config_struct_hammer.cpp b/src/projects/hammer/config_struct_hammer.cpp
index ba056b9..3363ed4 100644
--- a/src/projects/hammer/config_struct_hammer.cpp
+++ b/src/projects/hammer/config_struct_hammer.cpp
@@ -14,7 +14,7 @@
 
 #include "config_struct_hammer.hpp"
 #include "pipeline/config_common.hpp"
-#include "utils/openmp_wrapper.h"
+#include "utils/parallel/openmp_wrapper.h"
 
 #include <boost/property_tree/ptree.hpp>
 #include <string>
diff --git a/src/projects/hammer/hamcluster.cpp b/src/projects/hammer/hamcluster.cpp
index 997ebd5..06017df 100644
--- a/src/projects/hammer/hamcluster.cpp
+++ b/src/projects/hammer/hamcluster.cpp
@@ -7,7 +7,7 @@
 
 #include "hamcluster.hpp"
 
-#include "common/adt/concurrent_dsu.hpp"
+#include "adt/concurrent_dsu.hpp"
 #include "io/kmers/mmapped_reader.hpp"
 #include "parallel_radix_sort.hpp"
 
@@ -65,7 +65,7 @@ std::pair<size_t, size_t> SubKMerSplitter::split(Op &&op) {
 }
 
 #if 1
-static bool canMerge(const ConcurrentDSU &uf, size_t x, size_t y) {
+static bool canMerge(const dsu::ConcurrentDSU &uf, size_t x, size_t y) {
   size_t szx = uf.set_size(x), szy = uf.set_size(y);
   const size_t hardthr = 2500;
 
@@ -88,7 +88,7 @@ static bool canMerge(const ConcurrentDSU &uf, size_t x, size_t y) {
 #endif
 
 
-static void processBlockQuadratic(ConcurrentDSU  &uf,
+static void processBlockQuadratic(dsu::ConcurrentDSU  &uf,
                                   const std::vector<size_t>::iterator &block,
                                   size_t block_size,
                                   const KMerData &data,
@@ -110,7 +110,7 @@ static void processBlockQuadratic(ConcurrentDSU  &uf,
 
 void KMerHamClusterer::cluster(const std::string &prefix,
                                const KMerData &data,
-                               ConcurrentDSU &uf) {
+                               dsu::ConcurrentDSU &uf) {
   // First pass - split & sort the k-mers
   std::string fname = prefix + ".first", bfname = fname + ".blocks", kfname = fname + ".kmers";
   std::ofstream bfs(bfname, std::ios::out | std::ios::binary);
@@ -209,7 +209,7 @@ enum {
   FULLY_LOCKED = 3
 };
 
-static bool canMerge2(const ConcurrentDSU &uf, size_t kidx, size_t cidx) {
+static bool canMerge2(const dsu::ConcurrentDSU &uf, size_t kidx, size_t cidx) {
     // If either of indices is fully locked - bail out
     uint64_t kaux = uf.root_aux(kidx), caux = uf.root_aux(cidx);
     if (kaux == FULLY_LOCKED || caux == FULLY_LOCKED)
@@ -224,7 +224,7 @@ static bool canMerge2(const ConcurrentDSU &uf, size_t kidx, size_t cidx) {
     return true;
 }
 
-static void ClusterChunk(size_t start_idx, size_t end_idx, const KMerData &data, ConcurrentDSU &uf) {
+static void ClusterChunk(size_t start_idx, size_t end_idx, const KMerData &data, dsu::ConcurrentDSU &uf) {
     unsigned nthreads = cfg::get().general_max_nthreads;
 
     // INFO("Cluster: " << start_idx << ":" << end_idx);
@@ -274,7 +274,7 @@ static void ClusterChunk(size_t start_idx, size_t end_idx, const KMerData &data,
     }
 }
 
-void TauOneKMerHamClusterer::cluster(const std::string &, const KMerData &data, ConcurrentDSU &uf) {
+void TauOneKMerHamClusterer::cluster(const std::string &, const KMerData &data, dsu::ConcurrentDSU &uf) {
     size_t start_idx = 0;
     while (start_idx < data.size()) {
         size_t end_idx = start_idx + 64*1024;
diff --git a/src/projects/hammer/hamcluster.hpp b/src/projects/hammer/hamcluster.hpp
index 0db51f6..2b83337 100644
--- a/src/projects/hammer/hamcluster.hpp
+++ b/src/projects/hammer/hamcluster.hpp
@@ -17,8 +17,8 @@
 
 #include <iostream>
 #include <vector>
+#include <common/adt/concurrent_dsu.hpp>
 
-class ConcurrentDSU;
 
 typedef Seq<(hammer::K + 1) / 2, uint32_t> SubKMer;
 
@@ -144,7 +144,7 @@ class KMerHamClusterer {
   KMerHamClusterer(unsigned tau)
       : tau_(tau) {}
 
-  void cluster(const std::string &prefix, const KMerData &data, ConcurrentDSU &uf);
+  void cluster(const std::string &prefix, const KMerData &data, dsu::ConcurrentDSU &uf);
  private:
   DECL_LOGGER("Hamming Clustering");
 };
@@ -152,7 +152,7 @@ class KMerHamClusterer {
 class TauOneKMerHamClusterer {
  public:
   TauOneKMerHamClusterer() {} 
-  void cluster(const std::string &prefix, const KMerData &data, ConcurrentDSU &uf);
+  void cluster(const std::string &prefix, const KMerData &data, dsu::ConcurrentDSU &uf);
  private:
   DECL_LOGGER("tau = 1 Hamming Clustering");
 };
diff --git a/src/projects/hammer/hammer_tools.cpp b/src/projects/hammer/hammer_tools.cpp
index 3a14777..e178dd3 100644
--- a/src/projects/hammer/hammer_tools.cpp
+++ b/src/projects/hammer/hammer_tools.cpp
@@ -52,7 +52,7 @@ string getReadsFilename(const std::string & dirprefix, const std::string &fname,
   ostringstream tmp;
   tmp.str("");
 
-  tmp << dirprefix.data() << "/" << path::basename(fname) << '.' << std::setfill('0') << std::setw(2) << iter_no << "." << suffix.data();
+  tmp << dirprefix.data() << "/" << fs::basename(fname) << '.' << std::setfill('0') << std::setw(2) << iter_no << "." << suffix.data();
   return tmp.str();
 }
 
diff --git a/src/projects/hammer/kmer_cluster.cpp b/src/projects/hammer/kmer_cluster.cpp
index d6944b2..d05d26c 100644
--- a/src/projects/hammer/kmer_cluster.cpp
+++ b/src/projects/hammer/kmer_cluster.cpp
@@ -6,7 +6,7 @@
 //***************************************************************************
 
 #include "io/reads/ireadstream.hpp"
-#include "utils/openmp_wrapper.h"
+#include "utils/parallel/openmp_wrapper.h"
 
 #include "hammer_tools.hpp"
 #include "hamcluster.hpp"
@@ -424,16 +424,16 @@ size_t KMerClustering::SubClusterSingle(const std::vector<size_t> & block, std::
       }
 
       if (centersInCluster[k] == -1u) {
-        unsigned new_idx = 0;
-        #pragma omp critical
-        {
-          KMer newkmer(bestCenters[k].center_);
-
-          KMerStat kms(0 /* cnt */, 1.0 /* total quality */, NULL /*quality */);
-          kms.mark_good();
-          new_idx = (unsigned)data_.push_back(newkmer, kms);
-          if (data_.kmer(data_.seq_idx(newkmer)) != newkmer)
+        KMer newkmer(bestCenters[k].center_);
+        size_t new_idx = data_.checking_seq_idx(newkmer);
+        if (new_idx == -1ULL) {
+          #pragma omp critical
+          {
+            KMerStat kms(0 /* cnt */, 1.0 /* total quality */, NULL /*quality */);
+            kms.mark_good();
+            new_idx = data_.push_back(newkmer, kms);
             newkmers += 1;
+          }
         }
         v.insert(v.begin(), new_idx);
       }
diff --git a/src/projects/hammer/kmer_data.cpp b/src/projects/hammer/kmer_data.cpp
index dd730bd..a773b69 100644
--- a/src/projects/hammer/kmer_data.cpp
+++ b/src/projects/hammer/kmer_data.cpp
@@ -12,11 +12,12 @@
 #include "io/reads/ireadstream.hpp"
 #include "config_struct_hammer.hpp"
 
-#include "utils/mph_index/kmer_index_builder.hpp"
+#include "utils/kmer_mph/kmer_index_builder.hpp"
+#include "utils/logger/logger.hpp"
 
 #include "io/kmers/kmer_iterator.hpp"
-#include "common/adt/bf.hpp"
-#include "common/adt/hll.hpp"
+#include "adt/bf.hpp"
+#include "adt/hll.hpp"
 
 using namespace hammer;
 
@@ -35,7 +36,7 @@ struct KMerComparator {
 };
 
 
-class HammerFilteringKMerSplitter : public KMerSortingSplitter<hammer::KMer> {
+class HammerFilteringKMerSplitter : public utils::KMerSortingSplitter<hammer::KMer> {
  public:
   typedef std::function<bool(const KMer&)> KMerFilter;
 
@@ -44,7 +45,7 @@ class HammerFilteringKMerSplitter : public KMerSortingSplitter<hammer::KMer> {
       : KMerSortingSplitter<hammer::KMer>(work_dir, hammer::K),
       filter_(std::move(filter)) {}
 
-  path::files_t Split(size_t num_files) override;
+  fs::files_t Split(size_t num_files, unsigned nthreads) override;
 
  private:
   KMerFilter filter_;
@@ -84,13 +85,10 @@ class BufferFiller {
   }
 };
 
-path::files_t HammerFilteringKMerSplitter::Split(size_t num_files) {
-  unsigned nthreads = std::min(cfg::get().count_merge_nthreads, cfg::get().general_max_nthreads);
+fs::files_t HammerFilteringKMerSplitter::Split(size_t num_files, unsigned nthreads) {
   size_t reads_buffer_size = cfg::get().count_split_buffer;
 
-  INFO("Splitting kmer instances into " << num_files << " buckets. This might take a while.");
-
-  path::files_t out = PrepareBuffers(num_files, nthreads, reads_buffer_size);
+  fs::files_t out = PrepareBuffers(num_files, nthreads, reads_buffer_size);
 
   size_t n = 15, processed = 0;
   BufferFiller filler(*this);
@@ -335,22 +333,22 @@ void KMerDataCounter::BuildKMerIndex(KMerData &data) {
       // FIXME: Reduce code duplication
       HammerFilteringKMerSplitter splitter(workdir,
                                            [&] (const KMer &k) { return mcounter.count(k) > 1; });
-      KMerDiskCounter<hammer::KMer> counter(workdir, splitter);
+      utils::KMerDiskCounter<hammer::KMer> counter(workdir, splitter);
 
-      kmers = KMerIndexBuilder<HammerKMerIndex>(workdir, num_files_, omp_get_max_threads()).BuildIndex(data.index_, counter, /* save final */ true);
+      kmers = utils::KMerIndexBuilder<HammerKMerIndex>(workdir, num_files_, omp_get_max_threads()).BuildIndex(data.index_, counter, /* save final */ true);
       final_kmers = counter.GetFinalKMersFname();
   } else {
       HammerFilteringKMerSplitter splitter(workdir);
-      KMerDiskCounter<hammer::KMer> counter(workdir, splitter);
+      utils::KMerDiskCounter<hammer::KMer> counter(workdir, splitter);
 
-      kmers = KMerIndexBuilder<HammerKMerIndex>(workdir, num_files_, omp_get_max_threads()).BuildIndex(data.index_, counter, /* save final */ true);
+      kmers = utils::KMerIndexBuilder<HammerKMerIndex>(workdir, num_files_, omp_get_max_threads()).BuildIndex(data.index_, counter, /* save final */ true);
       final_kmers = counter.GetFinalKMersFname();
   }
 
 
   // Check, whether we'll ever have enough memory for running BH and bail out earlier
   double needed = 1.25 * (double)kmers * (sizeof(KMerStat) + sizeof(hammer::KMer));
-  if (needed > (double) get_memory_limit())
+  if (needed > (double) utils::get_memory_limit())
       FATAL_ERROR("The reads contain too many k-mers to fit into available memory. You need approx. "
                   << needed / 1024.0 / 1024.0 / 1024.0
                   << "GB of free RAM to assemble your dataset");
diff --git a/src/projects/hammer/kmer_data.hpp b/src/projects/hammer/kmer_data.hpp
index ece0e53..ef80a92 100644
--- a/src/projects/hammer/kmer_data.hpp
+++ b/src/projects/hammer/kmer_data.hpp
@@ -9,16 +9,19 @@
 #define __HAMMER_KMER_DATA_HPP__
 
 #include "kmer_stat.hpp"
-#include "common/adt/array_vector.hpp"
-#include "utils/mph_index/kmer_index.hpp"
+#include "adt/array_vector.hpp"
+
+#include "utils/kmer_mph/kmer_index.hpp"
+#include "utils/logger/logger.hpp"
+
 #include <vector>
 
-typedef KMerIndex<kmer_index_traits<hammer::KMer> > HammerKMerIndex;
+typedef utils::KMerIndex<utils::kmer_index_traits<hammer::KMer> > HammerKMerIndex;
 
 class KMerData {
   typedef std::vector<KMerStat> KMerDataStorageType;
   typedef std::vector<hammer::KMer> KMerStorageType;
-  typedef kmer_index_traits<hammer::KMer> traits;
+  typedef utils::kmer_index_traits<hammer::KMer> traits;
 
  public:
   KMerData()
@@ -114,7 +117,7 @@ class KMerData {
   }
 
  private:
-  array_vector<hammer::KMer::DataType> kmers_;
+  adt::array_vector<hammer::KMer::DataType> kmers_;
 
   KMerDataStorageType data_;
   KMerStorageType kmer_push_back_buffer_;
diff --git a/src/projects/hammer/main.cpp b/src/projects/hammer/main.cpp
index 1bb0dca..9817dfd 100644
--- a/src/projects/hammer/main.cpp
+++ b/src/projects/hammer/main.cpp
@@ -20,12 +20,12 @@
 #include "kmer_data.hpp"
 #include "expander.hpp"
 
-#include "common/adt/concurrent_dsu.hpp"
+#include "adt/concurrent_dsu.hpp"
 #include "utils/segfault_handler.hpp"
 #include "io/reads/read_processor.hpp"
 #include "io/reads/ireadstream.hpp"
 
-#include "utils/memory_limit.hpp"
+#include "utils/perf/memory_limit.hpp"
 
 #include "utils/logger/logger.hpp"
 #include "utils/logger/log_writers.hpp"
@@ -70,7 +70,7 @@ void create_console_logger() {
 }
 
 int main(int argc, char * argv[]) {
-  segfault_handler sh;
+  utils::segfault_handler sh;
 
   srand(42);
   srandom(42);
@@ -86,7 +86,7 @@ int main(int argc, char * argv[]) {
 
     // hard memory limit
     const size_t GB = 1 << 30;
-    limit_memory(cfg::get().general_hard_memory_limit * GB);
+    utils::limit_memory(cfg::get().general_hard_memory_limit * GB);
 
     // determine quality offset if not specified
     if (!cfg::get().input_qvoffset_opt) {
@@ -150,7 +150,7 @@ int main(int argc, char * argv[]) {
       // Cluster the Hamming graph
       std::vector<std::vector<size_t> > classes;
       if (cfg::get().hamming_do || do_everything) {
-        ConcurrentDSU uf(Globals::kmer_data->size());
+        dsu::ConcurrentDSU uf(Globals::kmer_data->size());
         std::string ham_prefix = hammer::getFilename(cfg::get().input_working_dir, Globals::iteration_no, "kmers.hamcls");
         INFO("Clustering Hamming graph.");
         if (cfg::get().general_tau > 1) {
diff --git a/src/projects/hammer/parallel_radix_sort.hpp b/src/projects/hammer/parallel_radix_sort.hpp
index 2765afb..7936fac 100644
--- a/src/projects/hammer/parallel_radix_sort.hpp
+++ b/src/projects/hammer/parallel_radix_sort.hpp
@@ -36,7 +36,7 @@
 #ifndef PARALLEL_RADIX_SORT_H_
 #define PARALLEL_RADIX_SORT_H_
 
-#include "utils/openmp_wrapper.h"
+#include "utils/parallel/openmp_wrapper.h"
 
 #include <stdint.h>
 #include <cstring>
diff --git a/src/projects/ionhammer/CMakeLists.txt b/src/projects/ionhammer/CMakeLists.txt
index 66a26ed..0c69016 100644
--- a/src/projects/ionhammer/CMakeLists.txt
+++ b/src/projects/ionhammer/CMakeLists.txt
@@ -11,16 +11,30 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR})
 
 add_executable(ionhammer
                kmer_data.cpp
-               hamcluster.cpp
                subcluster.cpp
                err_helper_table.cpp
                config_struct.cpp
-               expander.cpp
+               kmer_helpers.cpp
                seqeval/BaseHypothesisEvaluator.cpp
                seqeval/TreephaserLite.cpp
-               main.cpp)
+               main.cpp
+               reference.cpp
+               quality_metrics.cpp
+               quality_thresholds_estimator.cpp
+               hamcluster_1.cpp
+               gamma_poisson_model.cpp
+               normal_quality_model.cpp)
+
+add_executable(kmer_evaluator
+               kmer_data.cpp
+               kmer_evaluator.cpp
+               kmer_helpers.cpp
+               err_helper_table.cpp)
+
+
 
 target_link_libraries(ionhammer input utils pipeline mph_index BamTools ${COMMON_LIBRARIES})
+target_link_libraries(kmer_evaluator input  utils mph_index  BamTools ${COMMON_LIBRARIES})
 
 if (SPADES_STATIC_BUILD)
   set_target_properties(ionhammer PROPERTIES LINK_SEARCH_END_STATIC 1)
@@ -28,6 +42,8 @@ endif()
 
 install(TARGETS ionhammer
         RUNTIME DESTINATION bin)
+#install(TARGETS kmer_evaluator
+#        RUNTIME DESTINATION bin)
 install(DIRECTORY "${SPADES_CFG_DIR}/ionhammer"
         DESTINATION share/spades/configs
                 FILES_MATCHING PATTERN "*.cfg")
diff --git a/src/projects/ionhammer/HSeq.hpp b/src/projects/ionhammer/HSeq.hpp
index 567f84f..22e2858 100644
--- a/src/projects/ionhammer/HSeq.hpp
+++ b/src/projects/ionhammer/HSeq.hpp
@@ -8,13 +8,13 @@
 #ifndef __HAMMER_HSEQ_HPP__
 #define __HAMMER_HSEQ_HPP__
 
-#include "sequence/nucl.hpp"
 #include <city/city.h>
+#include "sequence/nucl.hpp"
 
 #include <array>
+#include <deque>
 #include <string>
 #include <vector>
-#include <deque>
 
 #include <cstdint>
 
@@ -23,56 +23,50 @@ namespace hammer {
 union HomopolymerRun {
   uint8_t raw;
   struct {
-    uint8_t len  : 6;
+    uint8_t len : 6;
     uint8_t nucl : 2;
   };
 
-  HomopolymerRun()
-      : raw(0) {}
-  HomopolymerRun(uint8_t nucl, uint8_t len)
+  HomopolymerRun() : raw(0) {}
+  HomopolymerRun(uint8_t nucl, uint8_t len = 1)
       : len(len & 63), nucl(nucl & 3) {}
 
-  bool operator==(const HomopolymerRun &that) const {
-    return raw == that.raw;
-  }
+  bool operator==(const HomopolymerRun &that) const { return raw == that.raw; }
 
-  bool operator!=(const HomopolymerRun &that) const {
-    return raw != that.raw;
-  }
+  bool operator!=(const HomopolymerRun &that) const { return raw != that.raw; }
 
-  bool operator<(const HomopolymerRun &that) const {
-    return raw < that.raw;
-  }
+  bool operator<(const HomopolymerRun &that) const { return raw < that.raw; }
 
-  std::string str() const {
-    return std::string(len, ::nucl(nucl));
-  }
+  inline char Nucl() const { return nucl; }
+
+  inline char Len() const { return len; }
+
+  std::string str() const { return std::string(len, ::nucl(nucl)); }
 };
 
 namespace iontorrent {
-  // Container shall have push_back method
-  template <typename Container>
-  void toHomopolymerRuns(const std::string &seq, Container& runs) {
-    if (seq.empty())
-      return;
-
-    char nucl = seq[0];
-    uint8_t len = 1;
-    for (size_t i = 1; i < seq.size(); ++i) {
-      if (seq[i] != nucl) {
-        runs.push_back(HomopolymerRun(dignucl(nucl), len));
-        len = 1;
-        nucl = seq[i];
-      } else {
-        ++len;
-      }
-    }
-    if (len > 0) {
+// Container shall have push_back method
+template <typename Container>
+void toHomopolymerRuns(const std::string &seq, Container &runs) {
+  if (seq.empty()) return;
+
+  char nucl = seq[0];
+  uint8_t len = 1;
+  for (size_t i = 1; i < seq.size(); ++i) {
+    if (seq[i] != nucl) {
       runs.push_back(HomopolymerRun(dignucl(nucl), len));
+      len = 1;
+      nucl = seq[i];
+    } else {
+      ++len;
     }
   }
+  if (len > 0) {
+    runs.push_back(HomopolymerRun(dignucl(nucl), len));
+  }
+}
 
-};
+};  // namespace iontorrent
 
 template <size_t N = 16>
 class HSeq {
@@ -82,14 +76,12 @@ class HSeq {
  private:
   StorageType data_;
 
-  const static size_t PrimeNum = 239;
-
  public:
   HSeq() {}
 
-  HSeq(typename StorageType::const_iterator Start,
-       typename StorageType::const_iterator End) {
-    std::copy(Start, End, data_.begin());
+  template <class Iterator>
+  HSeq(Iterator start, Iterator end) {
+    std::copy(start, end, data_.begin());
   }
 
   typedef HomopolymerRun DataType;
@@ -101,13 +93,9 @@ class HSeq {
     return N * sizeof(HomopolymerRun);
   }
 
-  typename StorageType::const_iterator begin() const {
-    return data_.begin();
-  }
+  typename StorageType::const_iterator begin() const { return data_.begin(); }
 
-  typename StorageType::const_iterator end() const {
-    return data_.end();
-  }
+  typename StorageType::const_iterator end() const { return data_.end(); }
 
   typename StorageType::const_reverse_iterator rbegin() const {
     return data_.rbegin();
@@ -117,21 +105,13 @@ class HSeq {
     return data_.rend();
   }
 
-  const HomopolymerRun *data() const {
-    return data_.data();
-  }
+  const HomopolymerRun *data() const { return data_.data(); }
 
-  size_t data_size() const {
-    return DataSize;
-  }
+  size_t data_size() const { return DataSize; }
 
-  HomopolymerRun &operator[](size_t idx) {
-    return data_[idx];
-  }
+  HomopolymerRun &operator[](size_t idx) { return data_[idx]; }
 
-  const HomopolymerRun &operator[](size_t idx) const {
-    return data_[idx];
-  }
+  const HomopolymerRun &operator[](size_t idx) const { return data_[idx]; }
 
   HSeq<N> operator!() const {
     HSeq<N> res(*this);
@@ -144,56 +124,97 @@ class HSeq {
       res[N - i - 1] = front;
     }
 
-    if (N & 1)
-      res[N/2].nucl = complement(res[N/2].nucl) & 3;
+    if (N & 1) res[N / 2].nucl = complement(res[N / 2].nucl) & 3;
 
     return res;
   }
 
   HSeq<N> operator<<(char nucl) const {
-    if (is_nucl(nucl))
-      nucl = dignucl(nucl);
+    if (is_nucl(nucl)) nucl = dignucl(nucl);
 
     HSeq<N> res(*this);
     // Easy case - just add to run
-    HomopolymerRun &last = res[N-1];
+    HomopolymerRun &last = res[N - 1];
     if (last.nucl == nucl) {
       last.len += 1;
       return res;
     }
 
     // Hard case - have to shift the stuff
-    for (size_t i = 0; i < N - 1; ++i)
-      res[i] = res[i + 1];
+    for (size_t i = 0; i < N - 1; ++i) res[i] = res[i + 1];
     res[N - 1].nucl = nucl;
     res[N - 1].len = 1;
 
     return res;
   }
 
-  HSeq<N>& operator<<=(char nucl) {
-    if (is_nucl(nucl))
-      nucl = dignucl(nucl);
+  HSeq<N> operator<<(HomopolymerRun run) const {
+    HSeq<N> res(*this);
+    // Easy case - just add to run
+    HomopolymerRun &last = res[N - 1];
+    if (last.nucl == run.nucl) {
+      last.len += run.len;
+      return res;
+    }
+
+    // Hard case - have to shift the stuff
+    for (size_t i = 0; i < N - 1; ++i) res[i] = res[i + 1];
+    res[N - 1] = run;
+
+    return res;
+  }
+
+  HSeq<N> &operator<<=(char nucl) {
+    if (is_nucl(nucl)) nucl = dignucl(nucl);
 
     // Easy case - just add to run
-    HomopolymerRun &last = data_[N-1];
+    HomopolymerRun &last = data_[N - 1];
     if (last.nucl == nucl) {
       last.len = (last.len + 1) & 63;
       return *this;
     }
 
     // Hard case - have to shift the stuff
-    for (size_t i = 0; i < N - 1; ++i)
-      data_[i] = data_[i + 1];
+    for (size_t i = 0; i < N - 1; ++i) data_[i] = data_[i + 1];
     data_[N - 1].nucl = nucl & 3;
     data_[N - 1].len = 1;
 
     return *this;
   }
 
+  HSeq<N> &operator<<=(HomopolymerRun run) {
+    // Easy case - just add to run
+    HomopolymerRun &last = data_[N - 1];
+    if (last.nucl == run.nucl) {
+      last.len = (last.len + run.len) & 63;
+      return *this;
+    }
+
+    // Hard case - have to shift the stuff
+    for (size_t i = 0; i < N - 1; ++i) data_[i] = data_[i + 1];
+    data_[N - 1] = run;
+    return *this;
+  }
+
+  HSeq<N> operator>>(HomopolymerRun run) const {
+    HSeq<N> res(*this);
+    // Easy case - just add to run
+    HomopolymerRun &first = res[0];
+    if (first.nucl == run.nucl) {
+      first.len += run.len;
+      return res;
+    }
+
+    // Hard case - have to shift the stuff
+    for (size_t i = 0; i < N - 1; ++i) res[i + 1] = res[i];
+    res[0].nucl = run.nucl;
+    res[0].len = run.len;
+
+    return res;
+  }
+
   HSeq<N> operator>>(char nucl) const {
-    if (is_nucl(nucl))
-      nucl = dignucl(nucl);
+    if (is_nucl(nucl)) nucl = dignucl(nucl);
 
     HSeq<N> res(*this);
     // Easy case - just add to run
@@ -204,39 +225,40 @@ class HSeq {
     }
 
     // Hard case - have to shift the stuff
-    for (size_t i = 0; i < N - 1; ++i)
-      res[i + 1] = res[i];
+    for (size_t i = 0; i < N - 1; ++i) res[i + 1] = res[i];
     res[0].nucl = nucl;
     res[0].len = 1;
 
     return res;
   }
 
-  bool operator==(const HSeq<N> &that) const {
-    return (data_ == that.data_);
-  }
-  bool operator!=(const HSeq<N> &that) const {
-    return (data_ != that.data_);
-  }
+  bool operator==(const HSeq<N> &that) const { return (data_ == that.data_); }
+  bool operator!=(const HSeq<N> &that) const { return (data_ != that.data_); }
 
   size_t size() const {
     size_t res = 0;
-    for (size_t i = 0; i < N; ++i)
-      res += data_[i].len;
+    for (size_t i = 0; i < N; ++i) res += data_[i].len;
+
+    return res;
+  }
+  size_t max_run_length() const {
+    size_t res = 0;
+    for (size_t i = 0; i < N; ++i) res = std::max((size_t)(data_[i].len), res);
 
     return res;
   }
 
   std::string str() const {
     std::string res;
-    for (size_t i = 0; i < N; ++i)
-      res += data_[i].str();
+    for (size_t i = 0; i < N; ++i) res += data_[i].str();
 
     return res;
   }
 
-  static size_t GetHash(const DataType *data, size_t sz = DataSize, uint32_t seed = 0) {
-    return CityHash64WithSeed((const char*)data, sz * sizeof(DataType), 0x9E3779B9 ^ seed);
+  static size_t GetHash(const DataType *data, size_t sz = DataSize,
+                        uint32_t seed = 0) {
+    return CityHash64WithSeed((const char *)data, sz * sizeof(DataType),
+                              0x9E3779B9 ^ seed);
   }
 
   size_t GetHash(uint32_t seed = 0) const {
@@ -248,7 +270,8 @@ class HSeq {
       return seq.GetHash(seed);
     }
 
-    size_t operator()(const DataType *data, size_t sz = DataSize, uint32_t seed = 0) const {
+    size_t operator()(const DataType *data, size_t sz = DataSize,
+                      uint32_t seed = 0) const {
       return GetHash(data, sz, seed);
     }
   };
@@ -257,8 +280,7 @@ class HSeq {
     bool operator()(const HSeq<N> &l, const HSeq<N> &r) const {
       for (size_t i = 0; i < N; ++i) {
         const uint8_t lr = l[i].raw, rr = r[i].raw;
-        if (lr != rr)
-          return lr < rr;
+        if (lr != rr) return lr < rr;
       }
 
       return false;
@@ -266,24 +288,24 @@ class HSeq {
   };
 };
 
-template<size_t N>
-std::ostream& operator<<(std::ostream& os, const HSeq<N> &seq) {
+template <size_t N>
+std::ostream &operator<<(std::ostream &os, const HSeq<N> &seq) {
   os << seq.str();
   return os;
 }
 
 namespace internal {
-  template <size_t N>
-  inline size_t getSize(const hammer::HSeq<N> &) {
-    return N;
-  }
+template <size_t N>
+inline size_t getSize(const hammer::HSeq<N> &) {
+  return N;
+}
 
-  template <typename T>
-  inline size_t getSize(const T& a) { 
-    return a.size(); 
-  }
+template <typename T>
+inline size_t getSize(const T &a) {
+  return a.size();
 }
+}  // namespace internal
 
-};
+};  // namespace hammer
 
-#endif // __HAMMER_HSEQ_HPP__
+#endif  // __HAMMER_HSEQ_HPP__
diff --git a/src/projects/ionhammer/comparator.py b/src/projects/ionhammer/comparator.py
new file mode 100755
index 0000000..19e41d4
--- /dev/null
+++ b/src/projects/ionhammer/comparator.py
@@ -0,0 +1,215 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+import subprocess
+import os
+import argparse
+import os.path
+import sys
+
+import pandas as pd
+
+def exit_with_error(message):
+    sys.stderr.write("Error: {__message}\n".format(__message=message))
+    sys.exit(1)
+
+class Comparator(object):
+
+    @staticmethod
+    def improved_count(corrected):
+        return corrected.query('levDistance < levDistance_baseline').shape[0]
+
+    @staticmethod
+    def comparable_improved_count(corrected):
+        return corrected.query('levDistance < levDistance_baseline and comparable==True').shape[0]
+
+    @staticmethod
+    def fill_comparable_aligments_flag(corrected):
+        corrected.eval('comparable = ((abs(refStart - refStart_baseline) < 15) and (abs(refEnd - refEnd_baseline) < 15))', inplace=True)
+        return corrected
+
+
+    @staticmethod
+    def corrupted_count(corrected):
+        corrupted = corrected.query('levDistance > levDistance_baseline and comparable==True')
+        print("Corrupted")
+        print(corrupted.index)
+        return corrupted.shape[0]
+
+    @staticmethod
+    def full_fixed_count(corrected):
+        return corrected.query("hammingDistance == 0 and hammingDistance_baseline != 0").shape[0]
+
+    @staticmethod
+    def comparable_full_fixed_count(corrected):
+        return corrected.query("hammingDistance == 0 and hammingDistance_baseline != 0 and comparable==True").shape[0]
+
+    @staticmethod
+    def stats_filename(path):
+        return path + ".stats.tsv.gz"
+
+
+    def print_count(self,  count):
+        return "{__count:.0f} ({__percent:.2f}%)".format(__count=count, __percent=100.0 * count / self.baseline.shape[0])
+
+    def calc_stats(self, path):
+        corrected = pd.read_csv(Comparator.stats_filename(path), sep="\t")
+        # NOTE: read ids gain a junk suffix during correction; keep only the part before '_'
+        corrected["id"]=corrected["id"].apply(lambda  x : x.split("_")[0])
+        corrected.set_index("id", inplace=True)
+        # sum_stats = corrected.sum(axis=0)
+        corrected = self.join_with_baseline(corrected)
+        corrected = self.fill_comparable_aligments_flag(corrected)
+        corrected.eval('gain = (hammingDistance_baseline - hammingDistance) / (hammingDistance_baseline + 1)')
+        mean_stats = corrected.query("comparable==True")[["hammingDistance", "levDistance", "gain"]].mean(axis=0)
+        print(corrected.shape)
+        corrected_dist_one = corrected.query("hammingDistance_baseline == 1")
+        print("Uncomparable reads")
+        uncomparable = corrected.query('comparable == False')
+        print(uncomparable.index)
+        comparable_count = corrected.query('comparable == True').shape[0]
+        corrupted = self.corrupted_count(corrected)
+        return pd.Series({"path" : path,
+                          "comparable_count" : self.print_count(comparable_count),
+                          "realigned_count" : self.print_count(uncomparable.shape[0]),
+                          # "meanHammingDistance" : mean_stats["hammingDistance"],
+                          "mean_lev_distance" : "{__dist:.4f} (x{__percent:.2f})".format(__dist=mean_stats["levDistance"], __percent=self.__mean_lev_dist / mean_stats["levDistance"]),
+                          # "gain" : mean_stats["gain"],
+                          # "total_insertions" : sum_stats["insertions"],
+                          # "total_deletions" : sum_stats["deletions"],
+                          # "total_mismatch" : sum_stats["mismatch"],
+                          # "improved_count" : self.improved_count(corrected),
+                          "improved_count" : self.print_count(self.comparable_improved_count(corrected)),
+                          "corrupted_count" : self.print_count(corrupted),
+                          "realigned_corrupted_count" : self.print_count(corrupted + uncomparable.shape[0]),
+                          # "full_fixed_count" : self.full_fixed_count(corrected),
+                          "full_fixed_count" : self.print_count(self.comparable_full_fixed_count(corrected)),
+                          "one_error_corrupted_count" : self.print_count(self.corrupted_count(corrected_dist_one)),
+                          "one_error_full_fixed_count" : self.print_count(self.comparable_full_fixed_count(corrected_dist_one))})
+
+    def join_with_baseline(self, corrected):
+        return corrected.join(self.baseline, rsuffix="_baseline")
+
+
+    def run_calc_stats_task(self, reads_path):
+        stats_path = self.stats_filename(reads_path)
+        if os.path.isfile(stats_path) and not self.force_recalc:
+            return
+        cmd = "java -Xmx64G -jar {__comparator_jar} {__reference} {__reads} {__stats_file}".format(__comparator_jar = self.comparator_jar,
+                                                                                           __reference=self.reference,
+                                                                                           __reads = reads_path,
+                                                                                           __stats_file = stats_path)
+        subprocess.call(cmd, shell=True)
+
+
+    def print_baseline_stats(self):
+        print("Baseline distance stats:")
+        print("Mean distance")
+        print(self.baseline[["hammingDistance", "levDistance"]].mean(axis=0))
+        self.__mean_lev_dist = self.baseline[["levDistance"]].mean(axis=0)[0]
+        print("Error sums:")
+        print(self.baseline.drop(['levDistance', 'hammingDistance'], axis=1).sum())
+
+    def __init__(self, reference_path, baseline_path, force_recalc, comparator_jar = "~/comparator.jar"):
+        self.force_recalc = force_recalc
+        self.reference = reference_path
+        self.comparator_jar = comparator_jar
+        self.results = []
+        self.__mean_lev_dist = 0
+        self.run_calc_stats_task(baseline_path)
+        self.baseline = pd.read_csv(Comparator.stats_filename(baseline_path), sep="\t")
+        self.baseline.set_index("id", inplace=True)
+        self.print_baseline_stats()
+
+    def add(self, path):
+        self.run_calc_stats_task(path)
+        self.results.append(self.calc_stats(path))
+
+    def save_results(self, path):
+        result = pd.DataFrame(self.results)
+        result.set_index("path", inplace=True)
+        result = result.T
+        result.to_latex(path, float_format="%.4f")
+
+
+
+class Mapper(object):
+    def __init__(self, reference_path, force_remap):
+        self.samtools_cmd = "tmap  mapall -f {__reference}".format(__reference=reference_path) \
+                            + " -i {__input_type} -o 1 -O 2 -g 3 -s {__output} -o 0 -v stage1  map1 map2 map3 map4"
+        self.samtools_view = "samtools view -h {__input_file}"
+        self.zcat_view = "zcat {__input_file}"
+        self.cat_view = "cat {__input_file}"
+        self.force_remap = force_remap
+
+    @staticmethod
+    def get_reads_format(path):
+        if path.endswith("sam"):
+            return "sam"
+        elif path.endswith("bam"):
+            return "sam"
+        elif path.endswith("fasta.gz") or path.endswith("fasta"):
+            return "fasta"
+        elif path.endswith("fastq"):
+            return "fastq"
+
+
+    @staticmethod
+    def is_sam_or_bam(path):
+        return path.endswith("sam") or path.endswith("bam")
+
+    @staticmethod
+    def is_fasta_gzip(path):
+        return path.endswith("fasta.gz")
+
+    @staticmethod
+    def is_fasta_or_fastq(path):
+        return path.endswith("fasta") or path.endswith("fastq")
+
+    def get_view_cmd(self, path):
+        if Mapper.is_sam_or_bam(path):
+            return self.samtools_view
+        elif Mapper.is_fasta_gzip(path):
+            return self.zcat_view
+        elif Mapper.is_fasta_or_fastq(path):
+            return self.cat_view
+        else:
+            exit_with_error("Unknown extension for file " + path)
+
+    def map_reads(self, path):
+        mapped_path = self.mapped_filename(path)
+        if os.path.isfile(mapped_path) and not self.force_remap:
+            return
+        view_cmd = self.get_view_cmd(path).format(__input_file=path)
+        map_cmd = self.samtools_cmd.format(__output=self.mapped_filename(path), __input_type=self.get_reads_format(path))
+        cmd = "{__view_cmd} | {__map_cmd}".format(__view_cmd=view_cmd, __map_cmd=map_cmd)
+        print("Running tmap command: " + cmd)
+        subprocess.call(cmd, shell=True)
+
+    @staticmethod
+    def mapped_filename(path):
+        return path + ".mapped.sam"
+
+    def run_task(self, path):
+        self.map_reads(path)
+        return self.mapped_filename(path)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Compute reads quality for correction')
+    parser.add_argument('--reference', dest='reference', help="Reference fasta file")
+    parser.add_argument("--force", help="Force recalc all stats", dest='recalc', action="store_true")
+    parser.add_argument("--reads", help="Baseline reads", dest='reads')
+    parser.add_argument("--corrected-reads", help="Corrected reads", dest='corrected_reads', nargs="+")
+    args = parser.parse_args()
+
+    mapper = Mapper(args.reference, args.recalc)
+
+    mapped_baseline_path = mapper.run_task(args.reads)
+    comparator = Comparator(args.reference, mapped_baseline_path, args.recalc)
+
+    for corrected_path in args.corrected_reads:
+        mapped_reads_path = mapper.run_task(corrected_path)
+        comparator.add(mapped_reads_path)
+    comparator.save_results("corrections_quality.tex")
+
+
diff --git a/src/projects/ionhammer/config_struct.cpp b/src/projects/ionhammer/config_struct.cpp
index 7701eef..098e31a 100644
--- a/src/projects/ionhammer/config_struct.cpp
+++ b/src/projects/ionhammer/config_struct.cpp
@@ -7,7 +7,7 @@
 
 #include "config_struct.hpp"
 
-#include "utils/openmp_wrapper.h"
+#include "utils/parallel/openmp_wrapper.h"
 
 #include "llvm/Support/YAMLParser.h"
 #include "llvm/Support/YAMLTraits.h"
@@ -16,69 +16,107 @@
 
 using namespace llvm;
 
-namespace llvm { namespace yaml {
+namespace llvm {
+namespace yaml {
 template <>
 struct ScalarEnumerationTraits<hammer_config::HammerStage> {
-    static void enumeration(yaml::IO &io, hammer_config::HammerStage &value) {
-        io.enumCase(value, "count",       hammer_config::HammerStage::KMerCounting);
-        io.enumCase(value, "hamcluster",  hammer_config::HammerStage::HammingClustering);
-        io.enumCase(value, "subcluster",  hammer_config::HammerStage::SubClustering);
-        io.enumCase(value, "correct",     hammer_config::HammerStage::ReadCorrection);
-    }
+  static void enumeration(yaml::IO &io, hammer_config::HammerStage &value) {
+    io.enumCase(value, "count", hammer_config::HammerStage::KMerCounting);
+    io.enumCase(value, "hamcluster",
+                hammer_config::HammerStage::HammingClustering);
+    io.enumCase(value, "subcluster", hammer_config::HammerStage::SubClustering);
+    io.enumCase(value, "correct", hammer_config::HammerStage::ReadCorrection);
+  }
 };
-}}
+
+  template <>
+  struct ScalarEnumerationTraits<hammer_config::CenterType> {
+    static void enumeration(yaml::IO &io, hammer_config::CenterType &value) {
+      io.enumCase(value, "count_max", hammer_config::CenterType::COUNT_ARGMAX);
+      io.enumCase(value, "consensus", hammer_config::CenterType::CONSENSUS);
+      io.enumCase(value, "posterior_consensus", hammer_config::CenterType::BY_POSTERIOR_QUALITY);
+    }
+  };
+}  // namespace yaml
+}  // namespace llvm
 
 // FIXME: This is temporary
 class DataSetReader {
-  public:
-    DataSetReader(yaml::IO&) {}
-    DataSetReader(yaml::IO&, io::DataSet<>&) {}
+ public:
+  DataSetReader(yaml::IO &) {}
+  DataSetReader(yaml::IO &, io::DataSet<> &) {}
 
-    io::DataSet<> denormalize(yaml::IO &) {
-        return io::DataSet<>(path);
-    }
+  io::DataSet<> denormalize(yaml::IO &) { return io::DataSet<>(path); }
 
-    std::string path;
+  std::string path;
 };
 
-namespace llvm { namespace yaml {
+namespace llvm {
+namespace yaml {
 template <>
 struct MappingTraits<hammer_config::hammer_config> {
-    static void mapping(yaml::IO &io, hammer_config::hammer_config &cfg) {
-        yaml::MappingNormalization<DataSetReader, io::DataSet<>> dataset(io, cfg.dataset);
-
-        io.mapRequired("dataset", dataset->path);
-        io.mapOptional("working_dir", cfg.working_dir, std::string("."));
-        io.mapOptional("output_dir", cfg.output_dir, std::string("."));
-        io.mapRequired("hard_memory_limit", cfg.hard_memory_limit);
-        io.mapOptional("count_split_buffer", cfg.count_split_buffer, 0ul);
-        io.mapOptional("max_nthreads", cfg.max_nthreads, 1u);
-        io.mapRequired("kmer_qual_threshold", cfg.kmer_qual_threshold);
-        io.mapRequired("center_qual_threshold", cfg.center_qual_threshold);
-        io.mapRequired("delta_score_threshold", cfg.delta_score_threshold);
-        io.mapRequired("keep_uncorrected_ends", cfg.keep_uncorrected_ends);
-        io.mapRequired("tau", cfg.tau);
-        io.mapOptional("debug_mode", cfg.debug_mode, false);
-        io.mapOptional("start_stage", cfg.start_stage, hammer_config::HammerStage::KMerCounting);
-    }
+  static void mapping(yaml::IO &io, hammer_config::hammer_config &cfg) {
+    yaml::MappingNormalization<DataSetReader, io::DataSet<>> dataset(
+        io, cfg.dataset);
+
+    io.mapRequired("dataset", dataset->path);
+    io.mapOptional("working_dir", cfg.working_dir, std::string("."));
+    io.mapOptional("output_dir", cfg.output_dir, std::string("."));
+    io.mapRequired("hard_memory_limit", cfg.hard_memory_limit);
+    io.mapOptional("count_split_buffer", cfg.count_split_buffer, 0ul);
+    io.mapOptional("max_nthreads", cfg.max_nthreads, 1u);
+
+    io.mapOptional("oracle_path", cfg.oracle_path, std::string(""));
+    io.mapOptional("max_full_del", cfg.max_full_del, 1u);
+    io.mapOptional("max_second_indel", cfg.max_second_indel, 1u);
+    io.mapOptional("max_indel", cfg.max_indel, 3u);
+    io.mapOptional("max_from_zero_insertion", cfg.max_from_zero_insertion, 1u);
+
+    io.mapOptional("sample_rate", cfg.sample_rate, 1.0);
+    io.mapOptional("subcluster_min_count", cfg.subcluster_min_count, 15u);
+    io.mapOptional("good_threshold", cfg.good_threshold, -0.69);
+    io.mapOptional("skip_threshold", cfg.skip_threshold, -0.01);
+    io.mapOptional("subcluster_threshold", cfg.subcluster_threshold, -0.001);
+    io.mapOptional("subcluster_filter_by_count", cfg.subcluster_filter_by_count_enabled, true);
+    io.mapOptional("queue_limit_multiplier", cfg.queue_limit_multiplier, 500);
+    io.mapOptional("dist_one_subcluster_alpha", cfg.dist_one_subcluster_alpha, 0.51);
+    io.mapOptional("subcluster_qual_mult", cfg.subcluster_qual_mult, 1.0);
+    io.mapOptional("subcluster_count_mult", cfg.subcluster_count_mult, 0.3);
+    io.mapOptional("correction_penalty", cfg.correction_penalty, -7.0);
+    io.mapOptional("bad_kmer_penalty", cfg.bad_kmer_penalty, -20.0);
+    io.mapOptional("count_dist_eps", cfg.count_dist_eps, 1e-3);
+    io.mapOptional("count_dist_skip_quantile", cfg.count_dist_skip_quantile, 0.05);
+    io.mapOptional("noise_filter_count_threshold", cfg.noise_filter_count_threshold, 3u);
+    io.mapOptional("center_type", cfg.center_type, hammer_config::CenterType::COUNT_ARGMAX);
+
+
+    io.mapRequired("kmer_qual_threshold", cfg.kmer_qual_threshold);
+    io.mapRequired("center_qual_threshold", cfg.center_qual_threshold);
+    io.mapRequired("delta_score_threshold", cfg.delta_score_threshold);
+    io.mapRequired("keep_uncorrected_ends", cfg.keep_uncorrected_ends);
+    io.mapRequired("tau", cfg.tau);
+    io.mapOptional("debug_mode", cfg.debug_mode, false);
+    io.mapOptional("start_stage", cfg.start_stage,
+                   hammer_config::HammerStage::KMerCounting);
+  }
 };
-}}
+}  // namespace yaml
+}  // namespace llvm
 
 namespace hammer_config {
-void load(hammer_config& cfg, const std::string &filename) {
-    ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(filename);
-    if (!Buf)
-        throw(std::string("Failed to load config file ") + filename);
+void load(hammer_config &cfg, const std::string &filename) {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(filename);
+  if (!Buf) throw(std::string("Failed to load config file ") + filename);
 
-    yaml::Input yin(*Buf.get());
-    yin >> cfg;
+  yaml::Input yin(*Buf.get());
+  yin >> cfg;
 
-    if (yin.error())
-        throw(std::string("Failed to load config file ") + filename);
+  if (yin.error()) throw(std::string("Failed to load config file ") + filename);
 
-    // Fix number of threads according to OMP capabilities.
-    cfg.max_nthreads = std::min(cfg.max_nthreads, (unsigned)omp_get_max_threads());
-    // Inform OpenMP runtime about this :)
-    omp_set_num_threads(cfg.max_nthreads);
-}
+  // Fix number of threads according to OMP capabilities.
+  cfg.max_nthreads =
+      std::min(cfg.max_nthreads, (unsigned)omp_get_max_threads());
+  // Inform OpenMP runtime about this :)
+  omp_set_num_threads(cfg.max_nthreads);
 }
+}  // namespace hammer_config
diff --git a/src/projects/ionhammer/config_struct.hpp b/src/projects/ionhammer/config_struct.hpp
index 64fe4b2..439c54c 100644
--- a/src/projects/ionhammer/config_struct.hpp
+++ b/src/projects/ionhammer/config_struct.hpp
@@ -9,7 +9,6 @@
 #define __HAMMER_IT_CONFIG_HPP__
 
 #include "pipeline/config_singl.hpp"
-
 #include "pipeline/library.hpp"
 
 namespace hammer_config {
@@ -20,7 +19,10 @@ enum class HammerStage {
   ReadCorrection = 4
 };
 
-struct hammer_config {
+enum class CenterType { COUNT_ARGMAX, CONSENSUS, BY_POSTERIOR_QUALITY };
+
+
+  struct hammer_config {
   io::DataSet<> dataset;
 
   std::string working_dir;
@@ -39,11 +41,36 @@ struct hammer_config {
 
   bool debug_mode;
   HammerStage start_stage;
+
+  double sample_rate = 1.0;
+  unsigned max_full_del = 1;
+  unsigned max_indel = 3;
+  unsigned max_second_indel = 1;
+  unsigned max_from_zero_insertion = 1;
+  std::string oracle_path = "";
+
+  unsigned subcluster_min_count = 15;
+  double good_threshold = -0.69;
+  double skip_threshold = -0.01;
+  double subcluster_threshold = -0.001;
+  bool subcluster_filter_by_count_enabled = true;
+  int queue_limit_multiplier = 200;
+  double dist_one_subcluster_alpha = 0.6;
+  double subcluster_qual_mult = 1.0;
+  double subcluster_count_mult = 0.4;
+
+  double correction_penalty = -7;
+  double bad_kmer_penalty = -20;
+  double count_dist_eps = 1e-3;
+  double count_dist_skip_quantile = 0.05;
+
+  unsigned noise_filter_count_threshold = 3;
+  CenterType center_type = CenterType::COUNT_ARGMAX;
 };
 
-void load(hammer_config& cfg, const std::string &filename);
-}
+void load(hammer_config& cfg, const std::string& filename);
+}  // namespace hammer_config
 
 typedef config_common::config<hammer_config::hammer_config> cfg;
 
-#endif // __HAMMER_IT_CONFIG_HPP__
+#endif  // __HAMMER_IT_CONFIG_HPP__
diff --git a/src/projects/ionhammer/consensus.hpp b/src/projects/ionhammer/consensus.hpp
index 512c411..88258f7 100644
--- a/src/projects/ionhammer/consensus.hpp
+++ b/src/projects/ionhammer/consensus.hpp
@@ -10,14 +10,14 @@
 
 #include "HSeq.hpp"
 
-#include <limits>
 #include <boost/numeric/ublas/matrix.hpp>
+#include <limits>
 
 namespace hammer {
 namespace iontorrent {
 
-inline
-std::pair<hammer::HomopolymerRun, double> consensus(const boost::numeric::ublas::matrix<double>& scores) {
+inline std::pair<hammer::HomopolymerRun, double> consensus(
+    const boost::numeric::ublas::matrix<double>& scores) {
   double inf = -std::numeric_limits<double>::infinity();
 
   double max = inf;
@@ -34,7 +34,7 @@ std::pair<hammer::HomopolymerRun, double> consensus(const boost::numeric::ublas:
   return std::make_pair(hammer::HomopolymerRun(nucl, len), max);
 }
 
-};
-};
+};  // namespace iontorrent
+};  // namespace hammer
 
-#endif // __HAMMER_IT_CONSENSUS_HPP__
+#endif  // __HAMMER_IT_CONSENSUS_HPP__
diff --git a/src/projects/ionhammer/err_helper_table.cpp b/src/projects/ionhammer/err_helper_table.cpp
index 8e20c63..b53b6ab 100644
--- a/src/projects/ionhammer/err_helper_table.cpp
+++ b/src/projects/ionhammer/err_helper_table.cpp
@@ -25,15 +25,13 @@ static const uint32_t helper_table_data[] = {
 // (2 * 4^^2) / 32,
 // (2 * 4^^4) / 32,
 // ...
-const HelperTable helper_tables[] = {
-  { 1, helper_table_data },
-  { 2, helper_table_data + 1 },
-  { 3, helper_table_data + 17 },
-  { 4, helper_table_data + 273 },
-  { 5, helper_table_data + 4369 }
-};
+const HelperTable helper_tables[] = {{1, helper_table_data},
+                                     {2, helper_table_data + 1},
+                                     {3, helper_table_data + 17},
+                                     {4, helper_table_data + 273},
+                                     {5, helper_table_data + 4369}};
 
-}; // namespace internal
+};  // namespace internal
 
-}; // namespace errHelper
-}; // namespace hammer
+};  // namespace errHelper
+};  // namespace hammer
diff --git a/src/projects/ionhammer/err_helper_table.hpp b/src/projects/ionhammer/err_helper_table.hpp
index 342ff82..dce8d18 100644
--- a/src/projects/ionhammer/err_helper_table.hpp
+++ b/src/projects/ionhammer/err_helper_table.hpp
@@ -10,11 +10,11 @@
 
 #include "hkmer.hpp"
 
-#include <vector>
+#include <cassert>
+#include <cstdlib>
 #include <istream>
 #include <string>
-#include <cstdlib>
-#include <cassert>
+#include <vector>
 
 #include "utils/logger/logger.hpp"
 
@@ -23,11 +23,7 @@ namespace hammer {
 namespace errHelper {
 
 /// Type of error
-enum Hint {
-  kMismatch,
-  kInsertion,
-  kDeletion
-};
+enum Hint { kMismatch, kInsertion, kDeletion };
 
 namespace internal {
 
@@ -36,38 +32,34 @@ static const unsigned int MAX_K = 5;
 
 struct HelperTable {
   const unsigned k_;
-  const uint32_t* storage_;
+  const uint32_t *storage_;
 
   template <typename It1, typename It2>
-  Hint lookupHint(const It1 &x_it, const It2 &y_it,
-                  size_t x_nfront, size_t y_nfront) const {
-
+  Hint lookupHint(const It1 &x_it, const It2 &y_it, size_t x_nfront,
+                  size_t y_nfront) const {
     VERIFY(k_ <= MAX_K);
     unsigned x_code = getCode(x_it, x_nfront, k_);
     unsigned y_code = getCode(y_it, y_nfront, k_);
 
     unsigned code = x_code + (y_code << (2 * k_));
-    uint32_t bt = storage_[code / 16]; // 16 hints per uint32_t
+    uint32_t bt = storage_[code / 16];  // 16 hints per uint32_t
     unsigned shift = (code % 16) * 2;
     return static_cast<Hint>((bt >> shift) & 0x3);
   }
 
   template <typename HRunIter>
-  static unsigned getCode(const HRunIter& x_it, size_t x_nfront, size_t k) {
+  static unsigned getCode(const HRunIter &x_it, size_t x_nfront, size_t k) {
     unsigned code = 0;
     unsigned len = 0;
     auto nucl = x_it->nucl;
-    for (len = 0; len < x_nfront && len < k; ++len)
-      code |= nucl << (2 * len);
+    for (len = 0; len < x_nfront && len < k; ++len) code |= nucl << (2 * len);
 
-    if (len == k)
-      return code;
+    if (len == k) return code;
 
-    for (HRunIter it = x_it + 1; ; ++it) {
+    for (HRunIter it = x_it + 1;; ++it) {
       for (size_t i = 0; i < it->len; ++i) {
         code |= it->nucl << (2 * len++);
-        if (len == k)
-          return code;
+        if (len == k) return code;
       }
     }
 
@@ -83,19 +75,17 @@ static inline size_t getNumberOfRemainingBases(const HRunIter &x_it,
                                                const HRunIter &x_end,
                                                size_t x_nfront) {
   size_t n = x_nfront;
-  if (n >= MAX_K)
-    return MAX_K;
+  if (n >= MAX_K) return MAX_K;
 
   for (HRunIter it = x_it + 1; it != x_end; ++it) {
     n += it->len;
-    if (n >= MAX_K)
-      return MAX_K;
+    if (n >= MAX_K) return MAX_K;
   }
 
   return n;
 }
 
-}; // namespace internal
+};  // namespace internal
 
 /// Estimate what kind of error occurred at the position
 template <typename It1, typename It2>
@@ -107,11 +97,11 @@ static inline Hint getHint(const It1 &x_begin, const It1 &x_end,
   size_t x_rem = internal::getNumberOfRemainingBases(x_begin, x_end, x_nfront);
   size_t y_rem = internal::getNumberOfRemainingBases(y_begin, y_end, y_nfront);
 
-  auto& table = internal::helper_tables[std::min(x_rem, y_rem) - 1];
+  auto &table = internal::helper_tables[std::min(x_rem, y_rem) - 1];
   return table.lookupHint<It1, It2>(x_begin, y_begin, x_nfront, y_nfront);
 }
 
-}; // namespace errHelper
-}; // namespace hammer
+};  // namespace errHelper
+};  // namespace hammer
 
-#endif // __HAMMER_ERR_HELPER_TABLE_HPP__
+#endif  // __HAMMER_ERR_HELPER_TABLE_HPP__
diff --git a/src/projects/ionhammer/expander.cpp b/src/projects/ionhammer/expander.cpp
deleted file mode 100644
index acc7d3e..0000000
--- a/src/projects/ionhammer/expander.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#include "expander.hpp"
-
-#include "config_struct.hpp"
-#include "kmer_data.hpp"
-#include "valid_hkmer_generator.hpp"
-
-#include "io/reads/file_reader.hpp"
-
-#include <vector>
-#include <cstring>
-
-bool Expander::operator()(const io::SingleRead &r) {
-    size_t sz = r.size();
-
-    std::vector<unsigned> covered_by_solid(sz, false);
-    std::vector<size_t> kmer_indices(sz, -1ull);
-
-    ValidHKMerGenerator<hammer::K> gen(r);
-    while (gen.HasMore()) {
-        hammer::HKMer kmer = gen.kmer();
-        size_t idx = data_.seq_idx(kmer), kl = kmer.size();
-        size_t read_pos = gen.pos() - kl;
-
-        kmer_indices[read_pos] = idx;
-        if (data_[idx].changeto == idx &&
-            data_[idx].qual < cfg::get().center_qual_threshold) {
-            for (size_t j = read_pos; j < read_pos + kl; ++j) {
-                VERIFY_MSG(j < sz, "read_pos == " << read_pos << ", r.size() == " << r.size() << ", kmer: " << kmer << ", read: " << r.GetSequenceString());
-                covered_by_solid[j] = true;
-            }
-        }
-
-        gen.Next();
-    }
-
-    for (size_t j = 0; j < sz; ++j) {
-        if (!covered_by_solid[j] || kmer_indices[j] == -1ull)
-            continue;
-
-        size_t idx = kmer_indices[j];
-        auto &kmer_data = data_[idx];
-        if (kmer_data.changeto != idx) {
-#           pragma omp atomic
-            changed_ += 1;
-
-            kmer_data.lock();
-            kmer_data.changeto = static_cast<unsigned>(idx);
-            kmer_data.unlock();
-        }
-    }
-
-    return false;
-}
diff --git a/src/projects/ionhammer/expander.hpp b/src/projects/ionhammer/expander.hpp
deleted file mode 100644
index bd27673..0000000
--- a/src/projects/ionhammer/expander.hpp
+++ /dev/null
@@ -1,31 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#ifndef __HAMMER_EXPANDER_HPP__
-#define __HAMMER_EXPANDER_HPP__
-
-class KMerData;
-namespace io {
-class SingleRead;
-}
-
-#include <cstring>
-
-class Expander {
-  KMerData &data_;
-  size_t changed_;
-  
- public:
-  Expander(KMerData &data) 
-      : data_(data), changed_(0) {}
-
-  size_t changed() const { return changed_; }
-
-  bool operator()(const io::SingleRead &r);
-};
-
-#endif
diff --git a/src/projects/ionhammer/flow_space_read.hpp b/src/projects/ionhammer/flow_space_read.hpp
index d308f4d..b40a130 100644
--- a/src/projects/ionhammer/flow_space_read.hpp
+++ b/src/projects/ionhammer/flow_space_read.hpp
@@ -8,11 +8,11 @@
 #ifndef __HAMMER_IT_FLOW_SPACE_READ_HPP__
 #define __HAMMER_IT_FLOW_SPACE_READ_HPP__
 
-#include "io/reads/single_read.hpp"
 #include "HSeq.hpp"
+#include "io/reads/single_read.hpp"
 
-#include <deque>
 #include <cstddef>
+#include <deque>
 #include <string>
 
 namespace hammer {
@@ -21,6 +21,7 @@ namespace hammer {
 class FlowSpaceRead {
   std::string name_;
   std::deque<HomopolymerRun> runs_;
+
  public:
   FlowSpaceRead(const io::SingleRead& read) : name_(read.name()) {
     const auto& seq = read.GetSequenceString();
@@ -28,24 +29,15 @@ class FlowSpaceRead {
   }
 
   template <typename It>
-  FlowSpaceRead(It runs_beg, It runs_end) :
-    runs_(runs_beg, runs_end) {}
+  FlowSpaceRead(It runs_beg, It runs_end) : runs_(runs_beg, runs_end) {}
 
-  size_t size() const {
-    return runs_.size();
-  }
+  size_t size() const { return runs_.size(); }
 
-  const std::string& name() const {
-    return name_;
-  }
+  const std::string& name() const { return name_; }
 
-  HomopolymerRun operator[](size_t index) const {
-    return runs_[index];
-  }
+  HomopolymerRun operator[](size_t index) const { return runs_[index]; }
 
-  HomopolymerRun& operator[](size_t index) {
-    return runs_[index];
-  }
+  HomopolymerRun& operator[](size_t index) { return runs_[index]; }
 
   void TrimLeft(size_t n_runs) {
     if (n_runs >= runs_.size())
@@ -63,15 +55,12 @@ class FlowSpaceRead {
 
   std::string GetSequenceString() const {
     std::string seq;
-    for (size_t i = 0; i < runs_.size(); ++i)
-      seq += runs_[i].str();
+    for (size_t i = 0; i < runs_.size(); ++i) seq += runs_[i].str();
     return seq;
   }
 
-  const std::deque<hammer::HomopolymerRun>& data() const {
-    return runs_;
-  }
+  const std::deque<hammer::HomopolymerRun>& data() const { return runs_; }
 };
 
-} // namespace hammer
+}  // namespace hammer
 #endif
diff --git a/src/projects/ionhammer/gamma_poisson_model.cpp b/src/projects/ionhammer/gamma_poisson_model.cpp
new file mode 100644
index 0000000..65d236a
--- /dev/null
+++ b/src/projects/ionhammer/gamma_poisson_model.cpp
@@ -0,0 +1,16 @@
+//
+// Created by Vasiliy Ershov on 08/11/2016.
+//
+
+#include "gamma_poisson_model.hpp"
+
+using namespace n_gamma_poisson_model;
+
+std::array<double, 100000> PoissonGammaDistribution::log_gamma_integer_cache_ =
+    []() -> std::array<double, 100000> {
+  std::array<double, 100000> cache;
+  for (size_t i = 0; i < cache.size(); ++i) {
+    cache[i] = boost::math::lgamma(i + 1);
+  }
+  return cache;
+}();
diff --git a/src/projects/ionhammer/gamma_poisson_model.hpp b/src/projects/ionhammer/gamma_poisson_model.hpp
new file mode 100644
index 0000000..73fde71
--- /dev/null
+++ b/src/projects/ionhammer/gamma_poisson_model.hpp
@@ -0,0 +1,869 @@
+//
+// Created by Vasiliy Ershov on 08/11/2016.
+//
+
+#ifndef PROJECT_GAMMA_POISSON_MODEL_HPP
+#define PROJECT_GAMMA_POISSON_MODEL_HPP
+
+#include <common/utils/parallel/openmp_wrapper.h>
+#include <boost/math/special_functions/binomial.hpp>
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/trigamma.hpp>
+#include <vector>
+#include "kmer_data.hpp"
+#include "thread_utils.h"
+#include "valid_hkmer_generator.hpp"
+//
+
+namespace n_gamma_poisson_model {
+
+struct QualFunc {
+  double alpha_;
+  double beta_;
+
+  double operator()(double x) const { return alpha_ * x + beta_; }
+
+  double GenomicLogLikelihood(double x) const {
+    const double val = (*this)(x);
+    const double exp_point = exp(val);
+    return val - (std::isfinite(exp_point) ? log(1 + exp_point) : val);
+  }
+};
+
+class GammaDistribution {
+ private:
+  double shape_;
+  double rate_;
+  double log_gamma_at_shape_;
+
+ public:
+  GammaDistribution(const GammaDistribution&) = default;
+
+  GammaDistribution& operator=(const GammaDistribution&) = default;
+
+  GammaDistribution(const double shape = 1, const double rate = 1)
+      : shape_(shape), rate_(rate) {
+    log_gamma_at_shape_ = boost::math::lgamma(shape_);
+  }
+
+  inline double GetShape() const { return shape_; }
+
+  inline double GetRate() const { return rate_; }
+
+  inline double LogGammaAtShape() const { return log_gamma_at_shape_; }
+};
+
+class GammaMixture {
+ private:
+  GammaDistribution first_;
+  GammaDistribution second_;
+  double first_weight_;
+
+ public:
+  GammaMixture() : first_(1, 1), second_(1, 1), first_weight_(-1) {}
+
+  GammaMixture(const GammaDistribution& first,
+                const GammaDistribution& second,
+                double firstWeight)
+      : first_(first), second_(second), first_weight_(firstWeight) {}
+
+  const GammaDistribution& GetFirst() const { return first_; }
+
+  const GammaDistribution& GetSecond() const { return second_; }
+
+  double GetFirstWeight() const { return first_weight_; }
+};
+
+class PoissonGammaDistribution {
+ private:
+  const GammaDistribution& prior_;
+  static std::array<double, 100000> log_gamma_integer_cache_;
+
+ private:
+  inline double IntLogGamma(size_t count) const {
+    if (count < log_gamma_integer_cache_.size()) {
+      return log_gamma_integer_cache_[count];
+    } else {
+      return boost::math::lgamma(((double)count) + 1);
+    }
+  }
+
+ public:
+  PoissonGammaDistribution(const GammaDistribution& prior) : prior_(prior) {}
+
+  inline double PartialLogLikelihood(size_t count) const {
+    const double a = prior_.GetShape();
+    const double b = prior_.GetRate();
+
+    double ll = 0.0;
+    ll += a * log(b) - (a + (double)count) * log(b + 1);
+    ll +=
+        boost::math::lgamma(prior_.GetShape() + (double)count) - prior_.LogGammaAtShape();
+    return ll;
+  }
+
+  inline double LogLikelihood(size_t count) const {
+    const double a = prior_.GetShape();
+    const double b = prior_.GetRate();
+
+    double ll = 0.0;
+    ll += a * log(b) - (a + (double)count) * log(b + 1);
+    ll += boost::math::lgamma(prior_.GetShape() + ((double)count)) - IntLogGamma(count) -
+          prior_.LogGammaAtShape();
+
+    return ll;
+  }
+
+  inline double Quantile(double p) const {
+    const double a = prior_.GetShape();
+    const double b = prior_.GetRate();
+    return boost::math::ibeta_inva(a, 1.0 / (1.0 + b), 1.0 - p);
+  }
+
+  inline double Cumulative(size_t count) const {
+    const double a = prior_.GetShape();
+    const double b = prior_.GetRate();
+
+    return 1.0 - boost::math::ibeta((double)count + 1, a, 1.0 / (1.0 + b));
+  }
+};
+
+constexpr int RunSizeLimit = 8;
+
+class ParametricClusterModel {
+ private:
+  GammaMixture prior_;
+  QualFunc qual_func_;
+  double count_threshold_;
+  std::array<double, RunSizeLimit> alphas_;
+
+ public:
+ public:
+  ParametricClusterModel() : count_threshold_(100000) {}
+
+  double ErrorRate(const int runSize) const {
+    auto idx = runSize - 1;
+    idx = std::max(idx, 0);
+    idx = std::min(idx, (const int)(alphas_.size() - 1));
+    return alphas_[idx] * (runSize == 0 ? 0.5 : 1);
+  }
+
+  double ExpectedErrorRate(const hammer::HKMer& from,
+                           const hammer::HKMer& to) const {
+    double errRate = 0;
+    for (uint i = 0; i < hammer::K; ++i) {
+      errRate +=
+          std::abs(from[i].len - to[i].len) * log(ErrorRate(from[i].len));
+      //      errRate += std::abs(from[i].len - to[i].len) *
+      //      log(ErrorRate(from[i].len)) - log(1.0 - ErrorRate(from[i].len));
+    }
+    return exp(errRate);
+  }
+
+  ParametricClusterModel(const GammaMixture& prior,
+                         const QualFunc& qualFunc,
+                         const double countThreshold,
+                         const std::array<double, RunSizeLimit>& alphas)
+      : prior_(prior), qual_func_(qualFunc), count_threshold_(countThreshold) {
+    std::copy(alphas.begin(), alphas.end(), alphas_.begin());
+    for (uint i = 0; i < RunSizeLimit; ++i) {
+      INFO("Run length " << i << " estimated error rate " << alphas_[i]);
+    }
+  }
+
+  ParametricClusterModel(const ParametricClusterModel& other) = default;
+
+  ParametricClusterModel& operator=(const ParametricClusterModel&) = default;
+
+  double QualityLogPrior(double qual) const {
+    return max(min(qual_func_.GenomicLogLikelihood(qual), -1e-10), -1000.0);
+  }
+
+  bool NeedSubcluster(const hammer::KMerStat& stat) const {
+    return qual_func_.GenomicLogLikelihood(stat.qual) > -0.1 &&
+           stat.count >= count_threshold_;
+  }
+
+  const GammaDistribution& GenomicPrior() const { return prior_.GetFirst(); }
+
+  const GammaDistribution& NoisePrior() const { return prior_.GetSecond(); }
+
+  double GenerateLogLikelihood(double expectedNoiseCount,
+                               size_t noiseCount) const {
+    const auto& prior = NoisePrior();
+
+    GammaDistribution posterior(prior.GetShape() + expectedNoiseCount,
+                                 prior.GetRate() + 1);
+    return PoissonGammaDistribution(posterior).LogLikelihood(noiseCount);
+  }
+
+  double GenomicLogLikelihood(size_t count) const {
+    const auto& prior = GenomicPrior();
+    const double a = prior.GetShape();
+    const double b = prior.GetRate();
+
+    double ll = a * log(b) - (a + (double)count) * log(b + 1);
+    ll += boost::math::lgamma(prior.GetShape() + ((double)count)) -
+          prior.LogGammaAtShape() - boost::math::lgamma(((double)count) + 1);
+    return ll;
+  }
+};
+
+// this class estimate prior distribution.
+class TClusterModelEstimator {
+ private:
+  const KMerData& data_;
+  double threshold_;
+  uint num_threads_;
+  size_t max_terations_;
+  bool calc_likelihood_;
+
+ private:
+
+  struct TClusterSufficientStat {
+    double count_ = 0;
+    double qualtiy_ = 0;
+    double genomic_class_prob_ = 0;
+  };
+
+  struct TQualityStat {
+    double quality_ = 0;
+    double class_ = 0;
+
+    TQualityStat(double quality, double cls) : quality_(quality), class_(cls) {}
+  };
+
+  struct TRunErrorStats {
+    const KMerData* data_;
+    std::array<double, RunSizeLimit> error_counts_;
+    std::array<double, RunSizeLimit> total_count_;
+
+    TRunErrorStats(const KMerData& data)
+        : data_(&data){
+
+          };
+
+    std::array<double, RunSizeLimit> EstimateAlphas(
+        size_t priorSize = 100) const {
+      const double priors[] = {0.002, 0.004, 0.01, 0.02,
+                               0.035, 0.05,  0.09, 0.11};
+
+      std::array<double, RunSizeLimit> alphas;
+      for (uint i = 0; i < RunSizeLimit; ++i) {
+        alphas[i] = (error_counts_[i] + priors[i] * (double)priorSize) /
+                    (total_count_[i] + (double)priorSize);
+      }
+      alphas[0] *= 2;
+      return alphas;
+    };
+
+    TRunErrorStats& operator+=(const TRunErrorStats& other) {
+      if (this != &other) {
+        for (uint i = 0; i < RunSizeLimit; ++i) {
+          error_counts_[i] += other.error_counts_[i];
+          total_count_[i] += other.total_count_[i];
+        }
+      }
+      return *this;
+    }
+
+    void Add(const std::vector<size_t>& indices, size_t centerIdx) {
+      const auto& center = (*data_)[centerIdx].kmer;
+
+      for (auto idx : indices) {
+        if (idx == centerIdx) {
+          continue;
+        }
+        double errKmerCount = (double)(*data_)[idx].count;
+        const auto& errKmer = (*data_)[idx].kmer;
+        for (uint i = 0; i < hammer::K; ++i) {
+          if (center[i].len > RunSizeLimit) {
+            continue;
+          }
+          const int len = center[i].len - 1;
+          total_count_[len] += errKmerCount;
+          if (center[i].len != errKmer[i].len) {
+            error_counts_[len] += errKmerCount;
+          }
+        }
+      }
+      for (uint i = 0; i < hammer::K; ++i) {
+        if (center[i].len > RunSizeLimit) {
+          continue;
+        }
+        total_count_[center[i].len - 1] += (*data_)[centerIdx].count;
+      }
+    }
+  };
+
+  inline void Expectation(const PoissonGammaDistribution& first,
+                          const PoissonGammaDistribution& second,
+                          const QualFunc& qualFunc,
+                          TClusterSufficientStat& center) const {
+    const double logPrior = qualFunc.GenomicLogLikelihood(center.qualtiy_) +
+                            log(boost::math::gamma_q(center.count_, threshold_));
+
+    const double firstLL = first.PartialLogLikelihood((size_t)center.count_) + logPrior;
+    const double secondLL = second.PartialLogLikelihood((size_t)center.count_) +
+                            log(max(1.0 - exp(logPrior), 1e-20));
+
+    const double posterior = 1.0 / (1.0 + exp(secondLL - firstLL));
+    center.genomic_class_prob_ = posterior;
+  }
+
+  inline void QualityExpectation(const QualFunc& qualFunc,
+                                 TClusterSufficientStat& center) const {
+    center.genomic_class_prob_ =
+        exp(qualFunc.GenomicLogLikelihood(center.qualtiy_));
+  }
+
+  inline TClusterSufficientStat Create(const size_t centerIdx) const {
+    TClusterSufficientStat stat;
+    stat.genomic_class_prob_ =
+        data_[centerIdx].count > 0
+            ? boost::math::gamma_q(data_[centerIdx].count, threshold_)
+            : 0;
+    stat.count_ = data_[centerIdx].count;
+    stat.qualtiy_ = data_[centerIdx].qual;
+    return stat;
+  }
+
+  std::vector<TClusterSufficientStat> CreateSufficientStats(
+      const std::vector<size_t>& clusterCenters) const {
+    std::vector<TClusterSufficientStat> clusterSufficientStat;
+    clusterSufficientStat.reserve(clusterCenters.size());
+
+    for (size_t i = 0; i < clusterCenters.size(); ++i) {
+      const size_t centerIdx = clusterCenters[i];
+      auto stat = Create(centerIdx);
+      if (stat.count_ > 0) {
+        clusterSufficientStat.push_back(stat);
+      }
+    }
+    return clusterSufficientStat;
+  }
+
+  // Builds the labeled (quality, class) training set for the quality
+  // logistic regression. Only clusters whose center count reaches
+  // threshold_ contribute: the center itself is labeled genomic (1) and
+  // every other cluster member non-genomic (0).
+  std::vector<TQualityStat> CreateQualityStats(
+      const std::vector<std::vector<size_t>>& clusters,
+      const std::vector<size_t>& clusterCenters) const {
+    std::vector<TQualityStat> qualities;
+    qualities.reserve(clusterCenters.size());
+
+    for (size_t i = 0; i < clusterCenters.size(); ++i) {
+      const size_t centerIdx = clusterCenters[i];
+      if (data_[centerIdx].count >= threshold_) {
+        for (auto idx : clusters[i]) {
+          if (idx != centerIdx) {
+            qualities.push_back(TQualityStat(data_[idx].qual, 0));
+          }
+        }
+        qualities.push_back(TQualityStat(data_[centerIdx].qual, 1));
+      }
+    }
+    return qualities;
+  }
+
+  // Additive accumulator of (weighted) count moments over cluster
+  // sufficient statistics. With WEIGHTED=true each record is weighted by
+  // its current genomic-class posterior; with WEIGHTED=false all records
+  // get unit weight (total-population moments). Merged across threads via
+  // operator+=.
+  template <bool WEIGHTED = true>
+  class TCountsStat {
+   private:
+    double count_ = 0;   // sum of w * count
+    double count2_ = 0;  // sum of w * count^2
+    double weight_ = 0;  // sum of w
+
+   public:
+    void Add(const TClusterSufficientStat& stat) {
+      const double w = (WEIGHTED ? stat.genomic_class_prob_ : 1.0);
+      count_ += w * stat.count_;
+      count2_ += w * stat.count_ * stat.count_;
+      weight_ += w;
+    }
+
+    TCountsStat& operator+=(const TCountsStat& other) {
+      if (this != &other) {
+        count_ += other.count_;
+        count2_ += other.count2_;
+        weight_ += other.weight_;
+      }
+      return *this;
+    }
+
+    double GetWeightedSum() const { return count_; }
+
+    double GetWeightedSum2() const { return count2_; }
+
+    double GetWeight() const { return weight_; }
+  };
+
+  // Additive accumulator of posterior-weighted lgamma(count + shape) sums
+  // for the genomic and non-genomic mixture components; used only for
+  // likelihood reporting. Merged across threads via operator+=.
+  class TLogGammaStat {
+   private:
+    double genomic_shape_;
+    double non_genomic_shape_;
+    double genomic_log_gamma_sum_ = 0;
+    double non_genomic_log_gamma_sum_ = 0;
+
+   public:
+    // BUG FIX: the original initializer list wrote nonGenomicShape into
+    // non_genomic_log_gamma_sum_, leaving non_genomic_shape_
+    // uninitialized — Add() then read indeterminate memory. Initialize
+    // the shape member and let the sums keep their in-class 0 defaults.
+    TLogGammaStat(double genomicShape, double nonGenomicShape)
+        : genomic_shape_(genomicShape), non_genomic_shape_(nonGenomicShape) {}
+
+    void Add(const TClusterSufficientStat& stat) {
+      genomic_log_gamma_sum_ += stat.genomic_class_prob_ *
+                            boost::math::lgamma(stat.count_ + genomic_shape_);
+      non_genomic_log_gamma_sum_ +=
+          (1.0 - stat.genomic_class_prob_) *
+          boost::math::lgamma(stat.count_ + non_genomic_shape_);
+    }
+
+    TLogGammaStat& operator+=(const TLogGammaStat& other) {
+      if (this != &other) {
+        genomic_log_gamma_sum_ += other.genomic_log_gamma_sum_;
+        non_genomic_log_gamma_sum_ += other.non_genomic_log_gamma_sum_;
+      }
+      return *this;
+    }
+
+    double GetGenomicLogGammaSum() const { return genomic_log_gamma_sum_; }
+
+    double GetNonGenomicLogGammaSum() const { return non_genomic_log_gamma_sum_; }
+  };
+
+  // Additive accumulator of the log-likelihood, gradient, and Hessian of
+  // the logistic regression p(genomic | qual) = sigmoid(alpha*qual + beta),
+  // evaluated at the current coefficients func_. Feeds one Newton step in
+  // Update(). Merged across threads via operator+=.
+  class TQualityLogitLinearRegressionPoint {
+   private:
+    // p(genomic) = exp(Alpha qual + beta) / (1.0 + exp(Alpha qual + beta))
+    QualFunc func_;
+
+    double likelihood_ = 0;
+
+    // First derivatives w.r.t. alpha and beta.
+    double der_alpha_ = 0;
+    double der_beta_ = 0;
+
+    // Second derivatives (Hessian entries); always <= 0 by construction.
+    double der2_alpha_ = 0;
+    double der2_beta_ = 0;
+    double der2_alpha_beta_ = 0;
+
+   public:
+    TQualityLogitLinearRegressionPoint(QualFunc func) : func_(func) {}
+
+    void Add(const TClusterSufficientStat& statistic) {
+      Add(statistic.genomic_class_prob_, statistic.qualtiy_);
+    }
+
+    void Add(const TQualityStat& statistic) {
+      Add(statistic.class_, statistic.quality_);
+    }
+
+    // firstClassProb is the (soft or hard) genomic label; qual the quality.
+    // The isfinite() guards handle exp() overflow for large linear scores:
+    // p saturates to 1 and log(1 + exp(val)) ~ val.
+    void Add(const double firstClassProb, double qual) {
+      const double val = func_(qual);
+      const double expPoint = exp(val);
+      const double p =
+          std::isfinite(expPoint) ? expPoint / (1.0 + expPoint) : 1.0;
+
+      der_alpha_ += (firstClassProb - p) * qual;
+      der_beta_ += firstClassProb - p;
+
+      der2_alpha_ -= sqr(qual) * p * (1 - p);
+      der2_beta_ -= p * (1 - p);
+      der2_alpha_beta_ -= qual * p * (1 - p);
+
+      likelihood_ += firstClassProb * val -
+                    (std::isfinite(expPoint) ? log(1 + expPoint) : val);
+    }
+
+    TQualityLogitLinearRegressionPoint& operator+=(
+        const TQualityLogitLinearRegressionPoint& other) {
+      if (this != &other) {
+        likelihood_ += other.likelihood_;
+
+        der_alpha_ += other.der_alpha_;
+        der_beta_ += other.der_beta_;
+
+        der2_alpha_ += other.der2_alpha_;
+        der2_beta_ += other.der2_beta_;
+        der2_alpha_beta_ += other.der2_alpha_beta_;
+      }
+      return *this;
+    }
+
+    double GetLikelihood() const { return likelihood_; }
+
+    double GetDerAlpha() const { return der_alpha_; }
+
+    double GetDerBeta() const { return der_beta_; }
+
+    double GetDer2Alpha() const { return der2_alpha_; }
+
+    double GetDer2Beta() const { return der2_beta_; }
+
+    double GetDer2AlphaBeta() const { return der2_alpha_beta_; }
+  };
+
+  // One damped Newton step for the quality logistic regression: solves the
+  // 2x2 system H * step = grad by Cramer's rule and moves against the
+  // step (the Hessian entries are negative, so this ascends the
+  // likelihood). The +1e-3 on the diagonal regularizes a near-singular
+  // Hessian.
+  QualFunc Update(const QualFunc& current,
+                   const TQualityLogitLinearRegressionPoint& pointStats) const {
+    const double dera = pointStats.GetDerAlpha();
+    const double derb = pointStats.GetDerBeta();
+
+    const double daa = pointStats.GetDer2Alpha() + 1e-3;
+    const double dbb = pointStats.GetDer2Beta() + 1e-3;
+    const double dab = pointStats.GetDer2AlphaBeta();
+    const double det = daa * dbb - sqr(dab);
+
+    double stepAlpha = (dbb * dera - dab * derb) / det;
+    double stepBeta = (daa * derb - dab * dera) / det;
+
+    INFO("Quality estimation iteration gradient: " << dera << " " << derb);
+    INFO("Quality estimation likelihood: " << pointStats.GetLikelihood());
+
+    return {current.alpha_ - stepAlpha, current.beta_ - stepBeta};
+  }
+
+  // Additive accumulator of posterior-weighted digamma/trigamma sums of
+  // (count + shape) for the genomic ("first") and non-genomic ("second")
+  // components; these feed the Newton update of the gamma shapes in
+  // MoveDirection(). The p > 1e-3 / p < 1 - 1e-3 guards skip near-zero
+  // weights to avoid wasted special-function evaluations. Merged across
+  // threads via operator+=.
+  class TGammaDerivativesStats {
+   private:
+    double first_class_shift_;
+    double second_class_shift_;
+
+    double digamma_sum_first_ = 0;
+    double trigamma_sum_first_ = 0;
+
+    double digamma_sum_second_ = 0;
+    double trigamma_sum_second_ = 0;
+
+   public:
+    TGammaDerivativesStats(double firstShift, double secondShift)
+        : first_class_shift_(firstShift), second_class_shift_(secondShift) {}
+
+    void Add(const TClusterSufficientStat& statistic) {
+      const double p = statistic.genomic_class_prob_;
+      digamma_sum_first_ +=
+          p > 1e-3 ? p * boost::math::digamma(statistic.count_ + first_class_shift_)
+                   : 0;
+      trigamma_sum_first_ +=
+          p > 1e-3
+              ? p * boost::math::trigamma(statistic.count_ + first_class_shift_)
+              : 0;
+
+      digamma_sum_second_ +=
+          p < (1.0 - 1e-3) ? (1.0 - p) * boost::math::digamma(statistic.count_ +
+                                                              second_class_shift_)
+                           : 0;
+      trigamma_sum_second_ +=
+          p < (1.0 - 1e-3) ? (1.0 - p) * boost::math::trigamma(statistic.count_ +
+                                                               second_class_shift_)
+                           : 0;
+    }
+
+    TGammaDerivativesStats& operator+=(const TGammaDerivativesStats& other) {
+      if (this != &other) {
+        digamma_sum_first_ += other.digamma_sum_first_;
+        trigamma_sum_first_ += other.trigamma_sum_first_;
+
+        // BUG FIX: this line used plain assignment ("="), silently
+        // discarding the left-hand partial sum when merging per-thread
+        // accumulators and corrupting the non-genomic shape update.
+        digamma_sum_second_ += other.digamma_sum_second_;
+        trigamma_sum_second_ += other.trigamma_sum_second_;
+      }
+      return *this;
+    }
+
+    double GetDigammaSumFirst() const { return digamma_sum_first_; }
+
+    double GetTrigammaSumFirst() const { return trigamma_sum_first_; }
+
+    double GetDigammaSumSecond() const { return digamma_sum_second_; }
+
+    double GetTrigammaSumSecond() const { return trigamma_sum_second_; }
+  };
+
+  // Square helper.
+  static inline double sqr(double x) { return x * x; }
+
+  // Result of one Newton step for a gamma shape parameter:
+  // Direction is the additive shape step, GradientNorm = |dL/dshape|,
+  // Mu = weight / weightedSum fixes the rate via rate = shape * Mu.
+  struct TDirection {
+    double Direction;
+    double GradientNorm;
+    double Mu;
+  };
+
+  // Computes a Newton step for the gamma shape given the accumulated
+  // weighted count sums and digamma/trigamma sums. The rate is tied to
+  // the shape through mu = weight / weightedSum (b = mu * shape), so only
+  // the shape is optimized. regularizer damps the step when the second
+  // derivative is close to zero.
+  static TDirection MoveDirection(double shape, const double weightedSum,
+                                  const double weight, const double digammaSum,
+                                  const double trigammaSum,
+                                  double regularizer = 1e-4) {
+    const double mu = weight / weightedSum;
+    const double digammaAtShape = boost::math::digamma(shape);
+    const double trigammaAtShape = boost::math::trigamma(shape);
+
+    const double b = mu * shape;
+
+    // First and second derivatives of the profile log-likelihood in shape.
+    const double der =
+        weight * (log(b) - log(b + 1) - digammaAtShape) + digammaSum;
+    const double der2 =
+        trigammaSum + weight * (1.0 / shape - mu / (b + 1) - trigammaAtShape);
+
+    return {-der / (der2 + regularizer), std::abs(der), mu};
+  }
+
+  // Log-likelihood of one Poisson-gamma mixture component given its gamma
+  // prior and the accumulated weighted sums; used for progress reporting
+  // only (when calc_likelihood_ is set).
+  double Likelihood(GammaDistribution& prior, double weightedSum,
+                    double weight, double lgammaSum) {
+    const double a = prior.GetShape();
+    const double b = prior.GetRate();
+    return weight * a * (log(b) - log(b + 1)) + weightedSum * log(b + 1) +
+           lgammaSum - weight * prior.LogGammaAtShape();
+  }
+
+ public:
+  // data:           k-mer statistics the model is fitted against
+  // threshold:      count threshold separating trusted cluster centers
+  // num_threads:    OpenMP parallelism for the statistic reductions
+  // maxIterations:  cap on Newton/EM iterations in Estimate()
+  // calcLikelihood: enable per-iteration likelihood logging
+  // NOTE: "max_terations_" is a pre-existing member-name typo (declared
+  // elsewhere in the class); kept as-is.
+  TClusterModelEstimator(const KMerData& data, double threshold,
+                         uint num_threads = 16,
+                         size_t maxIterations = 40,
+                         bool calcLikelihood = false)
+      : data_(data),
+        threshold_(threshold),
+        num_threads_(num_threads),
+        max_terations_(maxIterations),
+        calc_likelihood_(calcLikelihood) {}
+
+  // Applies one Newton step to a gamma prior: shifts the shape (clamped
+  // below by minShape) and recomputes the rate from the step's fixed
+  // mean ratio Mu.
+  static inline GammaDistribution Update(const GammaDistribution& point,
+                                          const TDirection& direction,
+                                          double minShape = 0.01) {
+    double shape = std::max(point.GetShape() + direction.Direction, minShape);
+    double rate = shape * direction.Mu;
+    return GammaDistribution(shape, rate);
+  }
+
+  // Method-of-moments initializer for the gamma prior from raw sums:
+  // matches mean m and variance var of the (negative-binomial-like)
+  // counts; the max(..., 1e-3) floor guards against underdispersed data
+  // where var <= m would give a non-positive rate.
+  static GammaDistribution MomentMethodEstimator(const double sum,
+                                                  const double sum2,
+                                                  const double weight) {
+    const double m = sum / weight;
+    const double var = sum2 / weight - m * m;
+    const double rate = 1.0 / max(var / m - 1, 1e-3);
+    const double shape = m * rate;
+    return GammaDistribution(shape, rate);
+  }
+
+  // Fits the full parametric cluster model:
+  //   1. quality logistic regression (Newton, <= 15 iterations),
+  //   2. per-run error rates (alphas),
+  //   3. two gamma priors (genomic / non-genomic) by moment init followed
+  //      by Newton updates on the shapes, optionally interleaved with EM
+  //      re-weighting of the class posteriors (useEM).
+  // Returns the assembled ParametricClusterModel. The interface is
+  // preserved exactly; comments only below.
+  ParametricClusterModel Estimate(
+      const std::vector<std::vector<size_t>>& clusters,
+      const std::vector<size_t>& clusterCenter, const bool useEM = false,
+      const size_t sample = 0) {
+    // NOTE(review): dead code — these vectors are declared and immediately
+    // discarded, so the `sample` parameter currently has no effect. Looks
+    // like an unfinished subsampling feature; confirm intent upstream.
+    if (sample && clusters.size() > sample) {
+      std::vector<std::vector<size_t>> sampledClusters;
+      std::vector<size_t> sampledCenters;
+    }
+
+    // Step 1: fit alpha, beta of p(genomic | qual) by Newton iterations.
+    const auto qualityFunc = [&]() -> QualFunc {
+      auto qualStats = CreateQualityStats(clusters, clusterCenter);
+
+      QualFunc cursor = {-1e-5, 0.0};
+
+      for (uint i = 0; i < 15; ++i) {
+        const auto qualDerStats =
+            n_computation_utils::TAdditiveStatisticsCalcer<
+                TQualityStat, TQualityLogitLinearRegressionPoint>(qualStats,
+                                                                  num_threads_)
+                .Calculate([&]() -> TQualityLogitLinearRegressionPoint {
+                  return TQualityLogitLinearRegressionPoint(cursor);
+                });
+
+        cursor = Update(cursor, qualDerStats);
+
+        // Stop once the gradient is small enough.
+        if ((std::abs(qualDerStats.GetDerAlpha()) +
+             std::abs(qualDerStats.GetDerBeta())) < 1e-2) {
+          break;
+        }
+      }
+
+      INFO("Quality function: " << cursor.alpha_ << "q + " << cursor.beta_);
+      return cursor;
+    }();
+
+    // Step 2: estimate per-run-length error rates from trusted clusters.
+    auto alphas = [&]() -> std::array<double, RunSizeLimit> {
+      TRunErrorStats errorStats =
+          n_computation_utils::ParallelStatisticsCalcer<TRunErrorStats>(
+              num_threads_)
+              .Calculate(
+                  clusters.size(),
+                  [&]() -> TRunErrorStats { return TRunErrorStats(data_); },
+                  [&](TRunErrorStats& stat, size_t k) {
+                    if (data_[clusterCenter[k]].count >= threshold_) {
+                      stat.Add(clusters[k], clusterCenter[k]);
+                    }
+                  });
+      return errorStats.EstimateAlphas();
+    }();
+
+    std::vector<TClusterSufficientStat> clusterSufficientStat =
+        CreateSufficientStats(clusterCenter);
+
+    // Unweighted moments of the whole population (WEIGHTED=false).
+    const auto totalStats =
+        n_computation_utils::TAdditiveStatisticsCalcer<TClusterSufficientStat,
+                                                     TCountsStat<false>>(
+            clusterSufficientStat, num_threads_)
+            .Calculate([]() -> TCountsStat<false> {
+              return TCountsStat<false>();
+            });
+
+    // Initialize class posteriors from the quality model alone.
+#pragma omp parallel for num_threads(num_threads_)
+    for (size_t k = 0; k < clusterSufficientStat.size(); ++k) {
+      QualityExpectation(qualityFunc, clusterSufficientStat[k]);
+    }
+
+    // Posterior-weighted moments of the (soft) genomic class.
+    auto countsStats =
+        n_computation_utils::TAdditiveStatisticsCalcer<TClusterSufficientStat,
+                                                     TCountsStat<true>>(
+            clusterSufficientStat, num_threads_)
+            .Calculate([]() -> TCountsStat<true> {
+              return TCountsStat<true>();
+            });
+
+    // Moment-method initialization of both gamma priors (the non-genomic
+    // one uses the complement of the genomic-weighted moments).
+    GammaDistribution genomicPrior = [&]() -> GammaDistribution {
+      const double m = countsStats.GetWeightedSum() / countsStats.GetWeight();
+      const double var =
+          countsStats.GetWeightedSum2() / countsStats.GetWeight() - m * m;
+      const double rate = 1.0 / max(var / m - 1, 1e-3);
+      const double shape = m * rate;
+      return GammaDistribution(shape, rate);
+    }();
+
+    GammaDistribution nonGenomicPrior = [&]() -> GammaDistribution {
+      const double m =
+          (totalStats.GetWeightedSum() - countsStats.GetWeightedSum()) /
+          (totalStats.GetWeight() - countsStats.GetWeight());
+      const double var =
+          (totalStats.GetWeightedSum2() - countsStats.GetWeightedSum2()) /
+              (totalStats.GetWeight() - countsStats.GetWeight()) -
+          m * m;
+      const double rate = 1.0 / max(var / m - 1, 1e-3);
+      const double shape = m * rate;
+      return GammaDistribution(shape, rate);
+    }();
+
+    // Step 3: Newton updates on both shapes; `steps` counts iterations
+    // since the last EM re-weighting.
+    for (uint i = 0, steps = 0; i < max_terations_; ++i, ++steps) {
+      auto gammaDerStats =
+          n_computation_utils::TAdditiveStatisticsCalcer<TClusterSufficientStat,
+                                                       TGammaDerivativesStats>(
+              clusterSufficientStat, num_threads_)
+              .Calculate([&]() -> TGammaDerivativesStats {
+                return TGammaDerivativesStats(genomicPrior.GetShape(),
+                                              nonGenomicPrior.GetShape());
+              });
+
+      auto genomicDirection = MoveDirection(
+          genomicPrior.GetShape(), countsStats.GetWeightedSum(),
+          countsStats.GetWeight(), gammaDerStats.GetDigammaSumFirst(),
+          gammaDerStats.GetTrigammaSumFirst());
+
+      auto nonGenomicDirection = MoveDirection(
+          nonGenomicPrior.GetShape(),
+          totalStats.GetWeightedSum() - countsStats.GetWeightedSum(),
+          totalStats.GetWeight() - countsStats.GetWeight(),
+          gammaDerStats.GetDigammaSumSecond(),
+          gammaDerStats.GetTrigammaSumSecond());
+
+      auto gradientNorm =
+          genomicDirection.GradientNorm + nonGenomicDirection.GradientNorm;
+
+      INFO("Iteration #" << i << " gradient norm " << gradientNorm);
+
+      genomicPrior = Update(genomicPrior, genomicDirection);
+      nonGenomicPrior = Update(nonGenomicPrior, nonGenomicDirection);
+
+      if (calc_likelihood_) {
+        auto logGammaStats =
+            n_computation_utils::TAdditiveStatisticsCalcer<TClusterSufficientStat,
+                                                         TLogGammaStat>(
+                clusterSufficientStat, num_threads_)
+                .Calculate([&]() -> TLogGammaStat {
+                  return TLogGammaStat(genomicPrior.GetShape(),
+                                       nonGenomicPrior.GetShape());
+                });
+
+        INFO("Genomic likelihood: " << Likelihood(
+                 genomicPrior, countsStats.GetWeightedSum(),
+                 countsStats.GetWeight(),
+                 logGammaStats.GetGenomicLogGammaSum()));
+
+        INFO("NonGenomic likelihood: " << Likelihood(
+                 nonGenomicPrior,
+                 totalStats.GetWeightedSum() - countsStats.GetWeightedSum(),
+                 totalStats.GetWeight() - countsStats.GetWeight(),
+                 logGammaStats.GetNonGenomicLogGammaSum()));
+      }
+
+      {
+        INFO("Genomic gamma prior estimation step: shape "
+             << genomicPrior.GetShape() << " and rate "
+             << genomicPrior.GetRate());
+        INFO("Nongenomic gamma prior estimation step: shape "
+             << nonGenomicPrior.GetShape() << " and rate "
+             << nonGenomicPrior.GetRate());
+      }
+
+      double shapeDiff = std::abs(genomicDirection.Direction) +
+                         std::abs(nonGenomicDirection.Direction);
+
+      if (useEM) {
+        // E-step: once the shapes (nearly) converged — or every 5 Newton
+        // steps early on — refresh the posteriors under the current
+        // priors and recompute the weighted moments.
+        if ((shapeDiff < 1e-2) || gradientNorm < 1e-1 ||
+            (steps == 5 && (i < max_terations_ - 10))) {
+          PoissonGammaDistribution genomic(genomicPrior);
+          PoissonGammaDistribution nonGenomic(nonGenomicPrior);
+#pragma omp parallel for num_threads(num_threads_)
+          for (size_t k = 0; k < clusterSufficientStat.size(); ++k) {
+            Expectation(genomic, nonGenomic, qualityFunc,
+                        clusterSufficientStat[k]);
+          }
+
+          countsStats = n_computation_utils::TAdditiveStatisticsCalcer<
+                            TClusterSufficientStat, TCountsStat<true>>(
+                            clusterSufficientStat, num_threads_)
+                            .Calculate([]() -> TCountsStat<true> {
+                              return TCountsStat<true>();
+                            });
+          steps = 0;
+        }
+      } else {
+        if ((shapeDiff < 1e-4) || gradientNorm < 1e-2) {
+          break;
+        }
+      }
+    }
+
+    INFO("Genomic gamma prior genomic estimated with shape "
+         << genomicPrior.GetShape() << " and rate " << genomicPrior.GetRate());
+    INFO("Nongenomic Gamma prior estimated with shape "
+         << nonGenomicPrior.GetShape() << " and rate "
+         << nonGenomicPrior.GetRate());
+
+    // Mixture weight = fraction of total weight assigned to the genomic
+    // class under the final posteriors.
+    return ParametricClusterModel(
+        GammaMixture(genomicPrior, nonGenomicPrior,
+                      countsStats.GetWeight() / totalStats.GetWeight()),
+        qualityFunc, threshold_, alphas);
+  }
+
+  // Standalone single-component gamma prior fit from raw counts:
+  // moment-method initialization followed by up to 10 Newton steps on the
+  // shape (rate tied through the mean as in MoveDirection).
+  static GammaDistribution EstimatePrior(const std::vector<size_t>& counts) {
+    const size_t observations = counts.size();
+    double sum = 0;
+    double sum2 = 0;
+    for (auto count : counts) {
+      sum += (double)count;
+      sum2 += (double)count * (double)count;
+    }
+
+    GammaDistribution prior =
+        TClusterModelEstimator::MomentMethodEstimator(sum, sum2, (double)observations);
+
+    for (uint i = 0, steps = 0; i < 10; ++i, ++steps) {
+      double digammaSum = 0;
+      double trigammaSum = 0;
+      for (auto count : counts) {
+        digammaSum += boost::math::digamma((double)count + prior.GetShape());
+        trigammaSum += boost::math::trigamma((double)count + prior.GetShape());
+      }
+
+      auto direction = MoveDirection(prior.GetShape(), sum, (double)observations,
+                                     digammaSum, trigammaSum);
+
+      // Converged when the step and the gradient are both tiny.
+      const double shapeDiff = std::abs(direction.Direction);
+      if (shapeDiff < 1e-3 || (direction.GradientNorm < 1e-4)) {
+        break;
+      }
+      prior = Update(prior, direction, 1e-2);
+    }
+    return prior;
+  }
+};
+
+}  // namespace NGammaPoissonModel
+
+#endif  // PROJECT_GAMMA_POISSON_MODEL_HPP
diff --git a/src/projects/ionhammer/hamcluster.cpp b/src/projects/ionhammer/hamcluster.cpp
deleted file mode 100644
index a905ddf..0000000
--- a/src/projects/ionhammer/hamcluster.cpp
+++ /dev/null
@@ -1,219 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#include "hamcluster.hpp"
-
-#include "hkmer_distance.hpp"
-#include "common/adt/concurrent_dsu.hpp"
-#include "io/kmers/mmapped_reader.hpp"
-
-#include <iostream>
-#include <sstream>
-
-#ifdef USE_GLIBCXX_PARALLEL
-#include <parallel/algorithm>
-#endif
-
-struct SubKMerComparator {
-  bool operator()(const SubKMerData &lhs, const SubKMerData &rhs) {
-    return SubKMer::less2_fast()(lhs.data, rhs.data);
-  }
-};
-
-std::pair<size_t, size_t> SubKMerSplitter::split() {
-  std::vector<SubKMerData> data;
-
-  MMappedReader ifs(ifname_, /* unlink */ true);
-  std::ofstream ofs(ofname_, std::ios::out | std::ios::binary);
-  VERIFY(ofs.good());
-  size_t icnt = 0, ocnt = 0;
-  while (ifs.good()) {
-    SubKMerComparator comp;
-
-    deserialize(data, ifs);
-
-#ifdef USE_GLIBCXX_PARALLEL
-    // Explicitly force a call to parallel sort routine.
-    __gnu_parallel::sort(data.begin(), data.end(), comp);
-#else
-    std::sort(data.begin(), data.end(), comp);
-#endif
-    for (auto start = data.begin(), end = data.end(); start != end;) {
-      auto chunk_end = std::upper_bound(start + 1, data.end(), *start, comp);
-      serialize(ofs, start, chunk_end);
-      start = chunk_end;
-      ocnt += 1;
-    }
-    icnt += 1;
-  }
-  VERIFY(!ofs.fail());
-
-  ofs.close();
-
-  return std::make_pair(icnt, ocnt);
-}
-
-#if 1
-static bool canMerge(const ConcurrentDSU &uf, unsigned x, unsigned y) {
-  size_t szx = uf.set_size(x), szy = uf.set_size(y);
-  const size_t hardthr = 2500;
-
-  // Global threshold - no cluster larger than hard threshold
-  if (szx + szy > hardthr)
-    return false;
-
-  // If one of the clusters is moderately large, than attach "almost" singletons
-  // only.
-  if ((szx > hardthr * 3 / 4 && szy > 50) ||
-      (szy > hardthr * 3 / 4 && szx > 50))
-    return false;
-
-  return true;
-}
-#else
-static bool canMerge(const ConcurrentDSU &uf, unsigned x, unsigned y) {
-  return (uf.set_size(x) + uf.set_size(y)) < 10000;
-}
-#endif
-
-
-static void processBlockQuadratic(ConcurrentDSU  &uf,
-                                  const std::vector<size_t> &block,
-                                  const KMerData &data,
-                                  unsigned tau) {
-  size_t blockSize = block.size();
-  for (size_t i = 0; i < blockSize; ++i) {
-    auto x = static_cast<unsigned>(block[i]);
-    hammer::HKMer kmerx = data[x].kmer;
-    hammer::HKMer rkmerx = !kmerx;
-    auto rcx = static_cast<unsigned>(data.seq_idx(rkmerx));
-
-    for (size_t j = i + 1; j < blockSize; j++) {
-      auto y = static_cast<unsigned>(block[j]);
-      hammer::HKMer kmery = data[y].kmer;
-      hammer::HKMer rkmery = !kmery;
-      auto rcy = static_cast<unsigned>(data.seq_idx(rkmery));
-      if ((uf.find_set(x) != uf.find_set(y) || uf.find_set(rcx) !=
-           uf.find_set(rcy)) &&
-          (canMerge(uf, x, y) || canMerge(uf, rcx, rcy)) &&
-          (hammer::distanceHKMer(kmerx.begin(), kmerx.end(),
-                                 kmery.begin(), kmery.end(), tau) <= tau ||
-           hammer::distanceHKMer(rkmerx.begin(), rkmerx.end(),
-                                 rkmery.begin(), rkmery.end(), tau) <= tau)) {
-          uf.unite(x, y);
-          uf.unite(rcx, rcy);
-      }
-    }
-  }
-}
-
-void KMerHamClusterer::cluster(const std::string &prefix,
-                               const KMerData &data,
-                               ConcurrentDSU &uf) {
-  // First pass - split & sort the k-mers
-  std::ostringstream tmp;
-  tmp << prefix << ".first";
-  std::string fname(tmp.str());
-  std::ofstream ofs(fname, std::ios::out | std::ios::binary);
-  VERIFY(ofs.good());
-
-  INFO("Serializing sub-kmers.");
-  for (unsigned i = 0; i < tau_ + 1; ++i) {
-    // size_t from = (*Globals::subKMerPositions)[i];
-    // size_t to = (*Globals::subKMerPositions)[i+1];
-    size_t from = 0 + i*hammer::K / (tau_ + 1);
-    size_t to = 0 + (i+1)*hammer::K / (tau_ + 1);
-
-    INFO("Serializing: [" << from << ", " << to << ")");
-    serialize(ofs, data, NULL,
-              SubKMerPartSerializer(from, to));
-  }
-  VERIFY(!ofs.fail());
-  ofs.close();
-
-  size_t big_blocks1 = 0;
-  {
-    INFO("Splitting sub-kmers, pass 1.");
-    SubKMerSplitter Splitter(fname, fname + ".blocks");
-    std::pair<size_t, size_t> stat = Splitter.split();
-    INFO("Splitting done."
-            " Processed " << stat.first << " blocks."
-            " Produced " << stat.second << " blocks.");
-
-    // Sanity check - there cannot be more blocks than tau + 1 times of total
-    // kmer number. And on the first pass we have only tau + 1 input blocks!
-    VERIFY(stat.first == tau_ + 1);
-    VERIFY(stat.second <= (tau_ + 1) * data.size());
-
-    // Ok, now in the files we have everything grouped in blocks in the output files.
-
-    std::vector<size_t> block;
-
-    INFO("Merge sub-kmers, pass 1");
-    SubKMerBlockFile blocks(fname + ".blocks", /* unlink */ true);
-
-    std::ostringstream tmp;
-    tmp << prefix << ".second";
-    fname = tmp.str();
-
-    ofs.open(fname, std::ios::out | std::ios::binary);
-    VERIFY(ofs.good());
-    while (blocks.get_block(block)) {
-      // unsigned block_thr = cfg::get().hamming_blocksize_quadratic_threshold;
-      unsigned block_thr = 50;
-      if (block.size() < block_thr) {
-        // Merge small blocks.
-        processBlockQuadratic(uf, block, data, tau_);
-      } else {
-        big_blocks1 += 1;
-        // Otherwise - dump for next iteration.
-        for (unsigned i = 0; i < tau_ + 1; ++i) {
-          serialize(ofs, data, &block,
-                    SubKMerStridedSerializer(i, tau_ + 1));
-        }
-      }
-    }
-    VERIFY(!ofs.fail());
-    ofs.close();
-    INFO("Merge done, total " << big_blocks1 << " new blocks generated.");
-  }
-
-  size_t big_blocks2 = 0;
-  {
-    INFO("Spliting sub-kmers, pass 2.");
-    SubKMerSplitter Splitter(fname, fname + ".blocks");
-    std::pair<size_t, size_t> stat = Splitter.split();
-    INFO("Splitting done."
-            " Processed " << stat.first << " blocks."
-            " Produced " << stat.second << " blocks.");
-
-    // Sanity check - there cannot be more blocks than tau + 1 times of total
-    // kmer number. And there should be tau + 1 times big_blocks input blocks.
-    VERIFY(stat.first == (tau_ + 1)*big_blocks1);
-    VERIFY(stat.second <= (tau_ + 1) * (tau_ + 1) * data.size());
-
-    INFO("Merge sub-kmers, pass 2");
-    SubKMerBlockFile blocks(fname + ".blocks", /* unlink */ true);
-    std::vector<size_t> block;
-
-    size_t nblocks = 0;
-    while (blocks.get_block(block)) {
-      if (block.size() > 50) {
-        big_blocks2 += 1;
-#if 0
-        for (size_t i = 0; i < block.size(); ++i) {
-          std::string s(Globals::blob + data[block[i]], K);
-          INFO("" << block[i] << ": " << s);
-        }
-#endif
-      }
-      processBlockQuadratic(uf, block, data, tau_);
-      nblocks += 1;
-    }
-    INFO("Merge done, saw " << big_blocks2 << " big blocks out of " << nblocks << " processed.");
-  }
-}
diff --git a/src/projects/ionhammer/hamcluster.hpp b/src/projects/ionhammer/hamcluster.hpp
deleted file mode 100644
index 17d9b60..0000000
--- a/src/projects/ionhammer/hamcluster.hpp
+++ /dev/null
@@ -1,191 +0,0 @@
-//***************************************************************************
-//* Copyright (c) 2015 Saint Petersburg State University
-//* Copyright (c) 2011-2014 Saint Petersburg Academic University
-//* All Rights Reserved
-//* See file LICENSE for details.
-//***************************************************************************
-
-#ifndef HAMMER_SUBKMER_SORTER_HPP
-#define HAMMER_SUBKMER_SORTER_HPP
-
-#include "kmer_data.hpp"
-#include "io/kmers/mmapped_reader.hpp"
-
-#include "utils/logger/logger.hpp"
-#include "HSeq.hpp"
-
-#include <iostream>
-#include <vector>
-
-class ConcurrentDSU;
-
-typedef hammer::HSeq<(hammer::K + 1) / 2> SubKMer;
-
-struct SubKMerData {
-  uint64_t idx;
-  SubKMer data;
-};
-
-template<class Reader>
-inline void binary_read(Reader &is, SubKMerData &s) {
-  SubKMer::DataType seq_data[SubKMer::DataSize];
-
-  is.read((char*)&s.idx, sizeof(s.idx));
-  is.read((char*)seq_data, sizeof(seq_data));
-
-  s.data = SubKMer(seq_data, seq_data + SubKMer::DataSize);
-}
-
-template<class Writer>
-inline Writer &binary_write(Writer &os, const SubKMerData &s) {
-  os.write((char*)&s.idx, sizeof(s.idx));
-  os.write((char*)s.data.data(), SubKMer::TotalBytes);
-
-  return os;
-}
-
-static_assert(sizeof(SubKMerData) == 16, "Too big SubKMer");
-
-class SubKMerPartSerializer{
-  size_t from_;
-  size_t to_;
-
-public:
-  SubKMerPartSerializer(size_t from, size_t to)
-      :from_(from), to_(to) { VERIFY(to_ - from_ <= hammer::K); }
-
-  SubKMerData serialize(hammer::HKMer k, size_t fidx) const {
-    SubKMerData s;
-
-    s.idx = fidx;
-    s.data = SubKMer(k.data() + from_, k.data() + to_);    
-
-    // Yay for NRVO!
-    return s;
-  }
-};
-
-class SubKMerStridedSerializer{
-  size_t from_;
-  size_t stride_;
-
-public:
-  SubKMerStridedSerializer(size_t from, size_t stride)
-      :from_(from), stride_(stride) { VERIFY(from_ + stride_ <= hammer::K); }
-
-  SubKMerData serialize(hammer::HKMer k, size_t fidx) const {
-    SubKMerData s;
-
-    s.idx = fidx;
-
-    size_t sz = (hammer::K - from_ + stride_ - 1) / stride_;
-
-    std::vector<hammer::HKMer::DataType> v(sz);
-    for (size_t i = from_, j = 0; i < hammer::K; i+= stride_, ++j)
-      v[j] = k[i];
-
-    s.data = SubKMer(&v[0], &v[0] + sz);
-
-    // Yay for NRVO!
-    return s;
-  }
-};
-
-class SubKMerBlockFile {
-  MMappedReader ifs_;
-
- public:
-  SubKMerBlockFile(const std::string &fname, bool unlink = false)
-      : ifs_(fname, unlink) { }
-
-  bool get_block(std::vector<size_t> &block) {
-    block.clear();
-#if 0
-    block.shrink_to_fit();
-#else
-    std::vector<size_t>().swap(block);
-#endif
-
-    if (!ifs_.good())
-      return false;
-
-    size_t sz;
-    ifs_.read((char*)&sz, sizeof(sz));
-    block.resize(sz);
-    for (size_t i = 0; i < sz; ++i) {
-      SubKMerData s;
-      binary_read(ifs_, s);
-      block[i] = s.idx;
-    }
-
-    return true;
-  }
-};
-
-template<class Writer,
-         class SubKMerSerializer>
-void serialize(Writer &os,
-               const KMerData &data, const std::vector<size_t> *block = NULL,
-               const SubKMerSerializer &serializer = SubKMerSerializer()) {
-  size_t sz = (block == NULL ? data.size() : block->size());
-  os.write((char*)&sz, sizeof(sz));
-  for (size_t i = 0, e = sz; i != e; ++i) {
-    size_t idx = (block == NULL ? i : (*block)[i]);
-    SubKMerData s = serializer.serialize(data[idx].kmer, idx);
-    binary_write(os, s);
-  }
-}
-
-class SubKMerSplitter {
-  const std::string ifname_;
-  const std::string ofname_;
-
- public:
-  SubKMerSplitter(const std::string &ifname, const std::string &ofname)
-      : ifname_(ifname), ofname_(ofname) {}
-
-  template<class Writer>
-  void serialize(Writer &os,
-                 const std::vector<SubKMerData>::iterator &start,
-                 const std::vector<SubKMerData>::iterator &end) {
-    size_t sz = end - start;
-
-    os.write((char*)&sz, sizeof(sz));
-    for (auto I = start, E = end; I != E; ++I)
-      binary_write(os, *I);
-  }
-
-  template<class Reader>
-  void deserialize(std::vector<SubKMerData> &res,
-                   Reader &is) {
-    res.clear();
-#if 0
-    res.shrink_to_fit();
-#else
-    std::vector<SubKMerData>().swap(res);
-#endif
-
-    size_t sz;
-    is.read((char*)&sz, sizeof(sz));
-    res.resize(sz);
-
-    for (size_t i = 0, e = sz; i != e; ++i)
-      binary_read(is, res[i]);
-  }
-
-  std::pair<size_t, size_t> split();
-};
-
-class KMerHamClusterer {
-  unsigned tau_;
-
- public:
-  KMerHamClusterer(unsigned tau)
-      : tau_(tau) {}
-
-  void cluster(const std::string &prefix, const KMerData &data, ConcurrentDSU &uf);
- private:
-  DECL_LOGGER("Hamming Clustering");
-};
-
-#endif // HAMMER_SUBKMER_SORTER_HPP
diff --git a/src/projects/ionhammer/hamcluster_1.cpp b/src/projects/ionhammer/hamcluster_1.cpp
new file mode 100644
index 0000000..18ef5c3
--- /dev/null
+++ b/src/projects/ionhammer/hamcluster_1.cpp
@@ -0,0 +1,5 @@
+//
+// Created by Vasiliy Ershov on 25/09/16.
+//
+
+#include "hamcluster_1.h"
diff --git a/src/projects/ionhammer/hamcluster_1.h b/src/projects/ionhammer/hamcluster_1.h
new file mode 100644
index 0000000..2276e5b
--- /dev/null
+++ b/src/projects/ionhammer/hamcluster_1.h
@@ -0,0 +1,81 @@
+//
+// Created by Vasiliy Ershov on 25/09/16.
+//
+
+#ifndef PROJECT_HAMCLUSTER_1_H
+#define PROJECT_HAMCLUSTER_1_H
+
+#include <common/adt/concurrent_dsu.hpp>
+#include <common/pipeline/config_singl.hpp>
+#include "HSeq.hpp"
+#include "kmer_data.hpp"
+#include "utils/logger/logger.hpp"
+#include "valid_hkmer_generator.hpp"
+
+namespace hammer {
+
+using HRun = HomopolymerRun;
+
+class TOneErrorClustering {
+ private:
+  const KMerData& data_;
+  dsu::ConcurrentDSU clusters_;
+
+  bool TryMergeClusters(const HKMer& source,
+                        const size_t source_idx,
+                        const HKMer& fixed) {
+    auto fixed_idx = data_.checking_seq_idx(fixed);
+    if (fixed_idx == (-1ULL)) {
+      return false;
+    }
+    if (data_[fixed_idx].count > 0) {
+      clusters_.unite(source_idx, fixed_idx);
+      auto rSource = !source;
+      auto rFixed = !fixed;
+      clusters_.unite(data_.seq_idx(rSource), data_.seq_idx(rFixed));
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  void TryCorrection(const KMerStat& source_stat, size_t source_idx) {
+    const auto& source = source_stat.kmer;
+    auto fixed = source;
+    for (uint k = 0; k < K; ++k) {
+      for (uint i = (uint)std::max(source[k].len - 1, 1);
+           i <= (uint)(source[k].len + 1); ++i) {
+        if (i == source[k].len) {
+          continue;
+        }
+        fixed[k].len = i & 0x3F;
+
+        TryMergeClusters(source, source_idx, fixed);
+      }
+      fixed[k].len = source[k].len;
+    }
+  }
+
+ public:
+
+  TOneErrorClustering(const KMerData& data,
+                      const uint num_threads = 16)
+      : data_(data), clusters_(data.size()) {
+
+    (void)num_threads;  // silence unused-parameter warning when OpenMP is disabled
+#pragma omp parallel for num_threads(num_threads)
+    for (size_t idx = 0; idx < data_.size(); ++idx) {
+      if (data_[idx].count > 0) {
+        TryCorrection(data_[idx], idx);
+      }
+    }
+  }
+
+  void FillClasses(std::vector<std::vector<size_t> >& clusters) {
+    clusters_.get_sets(clusters);
+  }
+};
+
+}  // namespace hammer
+
+#endif  // PROJECT_HAMCLUSTER_1_H
diff --git a/src/projects/ionhammer/hkmer.hpp b/src/projects/ionhammer/hkmer.hpp
index 538d1a7..6ab8833 100644
--- a/src/projects/ionhammer/hkmer.hpp
+++ b/src/projects/ionhammer/hkmer.hpp
@@ -8,13 +8,43 @@
 #ifndef __HAMMER_HKMER_HPP__
 #define __HAMMER_HKMER_HPP__
 
+#include <cstdlib>
+#include <limits>
 #include "HSeq.hpp"
 
 namespace hammer {
 
 const uint32_t K = 16;
-typedef HSeq<K> HKMer;
+using HKMer = HSeq<K>;
 
+struct HKMerDistanceResult {
+  double hamming_ = 0;
+  double levenshtein_ = 0;
+
+  HKMerDistanceResult(double hamming = 0, double lev = 0)
+      : hamming_(hamming), levenshtein_(lev) {}
 };
 
-#endif // __HAMMER_HKMER_HPP__
+inline HKMerDistanceResult hkmerDistance(const HKMer& left,
+                                         const HKMer& right) {
+  HKMerDistanceResult dist = {0, 0};
+
+  for (uint32_t i = 0; i < K; ++i) {
+    if (left[i].nucl != right[i].nucl) {
+      return {std::numeric_limits<double>::infinity(),
+              std::numeric_limits<double>::infinity()};
+    }
+
+    if (left[i].len != right[i].len) {
+      dist.hamming_ += 1;
+      dist.levenshtein_ += std::abs(left[i].len - right[i].len);
+    }
+  }
+  return dist;
+}
+
+
+
+};  // namespace hammer
+
+#endif  // __HAMMER_HKMER_HPP__
diff --git a/src/projects/ionhammer/hkmer_distance.hpp b/src/projects/ionhammer/hkmer_distance.hpp
index e9e7ffa..ee91eb0 100644
--- a/src/projects/ionhammer/hkmer_distance.hpp
+++ b/src/projects/ionhammer/hkmer_distance.hpp
@@ -8,8 +8,8 @@
 #ifndef __HAMMER_HKMER_DISTANCE_HPP__
 #define __HAMMER_HKMER_DISTANCE_HPP__
 
-#include "hkmer.hpp"
 #include "err_helper_table.hpp"
+#include "hkmer.hpp"
 
 namespace hammer {
 
@@ -29,9 +29,8 @@ struct IonPairAlignEvent {
   unsigned length;
 };
 
-template<typename It1, typename It2>
+template <typename It1, typename It2>
 class IonPairAligner {
-
   It1 x_it_;
   It1 x_end_;
   It2 y_it_;
@@ -40,17 +39,16 @@ class IonPairAligner {
   bool empty_;
   hammer::HomopolymerRun cx_, cy_;
   int end_diff_;
-  bool at_the_start_; // turned off once we find a pair of runs with same nucleotide
+  bool at_the_start_;  // turned off once we find a pair of runs with same
+                       // nucleotide
 
   IonPairAlignEvent<It1, It2> front_;
 
   // true iff alignment process is not yet finished
   bool checkForZeroLengthRuns() {
-    if (x_it_ == x_end_ || y_it_ == y_end_)
-      return false;
+    if (x_it_ == x_end_ || y_it_ == y_end_) return false;
 
-    if (cx_.len > 0 && cy_.len > 0)
-      return true;
+    if (cx_.len > 0 && cy_.len > 0) return true;
 
     bool result = true;
     while (cx_.len == 0) {
@@ -74,16 +72,14 @@ class IonPairAligner {
 
   bool fetchNextX() {
     ++x_it_;
-    if (x_it_ == x_end_)
-      return false;
+    if (x_it_ == x_end_) return false;
     cx_ = *x_it_;
     return true;
   }
 
   bool fetchNextY() {
     ++y_it_;
-    if (y_it_ == y_end_)
-      return false;
+    if (y_it_ == y_end_) return false;
     cy_ = *y_it_;
     return true;
   }
@@ -118,19 +114,25 @@ class IonPairAligner {
   void finishAlignmentProcess() {
     empty_ = true;
     if (x_it_ != x_end_) {
-        end_diff_ += x_end_ - x_it_;
+      end_diff_ += int(x_end_ - x_it_);
     }
     if (y_it_ != y_end_) {
-        end_diff_ -= y_end_ - y_it_;
+      end_diff_ -= int(y_end_ - y_it_);
     }
   }
 
  public:
-  IonPairAligner(const It1 &x_begin, const It1 &x_end,
-                 const It2 &y_begin, const It2 &y_end)
-    : x_it_(x_begin), x_end_(x_end), y_it_(y_begin), y_end_(y_end),
-        empty_(false), cx_(*x_it_), cy_(*y_it_), end_diff_(0), at_the_start_(true)
-  {
+  IonPairAligner(const It1 &x_begin, const It1 &x_end, const It2 &y_begin,
+                 const It2 &y_end)
+      : x_it_(x_begin),
+        x_end_(x_end),
+        y_it_(y_begin),
+        y_end_(y_end),
+        empty_(false),
+        cx_(*x_it_),
+        cy_(*y_it_),
+        end_diff_(0),
+        at_the_start_(true) {
     popFront();
   }
 
@@ -178,8 +180,7 @@ class IonPairAligner {
         at_the_start_ = false;
       }
 
-      if (!end)
-        break;
+      if (!end) break;
     }
 
     if (!checkForZeroLengthRuns()) {
@@ -193,7 +194,6 @@ class IonPairAligner {
     VERIFY(y_it_ < y_end_);
 
     if (cx_.nucl == cy_.nucl) {
-
       if (cx_.len >= 4 && cy_.len >= 4) {
         if (cx_.len < cy_.len)
           yieldBaseInsertion();
@@ -217,7 +217,6 @@ class IonPairAligner {
       yieldMismatch();
       return;
     } else {
-
       using namespace hammer::errHelper;
       auto hint = getHint(x_it_, x_end_, y_it_, y_end_, cx_.len, cy_.len);
 
@@ -239,20 +238,18 @@ class IonPairAligner {
   }
 };
 
-
 // returns distance between two homopolymer sequences;
 // optionally, fills *end_diff:
 //  [ --------- X ----------- ]
 // [---------- Y -------]######
 //                       \____/
 //                       end_diff
-template <int kMismatchCost=1,
-          int kBaseInsertionCost=1, int kRunInsertionCost=1,
-          int kBaseDeletionCost=1, int kRunDeletionCost=1,
-          typename It1, typename It2>
+template <int kMismatchCost = 1, int kBaseInsertionCost = 1,
+          int kRunInsertionCost = 1, int kBaseDeletionCost = 1,
+          int kRunDeletionCost = 1, typename It1, typename It2>
 inline unsigned distanceHKMer(const It1 &x_begin, const It1 &x_end,
                               const It2 &y_begin, const It2 &y_end,
-                              unsigned tau = -1, int *end_diff=NULL) {
+                              unsigned tau = -1, int *end_diff = NULL) {
   unsigned dist = 0;
 
   IonPairAligner<It1, It2> aligner(x_begin, x_end, y_begin, y_end);
@@ -261,24 +258,28 @@ inline unsigned distanceHKMer(const It1 &x_begin, const It1 &x_end,
     auto event = aligner.front();
     switch (event.type) {
       case kIonEventMismatch:
-        dist += kMismatchCost * event.length; break;
+        dist += kMismatchCost * event.length;
+        break;
       case kIonEventBaseInsertion:
-        dist += kBaseInsertionCost * event.length; break;
+        dist += kBaseInsertionCost * event.length;
+        break;
       case kIonEventBaseDeletion:
-        dist += kBaseDeletionCost * event.length; break;
+        dist += kBaseDeletionCost * event.length;
+        break;
       case kIonEventRunInsertion:
-        dist += kRunInsertionCost * event.length; break;
+        dist += kRunInsertionCost * event.length;
+        break;
       case kIonEventRunDeletion:
-        dist += kRunDeletionCost * event.length; break;
-      default: break;
-    }
-    if (dist > tau && end_diff == NULL)
+        dist += kRunDeletionCost * event.length;
+        break;
+      default:
         break;
+    }
+    if (dist > tau && end_diff == NULL) break;
     aligner.popFront();
   }
 
-  if (end_diff != NULL)
-      *end_diff = aligner.endDiff();
+  if (end_diff != NULL) *end_diff = aligner.endDiff();
 
   return dist;
 }
@@ -287,92 +288,90 @@ inline unsigned distanceHKMer(const It1 &x_begin, const It1 &x_end,
 #include <iostream>
 namespace unittest {
 
-  namespace detail {
-
-    typedef hammer::HKMer::StorageType::const_iterator It;
-
-    inline unsigned distanceHKMer(It beg1, It end1, It beg2, It end2) {
-      unsigned dist = 0;
-
-      IonPairAligner<It, It> aligner(beg1, end1, beg2, end2);
-
-      while (!aligner.empty()) {
-        auto event = aligner.front();
-        switch (event.type) {
-        case kIonEventMismatch:
-          std::cerr << event.length << 'X';
-          dist += event.length; break;
-        case kIonEventBaseInsertion:
-          std::cerr << event.length << 'I';
-          dist += event.length; break;
-        case kIonEventBaseDeletion:
-          std::cerr << event.length << 'D';
-          dist += event.length; break;
-        case kIonEventRunInsertion:
-          std::cerr << event.length << 'I';
-          dist += event.length; break;
-        case kIonEventRunDeletion:
-          std::cerr << event.length << 'D';
-          dist += event.length; break;
-        default: break;
-        }
-        aligner.popFront();
-      }
+namespace detail {
 
-      std::cerr << " (end. diff. = " << aligner.endDiff() << ")" << std::endl;
-      return dist;
-    }
+typedef hammer::HKMer::StorageType::const_iterator It;
+
+inline unsigned distanceHKMer(It beg1, It end1, It beg2, It end2) {
+  unsigned dist = 0;
 
-    inline unsigned distance(const std::string &s, const std::string &t) {
-      using namespace hammer;
-      HKMer k1, k2;
-      for (size_t i = 0; i < s.size(); ++i)
-        k1 <<= s[i];
-      for (size_t i = 0; i < t.size(); ++i)
-        k2 <<= t[i];
+  IonPairAligner<It, It> aligner(beg1, end1, beg2, end2);
 
-      return distanceHKMer(k1.begin(), k1.end(), k2.begin(), k2.end());
+  while (!aligner.empty()) {
+    auto event = aligner.front();
+    switch (event.type) {
+      case kIonEventMismatch:
+        std::cerr << event.length << 'X';
+        dist += event.length;
+        break;
+      case kIonEventBaseInsertion:
+        std::cerr << event.length << 'I';
+        dist += event.length;
+        break;
+      case kIonEventBaseDeletion:
+        std::cerr << event.length << 'D';
+        dist += event.length;
+        break;
+      case kIonEventRunInsertion:
+        std::cerr << event.length << 'I';
+        dist += event.length;
+        break;
+      case kIonEventRunDeletion:
+        std::cerr << event.length << 'D';
+        dist += event.length;
+        break;
+      default:
+        break;
     }
+    aligner.popFront();
   }
 
-  inline void hkmer_distance() {
-    using namespace detail;
+  std::cerr << " (end. diff. = " << aligner.endDiff() << ")" << std::endl;
+  return dist;
+}
 
-    assert(distance("ACGTACGTACGTACGT",
-                    "CGTACGTACGTACGTA") > 1);
+inline unsigned distance(const std::string &s, const std::string &t) {
+  using namespace hammer;
+  HKMer k1, k2;
+  for (size_t i = 0; i < s.size(); ++i) k1 <<= s[i];
+  for (size_t i = 0; i < t.size(); ++i) k2 <<= t[i];
 
-    assert(distance("AACGTACGTACGTACGT",
-                    "CGTACGTACGTACGTA") > 1);
+  return distanceHKMer(k1.begin(), k1.end(), k2.begin(), k2.end());
+}
+}  // namespace detail
 
-    assert(distance("GATAGCGATTTGTTCGGTTTAGGGGGGG",
-                    "GATAGCGATTTGTTCGTTTAG") >= 7);
+inline void hkmer_distance() {
+  using namespace detail;
 
-    assert(distance("ATAGCGATTTGTTCGGTTTAGGGGGGGT",
-                    "ATAGCGATTTGTTCGTTTAGA") >= 7);
+  assert(distance("ACGTACGTACGTACGT", "CGTACGTACGTACGTA") > 1);
 
-    assert(distance("GATTTGTTCGGTTTAGGGGGGGTAGGGGGATTA",
-                    "GATTTGTTCGTTTAGGGGGGGTAGGGGGATTA") == 1);
+  assert(distance("AACGTACGTACGTACGT", "CGTACGTACGTACGTA") > 1);
 
-    assert(distance("TTAAGGCTTACAAAGACTGCGTTT",
-                    "TTAAGGCTTACAAAGACTGCGTTTT") == 1);
+  assert(distance("GATAGCGATTTGTTCGGTTTAGGGGGGG", "GATAGCGATTTGTTCGTTTAG") >=
+         7);
 
-    assert(distance("AAGGCTTACAAAGACTGCGTTTAA",
-                    "AAGGCTTACAAAGACTGCGTA") >= 2);
+  assert(distance("ATAGCGATTTGTTCGGTTTAGGGGGGGT", "ATAGCGATTTGTTCGTTTAGA") >=
+         7);
 
-    assert(distance("ACAAAGACTGCGTTTAAGAGC",
-                    "ACAAAGACTGCGTTTTAAGAGC") == 1);
+  assert(distance("GATTTGTTCGGTTTAGGGGGGGTAGGGGGATTA",
+                  "GATTTGTTCGTTTAGGGGGGGTAGGGGGATTA") == 1);
 
-    assert(distance("CTAGGAATGAAAAAGAGAACAAGAA",
-                    "CTAGGAATGAAAAAGAGAAAAAAGAATG") == 2);
+  assert(distance("TTAAGGCTTACAAAGACTGCGTTT", "TTAAGGCTTACAAAGACTGCGTTTT") ==
+         1);
 
-    assert(distance("ACACACAGGGTTTTTGAACTGGATT",
-                    "ACACACAGGGTTTTGAACTGGATT") == 1);
+  assert(distance("AAGGCTTACAAAGACTGCGTTTAA", "AAGGCTTACAAAGACTGCGTA") >= 2);
 
-    assert(distance("ACATAAGCCTTTGTACTTAGC",
-                    "ACATAAGCCTTTGACTTAGCA") == 1);
-  }
-}
+  assert(distance("ACAAAGACTGCGTTTAAGAGC", "ACAAAGACTGCGTTTTAAGAGC") == 1);
 
+  assert(distance("CTAGGAATGAAAAAGAGAACAAGAA",
+                  "CTAGGAATGAAAAAGAGAAAAAAGAATG") == 2);
 
-};
-#endif // __HAMMER_HKMER_DISTANCE_HPP__
+  assert(distance("ACACACAGGGTTTTTGAACTGGATT", "ACACACAGGGTTTTGAACTGGATT") ==
+         1);
+
+  assert(distance("ACATAAGCCTTTGTACTTAGC", "ACATAAGCCTTTGACTTAGCA") == 1);
+}
+}  // namespace unittest
+
+};      // namespace hammer
+#endif  // __HAMMER_HKMER_DISTANCE_HPP__
diff --git a/src/projects/ionhammer/io_read_corrector.hpp b/src/projects/ionhammer/io_read_corrector.hpp
new file mode 100644
index 0000000..f329a60
--- /dev/null
+++ b/src/projects/ionhammer/io_read_corrector.hpp
@@ -0,0 +1,230 @@
+//***************************************************************************
+//* Copyright (c) 2015 Saint Petersburg State University
+//* Copyright (c) 2011-2014 Saint Petersburg Academic University
+//* All Rights Reserved
+//* See file LICENSE for details.
+//***************************************************************************
+
+#ifndef __HAMMER_IT_IO_READ_CORRECTOR_HPP__
+#define __HAMMER_IT_IO_READ_CORRECTOR_HPP__
+
+#include "HSeq.hpp"
+#include "config_struct.hpp"
+#include "consensus.hpp"
+#include "flow_space_read.hpp"
+#include "hkmer_distance.hpp"
+#include "valid_hkmer_generator.hpp"
+
+#include <boost/numeric/ublas/matrix.hpp>
+#include <boost/numeric/ublas/storage.hpp>
+#include <boost/optional.hpp>
+
+#include <bamtools/api/BamAlignment.h>
+#include <bamtools/api/SamHeader.h>
+#include "kmer_data.hpp"
+#include "seqeval/BaseHypothesisEvaluator.h"
+
+#include <algorithm>
+#include <cassert>
+#include <deque>
+#include <fstream>
+#include <iterator>
+#include <limits>
+#include <list>
+#include <string>
+#include <vector>
+
+#if 1
+#include <iomanip>
+#include <iostream>
+#endif
+
+namespace hammer {
+namespace correction {
+
+template <class ReadCorrector>
+class SingleReadCorrector {
+  const KMerData& hkmer_data_;
+  using PenaltyCalcer = typename ReadCorrector::PenaltyCalcer;
+  using Factory = typename PenaltyCalcer::PenaltyCalcerFactory;
+
+ public:
+  struct ReadSelectionPredicate {
+    virtual bool operator()(const io::SingleRead& read) = 0;
+  };
+
+  struct DebugOutputPredicate : public ReadSelectionPredicate {};
+
+  struct NoDebug : public DebugOutputPredicate {
+    virtual bool operator()(const io::SingleRead&) { return false; }
+  };
+
+  struct FullDebug : public DebugOutputPredicate {
+    virtual bool operator()(const io::SingleRead&) { return true; }
+  };
+
+  class DebugIfContains : public DebugOutputPredicate {
+    Sequence Needle;
+    Sequence NeedleRc;
+
+   public:
+    DebugIfContains(const Sequence& seq) : Needle(seq), NeedleRc(!seq) {}
+
+    virtual bool operator()(const io::SingleRead& read) {
+      auto readSeq = read.sequence();
+      if (readSeq.size() < Needle.size()) return false;
+      if (readSeq.find(Needle, 0) != -1ULL) return true;
+      return readSeq.find(NeedleRc, 0) != -1ULL ? true : false;
+    }
+  };
+
+  struct SelectPredicate : public ReadSelectionPredicate {};
+
+  struct SelectAll : public SelectPredicate {
+    virtual bool operator()(const io::SingleRead&) { return true; }
+  };
+
+  class SelectByName : public SelectPredicate {
+    std::set<std::string> Names;
+
+   public:
+    SelectByName(const std::set<std::string>& names) : Names(names) {}
+
+    virtual bool operator()(const io::SingleRead& r) {
+      return Names.find(r.name()) != Names.end();
+    }
+  };
+
+ private:
+  BamTools::SamHeader* sam_header_ptr_;
+  DebugOutputPredicate& debug_predicate_;
+  SelectPredicate& select_predicate_;
+  const Factory& penalty_factory_;
+  ReadCorrector read_corrector_;
+
+ public:
+  SingleReadCorrector(const KMerData& kmer_data,
+                      const Factory& penalty_factory,
+                      BamTools::SamHeader* sam_header,
+                      DebugOutputPredicate& debug, SelectPredicate& select)
+      : hkmer_data_(kmer_data),
+        sam_header_ptr_(sam_header),
+        debug_predicate_(debug),
+        select_predicate_(select),
+        penalty_factory_(penalty_factory),
+        read_corrector_(hkmer_data_, penalty_factory_) {}
+
+  SingleReadCorrector(const KMerData& kmer_data,
+                      const Factory& penalty_factory,
+                      DebugOutputPredicate& debug, SelectPredicate& select)
+      : hkmer_data_(kmer_data),
+        sam_header_ptr_(NULL),
+        debug_predicate_(debug),
+        select_predicate_(select),
+        penalty_factory_(penalty_factory),
+        read_corrector_(hkmer_data_, penalty_factory_) {}
+
+  std::unique_ptr<io::SingleRead> operator()(std::unique_ptr<io::SingleRead> r) {
+    return SingleReadCorrector::operator()(*r);
+  }
+
+  std::unique_ptr<io::SingleRead> operator()(const io::SingleRead& read) {
+    if (!select_predicate_(read)) {
+      return nullptr;
+    }
+
+    bool debug_mode = debug_predicate_(read);
+    if (debug_mode) {
+      std::cerr << "=============================================" << std::endl;
+      std::cerr << '>' << read.name() << '\n'
+                << read.GetSequenceString() << std::endl;
+    }
+    auto corected_seq = read_corrector_.Correct(
+        read, cfg::get().keep_uncorrected_ends, debug_mode);
+
+    if (corected_seq.empty()) {
+      return nullptr;
+    }
+
+    auto result = std::unique_ptr<io::SingleRead>(new io::SingleRead(read.name(), corected_seq));
+    return result;
+  }
+
+  std::unique_ptr<io::BamRead> operator()(std::unique_ptr<BamTools::BamAlignment> alignment) {
+    VERIFY(sam_header_ptr_);
+    io::SingleRead r(alignment->Name, alignment->QueryBases);
+    // reverse strand means we're working with a mapped BAM, might be
+    // the case for datasets downloaded from IonCommunity
+    if (alignment->IsReverseStrand()) r = !r;
+    auto corrected_r = SingleReadCorrector::operator()(r);
+    std::string rg;
+    if (!alignment->GetTag("RG", rg) || !corrected_r) return nullptr;
+    auto flow_order = sam_header_ptr_->ReadGroups[rg].FlowOrder;
+
+    float delta_score, fit_score;
+    auto seq = corrected_r->GetSequenceString();
+    if (alignment->IsReverseStrand()) {
+      std::reverse(seq.begin(), seq.end());
+      for (auto it = seq.begin(); it != seq.end(); ++it) {
+        switch (*it) {
+          case 'A':
+            *it = 'T';
+            break;
+          case 'C':
+            *it = 'G';
+            break;
+          case 'G':
+            *it = 'C';
+            break;
+          case 'T':
+            *it = 'A';
+            break;
+          default:
+            break;
+        }
+      }
+    }
+
+    BaseHypothesisEvaluator(*alignment, flow_order, seq, delta_score, fit_score,
+                            0);
+    std::stringstream ss;
+    ss << alignment->Name << "_" << delta_score << "_" << fit_score;
+    alignment->Name = ss.str();
+    if (delta_score >= cfg::get().delta_score_threshold)
+      return std::unique_ptr<io::BamRead>(new io::BamRead(*alignment));
+
+    BamTools::BamAlignment corrected(*alignment);
+    corrected.QueryBases = corrected_r->GetSequenceString();
+    return std::unique_ptr<io::BamRead>(new io::BamRead(corrected));
+  }
+};
+
+template <class ReadCorrector>
+class PairedReadCorrector : public SingleReadCorrector<ReadCorrector> {
+public:
+
+  using PenaltyCalcer = typename ReadCorrector::PenaltyCalcer;
+  using Factory = typename PenaltyCalcer::PenaltyCalcerFactory;
+
+ public:
+  PairedReadCorrector(
+      const KMerData& kmerData, const Factory& penaltyFactory,
+      typename SingleReadCorrector<ReadCorrector>::DebugOutputPredicate& debug,
+      typename SingleReadCorrector<ReadCorrector>::SelectPredicate& select)
+      : SingleReadCorrector<ReadCorrector>(kmerData, penaltyFactory, debug,
+                                            select) {}
+
+  std::unique_ptr<io::PairedRead> operator()(std::unique_ptr<io::PairedRead> r) {
+    auto corrected_r = SingleReadCorrector<ReadCorrector>::operator()(r->first());
+    auto corrected_l = SingleReadCorrector<ReadCorrector>::operator()(r->second());
+
+    if (!corrected_r || !corrected_l) return nullptr;
+
+    return std::unique_ptr<io::PairedRead>(
+        new io::PairedRead(*corrected_r, *corrected_l, 0));
+  }
+};
+
+};      // namespace correction
+};      // namespace hammer
+#endif  // __HAMMER_IT_IO_READ_CORRECTOR_HPP__
diff --git a/src/projects/ionhammer/kmer_data.cpp b/src/projects/ionhammer/kmer_data.cpp
index 3ba9779..d3eb68b 100644
--- a/src/projects/ionhammer/kmer_data.cpp
+++ b/src/projects/ionhammer/kmer_data.cpp
@@ -9,8 +9,10 @@
 #include "config_struct.hpp"
 #include "valid_hkmer_generator.hpp"
 
-#include "utils/mph_index/kmer_index_builder.hpp"
+#include "utils/kmer_mph/kmer_index_builder.hpp"
 
+#include <mutex>
+#include <random>
 #include "io/kmers/mmapped_writer.hpp"
 #include "io/reads/file_reader.hpp"
 #include "io/reads/read_processor.hpp"
@@ -19,12 +21,12 @@ using namespace hammer;
 
 class BufferFiller;
 
-class HammerKMerSplitter : public KMerSortingSplitter<HKMer> {
+class HammerKMerSplitter : public utils::KMerSortingSplitter<HKMer> {
  public:
   HammerKMerSplitter(const std::string &work_dir)
       : KMerSortingSplitter<HKMer>(work_dir, hammer::K) {}
 
-  path::files_t Split(size_t num_files) override;
+  fs::files_t Split(size_t num_files, unsigned nthreads) override;
 
   friend class BufferFiller;
 };
@@ -35,22 +37,22 @@ class BufferFiller {
 
  public:
   BufferFiller(HammerKMerSplitter &splitter)
-          : processed_(0), splitter_(splitter) {}
+      : processed_(0), splitter_(splitter) {}
 
   size_t processed() const { return processed_; }
 
-    bool operator()(std::unique_ptr<io::SingleRead> r) {
+  bool operator()(std::unique_ptr<io::SingleRead> r) {
     ValidHKMerGenerator<hammer::K> gen(*r);
     unsigned thread_id = omp_get_thread_num();
 
-#   pragma omp atomic
+#pragma omp atomic
     processed_ += 1;
 
     bool stop = false;
     while (gen.HasMore()) {
       HKMer seq = gen.kmer();
 
-      stop |= splitter_.push_back_internal( seq, thread_id);
+      stop |= splitter_.push_back_internal(seq, thread_id);
       stop |= splitter_.push_back_internal(!seq, thread_id);
 
       gen.Next();
@@ -60,13 +62,10 @@ class BufferFiller {
   }
 };
 
-path::files_t HammerKMerSplitter::Split(size_t num_files) {
-  unsigned nthreads = cfg::get().max_nthreads;
+fs::files_t HammerKMerSplitter::Split(size_t num_files, unsigned nthreads) {
   size_t reads_buffer_size = cfg::get().count_split_buffer;
 
-  INFO("Splitting kmer instances into " << num_files << " buckets. This might take a while.");
-
-  path::files_t out = PrepareBuffers(num_files, nthreads, reads_buffer_size);
+  fs::files_t out = PrepareBuffers(num_files, nthreads, reads_buffer_size);
 
   size_t n = 15;
   BufferFiller filler(*this);
@@ -93,48 +92,71 @@ path::files_t HammerKMerSplitter::Split(size_t num_files) {
 }
 
 static inline void Merge(KMerStat &lhs, const KMerStat &rhs) {
-  if (lhs.count == 0)
-    lhs.kmer = rhs.kmer;
+  if (lhs.count == 0) lhs.kmer = rhs.kmer;
 
   lhs.count += rhs.count;
-  lhs.qual *= rhs.qual;
+  lhs.qual += rhs.qual;
 }
 
 static void PushKMer(KMerData &data, HKMer kmer, double qual) {
   KMerStat &kmc = data[kmer];
   kmc.lock();
-  Merge(kmc, KMerStat(1, kmer, qual));
+  Merge(kmc, KMerStat(1, kmer, (float)qual));
   kmc.unlock();
 }
 
 static void PushKMerRC(KMerData &data, HKMer kmer, double qual) {
-  kmer = !kmer;
-
-  KMerStat &kmc = data[kmer];
-  kmc.lock();
-  Merge(kmc, KMerStat(1, kmer, qual));
-  kmc.unlock();
+  PushKMer(data, !kmer, qual);
 }
 
 class KMerDataFiller {
-  KMerData &data_;
+  KMerData &Data;
+  mutable std::default_random_engine RandomEngine;
+  mutable std::uniform_real_distribution<double> UniformRandGenerator;
+  mutable std::mutex Lock;
+  double SampleRate;
 
  public:
-  KMerDataFiller(KMerData &data)
-      : data_(data) {}
+  KMerDataFiller(KMerData &data, double sampleRate = 1.0)
+      : Data(data),
+        RandomEngine(42),
+        UniformRandGenerator(0, 1),
+        SampleRate(sampleRate) {}
+
+  double NextUniform() const {
+    std::lock_guard<std::mutex> guard(Lock);
+    return UniformRandGenerator(RandomEngine);
+  }
 
-    bool operator()(std::unique_ptr<io::SingleRead> r) const {
+  bool operator()(std::unique_ptr<io::SingleRead> &&r) const {
     ValidHKMerGenerator<hammer::K> gen(*r);
-    while (gen.HasMore()) {
-      HKMer kmer = gen.kmer();
-      double correct = gen.correct_probability();
 
-      PushKMer(data_, kmer, 1 - correct);
-      PushKMerRC(data_, kmer, 1 - correct);
+    // tiny quality regularization: geometrically decay the correctness prior along the read
+    const double decay = 0.9999;
+    double prior = 1.0;
 
-      gen.Next();
+    bool skipRead = SampleRate < 1.0 && (NextUniform() > SampleRate);
+
+    if (skipRead) {
+      return false;
     }
 
+    while (gen.HasMore()) {
+      const HKMer kmer = gen.kmer();
+      const double p = gen.correct_probability();
+      gen.Next();
+
+      assert(p < 1.0);
+      assert(p >= 0);
+      const double correct = p * prior;
+
+      prior *= decay;
+      {
+        PushKMer(Data, kmer, log(1 - correct));
+
+        PushKMerRC(Data, kmer, log(1 - correct));
+      }
+    }
     // Do not stop
     return false;
   }
@@ -142,31 +164,44 @@ class KMerDataFiller {
 
 void KMerDataCounter::FillKMerData(KMerData &data) {
   HammerKMerSplitter splitter(cfg::get().working_dir);
-  KMerDiskCounter<hammer::HKMer> counter(cfg::get().working_dir, splitter);
-  size_t sz = KMerIndexBuilder<HammerKMerIndex>(cfg::get().working_dir, num_files_, cfg::get().max_nthreads).BuildIndex(data.index_, counter);
+  utils::KMerDiskCounter<hammer::HKMer> counter(cfg::get().working_dir, splitter);
+
+  size_t sz = utils::KMerIndexBuilder<HammerKMerIndex>(cfg::get().working_dir, num_files_, cfg::get().max_nthreads).BuildIndex(data.index_, counter);
+
 
   // Now use the index to fill the kmer quality information.
   INFO("Collecting K-mer information, this takes a while.");
   data.data_.resize(sz);
 
-  const auto& dataset = cfg::get().dataset;
-  for (auto it = dataset.reads_begin(), et = dataset.reads_end(); it != et; ++it) {
+  const auto &dataset = cfg::get().dataset;
+  for (auto it = dataset.reads_begin(), et = dataset.reads_end(); it != et;
+       ++it) {
     INFO("Processing " << *it);
     io::FileReadStream irs(*it, io::PhredOffset);
-    KMerDataFiller filler(data);
+    KMerDataFiller filler(data, cfg::get().sample_rate);
     hammer::ReadProcessor(cfg::get().max_nthreads).Run(irs, filler);
   }
 
   INFO("Collection done, postprocessing.");
 
   size_t singletons = 0;
+  size_t skipped = 0;
   for (size_t i = 0; i < data.size(); ++i) {
-    VERIFY(data[i].count);
-
-    if (data[i].count == 1)
+    if (data[i].count == 1) {
       singletons += 1;
+    }
+    if (data[i].count == 0) {
+      skipped += 1;
+    }
   }
 
-  INFO("Merge done. There are " << data.size() << " kmers in total. "
-       "Among them " << singletons << " (" <<  100.0 * double(singletons) / double(data.size()) << "%) are singletons.");
+  INFO("Merge done. There are "
+       << data.size()
+       << " kmers in total. "
+          "Among them "
+       << singletons << " (" << 100.0 * double(singletons) / double(data.size())
+       << "%) are singletons."
+       << "Among them " << skipped << " ("
+       << 100.0 * double(skipped) / double(data.size())
+       << "%) are skipped during sampling.");
 }
diff --git a/src/projects/ionhammer/kmer_data.hpp b/src/projects/ionhammer/kmer_data.hpp
index e27458a..35c090d 100644
--- a/src/projects/ionhammer/kmer_data.hpp
+++ b/src/projects/ionhammer/kmer_data.hpp
@@ -8,7 +8,11 @@
 #ifndef __HAMMER_KMER_DATA_HPP__
 #define __HAMMER_KMER_DATA_HPP__
 
-#include "utils/mph_index/kmer_index.hpp"
+
+#include "config_struct.hpp"
+#include "utils/kmer_mph/kmer_index.hpp"
+#include "utils/logger/logger.hpp"
+
 #include "hkmer.hpp"
 
 #include <vector>
@@ -18,28 +22,39 @@
 namespace hammer {
 
 struct KMerStat {
-  size_t count;
+  int count;
   HKMer kmer;
-  double qual;
-  unsigned changeto;
+  float qual;
+  float posterior_genomic_ll = -10000;
+  bool dist_one_subcluster = false;
   uint8_t lock_;
 
-  KMerStat(size_t count = 0, HKMer kmer = HKMer(), double qual = 1.0, unsigned changeto = -1)
-      : count(count), kmer(kmer), qual(qual), changeto(changeto), lock_(0) { }
+  KMerStat(int count = 0, HKMer kmer = HKMer(), float qual = 0.0)
+      : count(count), kmer(kmer), qual(qual), lock_(0) {}
 
   void lock() {
-    while (__sync_val_compare_and_swap(&lock_, 0, 1) == 1)
-      sched_yield();
+    while (__sync_val_compare_and_swap(&lock_, 0, 1) == 1) sched_yield();
   }
   void unlock() {
     lock_ = 0;
     __sync_synchronize();
   }
+
+  bool good() const {
+    return posterior_genomic_ll > goodThreshold();  // log(0.5)
+  }
+
+  static double goodThreshold() { return cfg::get().good_threshold; }
+
+  bool skip() const {
+    return posterior_genomic_ll > cfg::get().skip_threshold &&  !dist_one_subcluster;  // log(0.9)
+  }
+
 };
-  
-};
 
-typedef KMerIndex<kmer_index_traits<hammer::HKMer> > HammerKMerIndex;
+};  // namespace hammer
+
+typedef utils::KMerIndex<utils::kmer_index_traits<hammer::HKMer> > HammerKMerIndex;
 
 class KMerData {
   typedef std::vector<hammer::KMerStat> KMerDataStorageType;
@@ -55,7 +70,7 @@ class KMerData {
     KMerDataStorageType().swap(data_);
     KMerDataStorageType().swap(push_back_buffer_);
   }
-  size_t push_back(const hammer::KMerStat &k) {
+  size_t push_back(const hammer::KMerStat& k) {
     push_back_buffer_.push_back(k);
 
     return data_.size() + push_back_buffer_.size() - 1;
@@ -69,24 +84,35 @@ class KMerData {
     size_t dsz = data_.size();
     return (idx < dsz ? data_[idx] : push_back_buffer_[idx - dsz]);
   }
-  hammer::KMerStat& operator[](hammer::HKMer s) { return operator[](index_.seq_idx(s)); }
-  const hammer::KMerStat& operator[](hammer::HKMer s) const { return operator[](index_.seq_idx(s)); }
-  size_t seq_idx(hammer::HKMer s) const { return index_.seq_idx(s); }
+  hammer::KMerStat& operator[](const hammer::HKMer& s) {
+    return operator[](index_.seq_idx(s));
+  }
+  const hammer::KMerStat& operator[](const hammer::HKMer& s) const {
+    return operator[](index_.seq_idx(s));
+  }
+  size_t seq_idx(const hammer::HKMer& s) const { return index_.seq_idx(s); }
+
+  size_t checking_seq_idx(const hammer::HKMer& s) const {
+    size_t idx = seq_idx(s);
+    if (idx >= size()) return -1ULL;
+
+    return (s == operator[](idx).kmer ? idx : -1ULL);
+  }
 
   template <class Writer>
-  void binary_write(Writer &os) {
+  void binary_write(Writer& os) {
     size_t sz = data_.size();
     os.write((char*)&sz, sizeof(sz));
-    os.write((char*)&data_[0], sz*sizeof(data_[0]));
+    os.write((char*)&data_[0], sz * sizeof(data_[0]));
     index_.serialize(os);
   }
 
   template <class Reader>
-  void binary_read(Reader &is) {
+  void binary_read(Reader& is) {
     size_t sz = 0;
     is.read((char*)&sz, sizeof(sz));
     data_.resize(sz);
-    is.read((char*)&data_[0], sz*sizeof(data_[0]));
+    is.read((char*)&data_[0], sz * sizeof(data_[0]));
     index_.deserialize(is);
   }
 
@@ -99,13 +125,14 @@ class KMerData {
 };
 
 struct CountCmp {
-  const KMerData &kmer_data_;
+  const KMerData& kmer_data_;
 
-  CountCmp(const KMerData &kmer_data)
-      : kmer_data_(kmer_data) {}
+  CountCmp(const KMerData& kmer_data) : kmer_data_(kmer_data) {}
 
   bool operator()(unsigned lhs, unsigned rhs) {
-    return kmer_data_[lhs].count > kmer_data_[rhs].count;
+    return (kmer_data_[lhs].count != kmer_data_[rhs].count)
+           ? kmer_data_[lhs].count > kmer_data_[rhs].count
+            : kmer_data_[lhs].kmer.size() < kmer_data_[rhs].kmer.size();
   }
 };
 
@@ -115,10 +142,10 @@ class KMerDataCounter {
  public:
   KMerDataCounter(unsigned num_files) : num_files_(num_files) {}
 
-  void FillKMerData(KMerData &data);
+  void FillKMerData(KMerData& data);
 
  private:
   DECL_LOGGER("K-mer Counting");
 };
 
-#endif // __HAMMER_KMER_DATA_HPP__
+#endif  // __HAMMER_KMER_DATA_HPP__
diff --git a/src/projects/ionhammer/kmer_evaluator.cpp b/src/projects/ionhammer/kmer_evaluator.cpp
new file mode 100644
index 0000000..94dffe1
--- /dev/null
+++ b/src/projects/ionhammer/kmer_evaluator.cpp
@@ -0,0 +1,53 @@
+//***************************************************************************
+//* Copyright (c) 2015 Saint Petersburg State University
+//* Copyright (c) 2011-2014 Saint Petersburg Academic University
+//* All Rights Reserved
+//* See file LICENSE for details.
+//***************************************************************************
+
+#include <memory>
+#include "hkmer.hpp"
+#include "io/reads/read_processor.hpp"
+#include "kmer_helpers.h"
+
+void printUsage() {
+  std::cerr << "usage: ./kmer_evaluator <reference.fasta> <contigs.fasta>"
+            << std::endl;
+}
+
+void runComparison(const HKMerSet& reference_kmers,
+                   const HKMerSet& contig_kmers) {
+  size_t total_genomic = reference_kmers.size();
+  size_t total_contig = contig_kmers.size();
+
+  size_t contig_genomic = 0;
+
+  for (auto it = reference_kmers.cbegin(), et = reference_kmers.cend();
+       it != et; ++it)
+    if (contig_kmers.find(*it) != contig_kmers.end()) ++contig_genomic;
+
+  long contig_non_genomic = total_contig - contig_genomic;
+
+  std::cout << "Reference kmers:    " << total_genomic << std::endl;
+  std::cout << "Contig kmers:       " << total_contig << std::endl;
+  std::cout << "  Genomic:          " << contig_genomic << " ("
+            << ((double)contig_genomic * 100.0 / (double)total_genomic) << "%)" << std::endl;
+  std::cout << "  Non-genomic:      " << contig_non_genomic << std::endl;
+}
+
+int main(int argc, char** argv) {
+  if (argc < 3) {
+    printUsage();
+    return 0;
+  }
+
+  HKMerSet reference, contigs;
+  reference.reserve(10000000);
+  contigs.reserve(200000000);
+  std::cout << "Filling set of reference kmers..." << std::endl;
+  FillSet(reference, argv[1]);
+  std::cout << "Filling set of contig kmers..." << std::endl;
+  FillSet(contigs, argv[2]);
+  std::cout << "Running comparison " << std::endl;
+  runComparison(reference, contigs);
+}
diff --git a/src/projects/ionhammer/kmer_helpers.cpp b/src/projects/ionhammer/kmer_helpers.cpp
new file mode 100644
index 0000000..ed84276
--- /dev/null
+++ b/src/projects/ionhammer/kmer_helpers.cpp
@@ -0,0 +1,5 @@
+//
+// Created by Vasiliy Ershov on 10/07/16.
+//
+
+#include "kmer_helpers.h"
diff --git a/src/projects/ionhammer/kmer_helpers.h b/src/projects/ionhammer/kmer_helpers.h
new file mode 100644
index 0000000..ebd92f1
--- /dev/null
+++ b/src/projects/ionhammer/kmer_helpers.h
@@ -0,0 +1,69 @@
+//
+// Created by Vasiliy Ershov on 10/07/16.
+//
+
+#ifndef PROJECT_KMER_HELPERS_H
+#define PROJECT_KMER_HELPERS_H
+
+#include <mutex>
+#include <unordered_set>
+#include "hkmer.hpp"
+#include "io/reads/file_reader.hpp"
+#include "io/reads/read_processor.hpp"
+#include "valid_hkmer_generator.hpp"
+
+using HKMerSet = std::unordered_set<hammer::HKMer>;
+
+namespace std {
+template <>
+struct hash<hammer::HSeq<hammer::K> > {
+  size_t operator()(hammer::HSeq<hammer::K> seq) const { return seq.GetHash(); }
+};
+}  // namespace std
+
+class SetFiller {
+ private:
+  std::unordered_set<hammer::HKMer>& kmers_;
+  std::mutex mutex_;
+
+ private:
+  void ProcessString(const std::string& seq) {
+    if (seq.empty()) {
+      return;
+    }
+    std::vector<hammer::HKMer> kmers;
+    kmers.reserve(seq.size());
+    ValidHKMerGenerator<hammer::K> generator(seq.data(), nullptr, seq.size());
+    while (generator.HasMore()) {
+      kmers.push_back(generator.kmer());
+      kmers.push_back(!generator.kmer());
+      generator.Next();
+    }
+    PushKMers(kmers);
+  }
+
+  void PushKMers(const std::vector<hammer::HKMer>& hkmers) {
+    std::lock_guard<std::mutex> lock(mutex_);
+    for (auto it = hkmers.begin(); it != hkmers.end(); ++it) {
+      auto& hkmer = *it;
+      kmers_.insert(hkmer);
+    }
+  }
+
+ public:
+  SetFiller(std::unordered_set<hammer::HKMer>& kmers) : kmers_(kmers) {}
+
+  bool operator()(std::unique_ptr<io::SingleRead>&& read) {
+    ProcessString(read->GetSequenceString());
+    return false;
+  }
+};
+
+inline void FillSet(HKMerSet& kmers, const char* filename) {
+  const unsigned num_threads = 16;
+  SetFiller filler(kmers);
+  io::FileReadStream irs(filename, io::PhredOffset);
+  hammer::ReadProcessor(num_threads).Run(irs, filler);
+}
+
+#endif  // PROJECT_KMER_HELPERS_H
diff --git a/src/projects/ionhammer/main.cpp b/src/projects/ionhammer/main.cpp
index ab6fd5b..6998f74 100644
--- a/src/projects/ionhammer/main.cpp
+++ b/src/projects/ionhammer/main.cpp
@@ -13,22 +13,24 @@
 #include "io/reads/osequencestream.hpp"
 #include "io/reads/read_processor.hpp"
 
-#include "common/adt/concurrent_dsu.hpp"
+#include "adt/concurrent_dsu.hpp"
 
 #include "utils/segfault_handler.hpp"
-#include "utils/memory_limit.hpp"
+#include "utils/perf/memory_limit.hpp"
 
 #include "HSeq.hpp"
+#include "config_struct.hpp"
+#include "err_helper_table.hpp"
+#include "io_read_corrector.hpp"
 #include "kmer_data.hpp"
-#include "hamcluster.hpp"
+#include "penalty_estimator.hpp"
+#include "read_corrector_new.hpp"
 #include "subcluster.hpp"
-#include "err_helper_table.hpp"
-#include "read_corrector.hpp"
-#include "expander.hpp"
-#include "config_struct.hpp"
 
-#include "utils/openmp_wrapper.h"
+#include "utils/parallel/openmp_wrapper.h"
 
+#include "hamcluster_1.h"
+#include "quality_metrics.h"
 #include "version.hpp"
 
 #include <fstream>
@@ -37,266 +39,373 @@
 #include <bamtools/api/BamReader.h>
 #include <bamtools/api/SamHeader.h>
 
+#include "gamma_poisson_model.hpp"
+#include "normal_quality_model.hpp"
+
 void create_console_logger() {
   using namespace logging;
 
-  logger *lg = create_logger("");
+  logger* lg = create_logger("");
   lg->add_writer(std::make_shared<console_writer>());
   attach_logger(lg);
 }
 
 struct UfCmp {
-  bool operator()(const std::vector<unsigned long> &lhs,
-                  const std::vector<unsigned long> &rhs) {
+  bool operator()(const std::vector<unsigned long>& lhs,
+                  const std::vector<unsigned long>& rhs) {
     return lhs.size() > rhs.size();
   }
 };
 
-// This is weird workaround for bug in gcc 4.4.7
-static bool stage(hammer_config::HammerStage start, hammer_config::HammerStage current) {
-  switch (start) {
-    case hammer_config::HammerStage::KMerCounting:
-      return true;
-    case hammer_config::HammerStage::HammingClustering:
-      return current != hammer_config::HammerStage::KMerCounting;
-    case hammer_config::HammerStage::SubClustering:
-      return (current != hammer_config::HammerStage::KMerCounting &&
-              current != hammer_config::HammerStage::HammingClustering);
-    case hammer_config::HammerStage::ReadCorrection:
-      return current == hammer_config::HammerStage::ReadCorrection;
+using namespace n_gamma_poisson_model;
+
+namespace hammer {
+namespace correction {
+
+using namespace n_gamma_poisson_model;
+using namespace n_normal_model;
+
+class TKMerDataEstimator {
+  KMerData& Data;
+  const uint NumFiles;
+  const hammer_config::hammer_config& Config;
+  std::vector<std::vector<size_t> > Classes;
+  NormalClusterModel ClusterModel;
+
+  // This is weird workaround for bug in gcc 4.4.7
+  static bool stage(hammer_config::HammerStage start,
+                    hammer_config::HammerStage current) {
+    switch (start) {
+      case hammer_config::HammerStage::KMerCounting:
+        return true;
+      case hammer_config::HammerStage::HammingClustering:
+        return current != hammer_config::HammerStage::KMerCounting;
+      case hammer_config::HammerStage::SubClustering:
+        return (current != hammer_config::HammerStage::KMerCounting &&
+                current != hammer_config::HammerStage::HammingClustering);
+      case hammer_config::HammerStage::ReadCorrection:
+        return current == hammer_config::HammerStage::ReadCorrection;
+    }
+    assert(0);
   }
-  assert(0);
-}
 
-int main(int argc, char** argv) {
-  segfault_handler sh;
+  void SaveKMerData(const std::string &filename = "count.kmdata") {
+    INFO("Debug mode on. Saving K-mer index.");
+    std::ofstream ofs(fs::append_path(cfg::get().working_dir, filename), std::ios::binary);
+    Data.binary_write(ofs);
+  }
 
-  srand(42);
-  srandom(42);
+  void SaveClusters() {
+    INFO("Debug mode on. Writing down clusters.");
+    std::ofstream ofs(fs::append_path(Config.working_dir, "hamming.cls"),
+                      std::ios::binary);
+    const size_t num_classes = Classes.size();
+    ofs.write((char*)&num_classes, sizeof(num_classes));
+    for (size_t i = 0; i < Classes.size(); ++i) {
+      size_t sz = Classes[i].size();
+      ofs.write((char*)&sz, sizeof(sz));
+      ofs.write((char*)&Classes[i][0], sz * sizeof(Classes[i][0]));
+    }
+  }
 
-  try {
-    create_console_logger();
+  void LoadKMerData(std::string filename) {
+    INFO("Loading K-mer index.");
+    std::ifstream ifs(fs::append_path(Config.working_dir, filename),
+                      std::ios::binary);
+    VERIFY(ifs.good());
+    Data.binary_read(ifs);
+    INFO("Total " << Data.size() << " entries were loader");
+  }
 
-    std::string config_file = "hammer-it.cfg";
-    if (argc > 1) config_file = argv[1];
-    INFO("Starting IonHammer, built from " SPADES_GIT_REFSPEC ", git revision " SPADES_GIT_SHA1);
-    INFO("Loading config from " << config_file.c_str());
-    cfg::create_instance(config_file);
+  void CountKMers() { KMerDataCounter(NumFiles).FillKMerData(Data); }
 
-    // hard memory limit
-    const size_t GB = 1 << 30;
-    limit_memory(cfg::get().hard_memory_limit * GB);
-
-    KMerData kmer_data;
-    if (stage(cfg::get().start_stage, hammer_config::HammerStage::KMerCounting)) {
-      // FIXME: Actually it's num_files here
-      KMerDataCounter(32).FillKMerData(kmer_data);
-      if (cfg::get().debug_mode) {
-        INFO("Debug mode on. Saving K-mer index.");
-        std::ofstream ofs(path::append_path(cfg::get().working_dir, "count.kmdata"), std::ios::binary);
-        kmer_data.binary_write(ofs);
+  void ClusterHammingGraph() {
+    INFO("Clustering Hamming graph.");
+    {
+      const auto num_threads = cfg::get().max_nthreads;
+      TOneErrorClustering oneErrorClustering(Data, num_threads);
+      oneErrorClustering.FillClasses(Classes);
+    }
+    const size_t num_classes = Classes.size();
+    INFO("Clustering done. Total clusters: " << num_classes);
+  }
+
+  void LoadClusters() {
+    INFO("Loading clusters.");
+    std::ifstream ifs(fs::append_path(Config.working_dir, "hamming.cls"),
+                      std::ios::binary);
+    VERIFY(ifs.good());
+
+    size_t num_classes = 0;
+    ifs.read((char*)&num_classes, sizeof(num_classes));
+    Classes.resize(num_classes);
+
+    for (size_t i = 0; i < num_classes; ++i) {
+      size_t sz = 0;
+      ifs.read((char*)&sz, sizeof(sz));
+      Classes[i].resize(sz);
+      ifs.read((char*)&Classes[i][0], sz * sizeof(Classes[i][0]));
+    }
+  }
+
+  void EstimateGenomicCenters() {
+    const auto num_threads = cfg::get().max_nthreads;
+    QualityTransform trans;
+    n_normal_model::ModelEstimator priorEstimator(Data, cfg::get().max_nthreads,
+                                                 50, false);
+
+    ClusterModel = priorEstimator.Estimate(Classes);
+
+    INFO("Subclustering.");
+    TGenomicHKMersEstimator genomicHKMersEstimator(Data, ClusterModel, cfg::get().center_type);
+
+#pragma omp parallel for num_threads(num_threads)
+    for (size_t i = 0; i < Classes.size(); ++i) {
+      auto& cluster = Classes[i];
+      genomicHKMersEstimator.ProceedCluster(cluster);
+    }
+  }
+
+  void CalcGenomicEstimationQuality(ClusteringQuality& quality) {
+    const auto num_threads = cfg::get().max_nthreads;
+    (void)num_threads;
+#pragma omp parallel for num_threads(num_threads)
+    for (size_t idx = 0; idx < Data.size(); ++idx) {
+      if (Data[idx].count > 3) {
+        quality.AddKMer(idx);
       }
-    } else {
-      INFO("Loading K-mer index.");
-      std::ifstream ifs(path::append_path(cfg::get().working_dir, "count.kmdata"), std::ios::binary);
-      VERIFY(ifs.good());
-      kmer_data.binary_read(ifs);
-      INFO("Total " << kmer_data.size() << " entries were loader");
     }
+  }
 
-    std::vector<std::vector<size_t> > classes;
-    if (stage(cfg::get().start_stage, hammer_config::HammerStage::HammingClustering)) {
-      ConcurrentDSU uf(kmer_data.size());
-      KMerHamClusterer clusterer(cfg::get().tau);
-      INFO("Clustering Hamming graph.");
-      clusterer.cluster(path::append_path(cfg::get().working_dir, "kmers.hamcls"), kmer_data, uf);
-      uf.get_sets(classes);
-      size_t num_classes = classes.size();
-      INFO("Clustering done. Total clusters: " << num_classes);
-
-      if (cfg::get().debug_mode) {
-        INFO("Debug mode on. Writing down clusters.");
-        std::ofstream ofs(path::append_path(cfg::get().working_dir, "hamming.cls"), std::ios::binary);
-
-        ofs.write((char*)&num_classes, sizeof(num_classes));
-        for (size_t i=0; i < classes.size(); ++i) {
-          size_t sz = classes[i].size();
-          ofs.write((char*)&sz, sizeof(sz));
-          ofs.write((char*)&classes[i][0], sz * sizeof(classes[i][0]));
-        }
+ public:
+  TKMerDataEstimator(KMerData& kmerData,
+                     const hammer_config::hammer_config& config,
+                     const uint numFiles = 32)
+      : Data(kmerData), NumFiles(numFiles), Config(config) {}
+
+  void Estimate() {
+    if (stage(Config.start_stage, hammer_config::HammerStage::KMerCounting)) {
+      CountKMers();
+      if (Config.debug_mode) {
+        SaveKMerData("count.kmdata");
       }
     } else {
-      INFO("Loading clusters.");
-      std::ifstream ifs(path::append_path(cfg::get().working_dir, "hamming.cls"), std::ios::binary);
-      VERIFY(ifs.good());
-
-      size_t num_classes = 0;
-      ifs.read((char*)&num_classes, sizeof(num_classes));
-      classes.resize(num_classes);
-
-      for (size_t i = 0; i < num_classes; ++i) {
-        size_t sz = 0;
-        ifs.read((char*)&sz, sizeof(sz));
-        classes[i].resize(sz);
-        ifs.read((char*)&classes[i][0], sz * sizeof(classes[i][0]));
+      LoadKMerData("count.kmdata");
+    }
+
+    if (stage(Config.start_stage,
+              hammer_config::HammerStage::HammingClustering)) {
+      ClusterHammingGraph();
+      if (Config.debug_mode) {
+        SaveClusters();
       }
+    } else {
+      LoadClusters();
     }
 
-    size_t singletons = 0;
-    for (size_t i = 0; i < classes.size(); ++i)
-      if (classes[i].size() == 1)
-        singletons += 1;
-    INFO("Singleton clusters: " << singletons);
-
-    if (stage(cfg::get().start_stage, hammer_config::HammerStage::SubClustering)) {
-      size_t nonread = 0;
-#if 1
-      INFO("Subclustering.");
-#     pragma omp parallel for shared(nonread, classes, kmer_data)
-      for (size_t i = 0; i < classes.size(); ++i) {
-        auto& cluster = classes[i];
-
-#       pragma omp atomic
-        nonread += subcluster(kmer_data, cluster);
+    std::unique_ptr<TGenomReferenceOracle> oracle;
+    std::unique_ptr<ClusteringQuality> clusteringQuality;
+    std::string oraclePath = cfg::get().oracle_path;
+
+    if (oraclePath.length()) {
+      oracle.reset(new TGenomReferenceOracle(oraclePath));
+      clusteringQuality.reset(new ClusteringQuality(*oracle, Data));
+      for (size_t i = 0; i < Classes.size(); ++i) {
+        clusteringQuality->AddCluster(Classes[i]);
       }
-#else
-      INFO("Assigning centers");
-#     pragma omp parallel for shared(nonread, classes, kmer_data)
-      for (size_t i = 0; i < classes.size(); ++i) {
-        const auto& cluster = classes[i];
-#       pragma omp atomic
-        nonread += assign(kmer_data, cluster);
+    }
+
+    if (stage(Config.start_stage, hammer_config::HammerStage::SubClustering)) {
+      EstimateGenomicCenters();
+
+      if (clusteringQuality) {
+        CalcGenomicEstimationQuality(*clusteringQuality);
+        clusteringQuality->Info();
       }
-#endif
-      INFO("Total " << nonread << " nonread kmers were generated");
 
-      if (cfg::get().debug_mode) {
-        INFO("Debug mode on. Saving K-mer index.");
-        std::ofstream ofs(path::append_path(cfg::get().working_dir, "cluster.kmdata"), std::ios::binary);
-        kmer_data.binary_write(ofs);
+      if (Config.debug_mode) {
+        SaveKMerData("cluster.kmdata");
       }
     } else {
-      INFO("Loading K-mer index.");
-      std::ifstream ifs(path::append_path(cfg::get().working_dir, "cluster.kmdata"), std::ios::binary);
-      VERIFY(ifs.good());
-      kmer_data.binary_read(ifs);
-      INFO("Total " << kmer_data.size() << " entries were loader");
+      LoadKMerData("cluster.kmdata");
     }
+  }
 
-#if 0
-    INFO("Starting solid k-mers expansion in " << cfg::get().max_nthreads << " threads.");
-    while (true) {
-        Expander expander(kmer_data);
-        const io::DataSet<> &dataset = cfg::get().dataset;
-        for (auto I = dataset.reads_begin(), E = dataset.reads_end(); I != E; ++I) {
-            io::FileReadStream irs(*I, io::PhredOffset);
-            hammer::ReadProcessor rp(cfg::get().max_nthreads);
-            rp.Run(irs, expander);
-            VERIFY_MSG(rp.read() == rp.processed(), "Queue unbalanced");
-        }
-        INFO("" << expander.changed() << " solid k-mers were generated");
-        if (expander.changed() == 0)
-            break;
-    }
-#endif
+  NormalClusterModel GetClusterModel() const { return ClusterModel; }
 
-#if 0
+  void SaveCenters() {
     std::ofstream fasta_ofs("centers.fasta");
     fasta_ofs << std::fixed << std::setprecision(6) << std::setfill('0');
-    std::sort(classes.begin(), classes.end(),  UfCmp());
-    for (size_t i = 0; i < classes.size(); ++i) {
-      auto& cluster = classes[i];
-      std::sort(cluster.begin(), cluster.end(), CountCmp(kmer_data));
-      hammer::HKMer c = center(kmer_data, cluster);
-      size_t idx = kmer_data.seq_idx(c);
-      if (kmer_data[idx].kmer == c) {
-        fasta_ofs << '>' << std::setw(6) << i
-                  << "-cov_" << std::setw(0) << kmer_data[idx].count
-                  << "-qual_" << 1.0 - kmer_data[idx].qual;
-
-        if (cluster.size() == 1)
+    std::sort(Classes.begin(), Classes.end(), UfCmp());
+    for (size_t i = 0; i < Classes.size(); ++i) {
+      auto& cluster = Classes[i];
+      std::sort(cluster.begin(), cluster.end(), CountCmp(Data));
+      hammer::HKMer c = TGenomicHKMersEstimator::Center(Data, cluster);
+      size_t idx = Data.seq_idx(c);
+      if (Data[idx].kmer == c) {
+        fasta_ofs << '>' << std::setw(6) << i << "-cov_" << std::setw(0)
+                  << Data[idx].count << "-qual_" << std::setw(14)
+                  << 1.0 - Data[idx].qual;
+
+        if (cluster.size() == 1) {
           fasta_ofs << "_singleton";
+        }
         fasta_ofs << '\n' << c << '\n';
       }
     }
+  }
+
+#if 0
+  void SolidKMerExpansion() {
+  INFO("Starting solid k-mers expansion in " << Config.max_nthreads << " threads.");
+        while (true) {
+            Expander expander(Data);
+            const io::DataSet<> &dataset = Config.dataset;
+            for (auto I = dataset.reads_begin(), E = dataset.reads_end(); I != E; ++I) {
+                io::FileReadStream irs(*I, io::PhredOffset);
+                hammer::ReadProcessor rp(Config.max_nthreads);
+                rp.Run(irs, expander);
+                VERIFY_MSG(rp.read() == rp.processed(), "Queue unbalanced");
+            }
+            INFO("" << expander.changed() << " solid k-mers were generated");
+            if (expander.changed() == 0)
+                break;
+        }
+  }
 #endif
+};
+
+};  // namespace correction
+};  // namespace hammer
+
+int main(int argc, char** argv) {
+  using namespace hammer::correction;
+  using TCorrector = ReadCorrector<GammaPoissonLikelihoodCalcer>;
+  using SingleReadsCorrector = SingleReadCorrector<TCorrector>;
+  using PairedReadsCorrector = PairedReadCorrector<TCorrector>;
+
+  utils::segfault_handler sh;
+  srand(42);
+  srandom(42);
+
+  try {
+    create_console_logger();
+    std::string config_file = "hammer-it.cfg";
+    if (argc > 1) config_file = argv[1];
+    INFO("Starting IonHammer, built from " SPADES_GIT_REFSPEC
+         ", git revision " SPADES_GIT_SHA1);
+    INFO("Loading config from " << config_file.c_str());
+    cfg::create_instance(config_file);
+
+    // hard memory limit
+    const size_t GB = 1 << 30;
+    utils::limit_memory(cfg::get().hard_memory_limit * GB);
+
+    KMerData kmerData;
+    NormalClusterModel clusterModel;
+
+    {
+      TKMerDataEstimator estimator(kmerData, cfg::get());
+      estimator.Estimate();
+      clusterModel = estimator.GetClusterModel();
+    }
+
+    GammaPoissonLikelihoodCalcer::Factory calcerFactory(kmerData);
 
     INFO("Correcting reads.");
     using namespace hammer::correction;
-    SingleReadCorrector::NoDebug debug_pred;
-    SingleReadCorrector::SelectAll select_pred;
+    typename SingleReadsCorrector::NoDebug debug_pred;
+    typename SingleReadsCorrector::SelectAll select_pred;
     const auto& dataset = cfg::get().dataset;
     io::DataSet<> outdataset;
     size_t ilib = 0;
-    for (auto it = dataset.library_begin(), et = dataset.library_end(); it != et; ++it, ++ilib) {
+    for (auto it = dataset.library_begin(), et = dataset.library_end();
+         it != et; ++it, ++ilib) {
       const auto& lib = *it;
       auto outlib = lib;
       outlib.clear();
 
       size_t iread = 0;
       // First, correct all the paired FASTQ files
-      for (auto I = lib.paired_begin(), E = lib.paired_end(); I != E; ++I, ++iread) {
-          if (path::extension(I->first) == ".bam" || path::extension(I->second) == ".bam")
-              continue;
+      for (auto I = lib.paired_begin(), E = lib.paired_end(); I != E;
+           ++I, ++iread) {
+        if (fs::extension(I->first) == ".bam" ||
+            fs::extension(I->second) == ".bam") {
+          continue;
+        }
 
-          INFO("Correcting pair of reads: " << I->first << " and " << I->second);
+        INFO("Correcting pair of reads: " << I->first << " and " << I->second);
 
-          std::string usuffix = std::to_string(ilib) + "_" +
-                                std::to_string(iread) + ".cor.fasta";
+        std::string usuffix =
+            std::to_string(ilib) + "_" + std::to_string(iread) + ".cor.fasta";
 
-          std::string outcorl = path::append_path(cfg::get().output_dir, path::basename(I->first) + usuffix);
-          std::string outcorr = path::append_path(cfg::get().output_dir, path::basename(I->second) + usuffix);
+        std::string outcorl = fs::append_path(
+            cfg::get().output_dir, fs::basename(I->first) + usuffix);
+        std::string outcorr = fs::append_path(
+            cfg::get().output_dir, fs::basename(I->second) + usuffix);
 
-          io::PairedOutputSequenceStream ors(outcorl, outcorr);
+        io::PairedOutputSequenceStream ors(outcorl, outcorr);
 
-          io::SeparatePairedReadStream irs(I->first, I->second, 0, false, false);
-          PairedReadCorrector read_corrector(kmer_data, debug_pred, select_pred);
-          hammer::ReadProcessor(cfg::get().max_nthreads).Run(irs, read_corrector, ors);
+        io::SeparatePairedReadStream irs(I->first, I->second, 0, false, false);
+        PairedReadsCorrector read_corrector(kmerData, calcerFactory, debug_pred,
+                                            select_pred);
+        hammer::ReadProcessor(cfg::get().max_nthreads)
+            .Run(irs, read_corrector, ors);
 
-          outlib.push_back_paired(outcorl, outcorr);
+        outlib.push_back_paired(outcorl, outcorr);
       }
 
       // Second, correct all the single FASTQ files
-      for (auto I = lib.single_begin(), E = lib.single_end(); I != E; ++I, ++iread) {
-          if (path::extension(*I) == ".bam")
-              continue;
+      for (auto I = lib.single_begin(), E = lib.single_end(); I != E;
+           ++I, ++iread) {
+        if (fs::extension(*I) == ".bam") {
+          continue;
+        }
 
-          INFO("Correcting " << *I);
+        INFO("Correcting " << *I);
 
-          std::string usuffix = std::to_string(ilib) + "_" +
-                                std::to_string(iread) + ".cor.fasta";
+        std::string usuffix =
+            std::to_string(ilib) + "_" + std::to_string(iread) + ".cor.fasta";
 
-          std::string outcor = path::append_path(cfg::get().output_dir, path::basename(*I) + usuffix);
-          io::osequencestream ors(outcor);
+        std::string outcor = fs::append_path(cfg::get().output_dir,
+                                               fs::basename(*I) + usuffix);
+        io::OutputSequenceStream ors(outcor);
 
-          io::FileReadStream irs(*I, io::PhredOffset);
-          SingleReadCorrector read_corrector(kmer_data, debug_pred, select_pred);
-          hammer::ReadProcessor(cfg::get().max_nthreads).Run(irs, read_corrector, ors);
+        io::FileReadStream irs(*I, io::PhredOffset);
+        SingleReadsCorrector read_corrector(kmerData, calcerFactory, debug_pred,
+                                            select_pred);
+        hammer::ReadProcessor(cfg::get().max_nthreads)
+            .Run(irs, read_corrector, ors);
 
-          outlib.push_back_single(outcor);
+        outlib.push_back_single(outcor);
       }
 
       // Finally, correct all the BAM stuff in a row
-      for (auto I = lib.reads_begin(), E = lib.reads_end(); I != E; ++I, ++iread) {
-        if (path::extension(*I) != ".bam")
-              continue;
+      for (auto I = lib.reads_begin(), E = lib.reads_end(); I != E;
+           ++I, ++iread) {
+        if (fs::extension(*I) != ".bam") {
+          continue;
+        }
 
         INFO("Correcting " << *I);
 
-        std::string usuffix = std::to_string(ilib) + "_" +
-                              std::to_string(iread) + ".cor.fasta";
+        std::string usuffix =
+            std::to_string(ilib) + "_" + std::to_string(iread) + ".cor.fasta";
 
-        std::string outcor = path::append_path(cfg::get().output_dir, path::basename(*I) + usuffix);
-        io::osequencestream ors(outcor);
+        std::string outcor = fs::append_path(cfg::get().output_dir,
+                                               fs::basename(*I) + usuffix);
+        io::OutputSequenceStream ors(outcor);
 
         BamTools::BamReader bam_reader;
         bam_reader.Open(*I);
         auto header = bam_reader.GetHeader();
         bam_reader.Close();
 
-        SingleReadCorrector read_corrector(kmer_data, &header, debug_pred, select_pred);
+        SingleReadsCorrector read_corrector(kmerData, calcerFactory, &header,
+                                            debug_pred, select_pred);
         io::UnmappedBamStream irs(*I);
-        hammer::ReadProcessor(cfg::get().max_nthreads).Run(irs, read_corrector, ors);
+        hammer::ReadProcessor(cfg::get().max_nthreads)
+            .Run(irs, read_corrector, ors);
 
         outlib.push_back_single(outcor);
       }
@@ -305,20 +414,12 @@ int main(int argc, char** argv) {
     }
     cfg::get_writable().dataset = outdataset;
 
-    std::string fname = path::append_path(cfg::get().output_dir, "corrected.yaml");
+    std::string fname = fs::append_path(cfg::get().output_dir, "corrected.yaml");
     INFO("Saving corrected dataset description to " << fname);
     cfg::get_writable().dataset.save(fname);
-
-#if 0
-    std::sort(classes.begin(), classes.end(),  UfCmp());
-    for (size_t i = 0; i < classes.size(); ++i) {
-      auto& cluster = classes[i];
-      std::sort(cluster.begin(), cluster.end(), CountCmp(kmer_data));
-      dump(kmer_data, cluster);
-    }
-#endif
   } catch (std::bad_alloc const& e) {
-    std::cerr << "Not enough memory to run IonHammer. " << e.what() << std::endl;
+    std::cerr << "Not enough memory to run IonHammer. " << e.what()
+              << std::endl;
     return EINTR;
   } catch (std::exception const& e) {
     std::cerr << "Exception caught " << e.what() << std::endl;
diff --git a/src/projects/ionhammer/normal_quality_model.cpp b/src/projects/ionhammer/normal_quality_model.cpp
new file mode 100644
index 0000000..952f303
--- /dev/null
+++ b/src/projects/ionhammer/normal_quality_model.cpp
@@ -0,0 +1,14 @@
+//
+// Created by Vasiliy Ershov on 27/03/2017.
+//
+
+#include "normal_quality_model.hpp"
+
+using namespace n_normal_model;
+
+std::vector<double> NormalClusterModel::left_likelihoods_ = {
+    -9.98, -4.95, -3.95, -3.5, -3, -2.5, -2.2, -2};
+std::vector<double> NormalClusterModel::equal_likelihoods_ = {
+    -0.001, -0.001, -0.019, -0.05, -0.07, -0.15, -0.2, -0.25};
+std::vector<double> NormalClusterModel::right_likelihoods_ = {
+    -5.99, -5.95, -5, -4.35, -3.8, -3, -2.8, -2.5};
\ No newline at end of file
diff --git a/src/projects/ionhammer/normal_quality_model.hpp b/src/projects/ionhammer/normal_quality_model.hpp
new file mode 100644
index 0000000..456d350
--- /dev/null
+++ b/src/projects/ionhammer/normal_quality_model.hpp
@@ -0,0 +1,490 @@
+//
+// Created by Vasiliy Ershov on 08/11/2016.
+//
+
+#ifndef PROJECT_NORMAL_QUALITY_MODEL_HPP
+#define PROJECT_NORMAL_QUALITY_MODEL_HPP
+
+#include <common/utils/parallel/openmp_wrapper.h>
+#include <array>
+#include <boost/math/special_functions/binomial.hpp>
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/trigamma.hpp>
+#include <vector>
+#include "config_struct.hpp"
+#include "kmer_data.hpp"
+#include "quality_thresholds_estimator.h"
+#include "thread_utils.h"
+#include "valid_hkmer_generator.hpp"
+//
+
+namespace n_normal_model {
+
+struct QualityTransform {
+  double bias_;
+
+  QualityTransform(double bias = 60.0) : bias_(bias) {}
+
+  double Apply(double quality, double count) const {
+    return quality / (count + 60);
+  }
+};
+
+class NormalDistribution {
+ private:
+  double mean_;
+  double sigma_sqr_;
+
+ public:
+  NormalDistribution(const NormalDistribution&) = default;
+
+  NormalDistribution& operator=(const NormalDistribution&) = default;
+
+  NormalDistribution(const double mean = 0, const double sigma = 1)
+      : mean_(mean), sigma_sqr_(sigma) {}
+
+  inline double GetMean() const { return mean_; }
+
+  inline double GetSigmaSqr() const { return sigma_sqr_; }
+
+  double LogLikelihood(double x) const {
+    return -0.5 *
+           ((x - mean_) * (x - mean_) / sigma_sqr_ + log(2 * M_PI * sigma_sqr_));
+  }
+
+  double LogLikelihoodFromStats(const double sum,
+                                const double sum2,
+                                const double weight) const {
+    return -0.5 * ((sum2 - 2 * sum * mean_ + weight * mean_ * mean_) / sigma_sqr_ +
+                   weight * log(2 * M_PI * sigma_sqr_));
+  }
+
+  static NormalDistribution FromStats(const double sum,
+                                      const double sum2,
+                                      const double weight) {
+    const double mu = sum / weight;
+    const double var = sum2 / weight - mu * mu;
+    return NormalDistribution(mu, var);
+  }
+};
+
+class NormalMixture {
+ private:
+  NormalDistribution first_;
+  NormalDistribution second_;
+  double first_weight_;
+
+ public:
+  NormalMixture() : first_weight_(0) {}
+
+  NormalMixture(const NormalDistribution& first,
+                const NormalDistribution& second,
+                double weight)
+      : first_(first), second_(second), first_weight_(weight) {}
+
+  const NormalDistribution& GetFirst() const { return first_; }
+
+  const NormalDistribution& GetSecond() const { return second_; }
+
+  double GetFirstWeight() const { return first_weight_; }
+
+  double LogLikelihood(double x) const {
+    return log(first_weight_ * exp(first_.LogLikelihood(x)) +
+               (1 - first_weight_) * exp(second_.LogLikelihood(x)));
+  }
+
+  double FirstComponentPosterior(double x) const {
+    double firstLL = first_.LogLikelihood(x) + log(first_weight_);
+    double secondLL = second_.LogLikelihood(x) + log(1.0 - first_weight_);
+    const double expDiff = exp(secondLL - firstLL);
+
+    return std::isfinite(expDiff) ? -log(1.0 + exp(secondLL - firstLL))
+                                  : firstLL - secondLL;
+  }
+};
+
+class Binarizer {
+ private:
+  std::vector<double> borders_;
+
+ public:
+  Binarizer() {
+    for (int i = 17; i < 30; ++i) {
+      borders_.push_back(i);
+    }
+  }
+
+  Binarizer(const vector<double>& borders) : borders_(borders) {}
+
+  int GetBin(double value) const {
+    uint index = 0;
+    while (index < borders_.size() && value > borders_[index]) {
+      ++index;
+    }
+    return index;
+  }
+
+  size_t GetBinCount() const { return borders_.size() + 1; }
+
+  double GetBorder(int bin) {
+    --bin;
+    bin = std::min(bin, (const int)(borders_.size() - 1));
+    if (bin < 0) {
+      return 0;
+    }
+    return borders_[bin];
+  }
+};
+
+class NormalClusterModel {
+ private:
+  std::vector<NormalMixture> mixtures_;
+  Binarizer binarizer_;
+  std::vector<double> median_qualities_;
+  QualityTransform trans_;
+  double lower_quality_threshold_;
+
+  static std::vector<double> left_likelihoods_;
+  static std::vector<double> equal_likelihoods_;
+  static std::vector<double> right_likelihoods_;
+
+ public:
+  NormalClusterModel() {}
+
+  NormalClusterModel(const std::vector<NormalMixture>& mixtures,
+                      const Binarizer& binarizer,
+                      const std::vector<double>& medianQualities,
+                      const QualityTransform& trans)
+      : mixtures_(mixtures),
+        binarizer_(binarizer),
+        median_qualities_(medianQualities),
+        trans_(trans) {
+    lower_quality_threshold_ = cfg::get().noise_filter_count_threshold;  // threshold >= 10 ? 1 : 0;
+  }
+
+  NormalClusterModel(const NormalClusterModel& other) = default;
+
+  NormalClusterModel& operator=(const NormalClusterModel&) = default;
+
+  bool NeedSubcluster(const hammer::KMerStat& stat) const {
+    return stat.count > 15 && GenomicLogLikelihood(stat) > -0.0001;
+  }
+
+  double StatTransform(const hammer::KMerStat& stat) const {
+    return trans_.Apply(stat.qual, stat.count);
+  }
+
+  double GenomicLogLikelihood(const hammer::KMerStat& stat) const {
+    return GenomicLogLikelihood(binarizer_.GetBin((double)GetKmerBinIdx(stat.kmer)),
+                                stat.qual, stat.count);
+  }
+
+  bool IsHighQuality(const hammer::KMerStat& stat) const {
+    const auto bin = binarizer_.GetBin((double)GetKmerBinIdx(stat.kmer));
+    return trans_.Apply(stat.qual, stat.count) <= median_qualities_[bin];
+  }
+
+  double GenomicLogLikelihood(int bin, double quality, double count) const {
+    if (count <= lower_quality_threshold_) {
+      return -1e5;
+    }
+    const double x = trans_.Apply(quality, count);
+    return mixtures_[bin].FirstComponentPosterior(x);
+  }
+
+  static size_t GetKmerBinIdx(const hammer::HKMer& kmer) {
+    if (kmer.size() > 21) {
+      return 1 + kmer.max_run_length();
+    } else {
+      return 0;
+    }
+  }
+
+  static double ErrorLogLikelihood(int from, int to) {
+    int diff = std::abs(from - to);
+    from = std::max(from, 0);
+    --from;
+    int sign = from > to ? -1 : 1;
+    from = std::min((int)equal_likelihoods_.size() - 1, from);
+    if (diff == 0) {
+      return equal_likelihoods_[from];
+    }
+    if (sign == -1) {
+      return left_likelihoods_[from] * diff;
+    }
+    return right_likelihoods_[from] * diff;
+  }
+};
+
+class NormalMixtureEstimator {
+ private:
+  uint num_threads_;
+  size_t max_iterations_;
+  bool calc_likelihoods_;
+
+ private:
+  std::vector<double> BuildPriors(const std::vector<double>& observations) const {
+    double threshold = SimpleTwoClassClustering::SimpleThresholdEstimation(
+                           observations.begin(), observations.end())
+                           .split_;
+
+    std::vector<double> priors(observations.size());
+
+#pragma omp parallel for num_threads(num_threads_)
+    for (size_t i = 0; i < observations.size(); ++i) {
+      priors[i] = observations[i] <= threshold ? 1 : 0;
+    }
+
+    return priors;
+  }
+
+  struct Stats {
+    double sum_left_ = 0;
+    double sum2_left_ = 0;
+    double weight_left_ = 0;
+    double sum_right_ = 0;
+    double sum2_right_ = 0;
+
+    Stats& operator+=(const Stats& other) {
+      if (this != &other) {
+        sum_left_ += other.sum_left_;
+        sum2_left_ += other.sum2_left_;
+        sum_right_ += other.sum_right_;
+        sum2_right_ += other.sum2_right_;
+        weight_left_ += other.weight_left_;
+      }
+      return *this;
+    }
+  };
+
+ public:
+  NormalMixtureEstimator(uint num_threads,
+                         size_t max_iterations,
+                          bool calc_likelihood)
+      : num_threads_(num_threads),
+        max_iterations_(max_iterations),
+        calc_likelihoods_(calc_likelihood) {}
+
+  NormalMixture Estimate(std::vector<double>& observations) const {
+    std::sort(observations.begin(), observations.end(), std::greater<double>());
+    observations.resize(observations.size());
+    std::reverse(observations.begin(), observations.end());
+
+    std::vector<double> priors = BuildPriors(observations);
+
+    NormalMixture mixture;
+
+    for (size_t iter = 0; iter < max_iterations_; ++iter) {
+      auto stats =
+          n_computation_utils::ParallelStatisticsCalcer<Stats>(num_threads_)
+              .Calculate(observations.size(),
+                         [&]() -> Stats { return Stats(); },
+                         [&](Stats& stat, size_t k) {
+                           const double x = observations[k];
+                           const double w = priors[k];
+                           stat.sum2_left_ += w * x * x;
+                           stat.sum_left_ += w * x;
+                           stat.weight_left_ += w;
+                           stat.sum2_right_ += (1 - w) * x * x;
+                           stat.sum_right_ += (1 - w) * x;
+                         });
+
+      mixture =
+          NormalMixture(NormalDistribution::FromStats(
+                             stats.sum_left_, stats.sum2_left_, stats.weight_left_),
+                         NormalDistribution::FromStats(
+                             stats.sum_right_, stats.sum2_right_,
+                             (double)observations.size() - stats.weight_left_),
+                         stats.weight_left_ / (double)observations.size());
+
+// expectation
+#pragma omp parallel for num_threads(num_threads_)
+      for (size_t i = 0; i < observations.size(); ++i) {
+        priors[i] = exp(mixture.FirstComponentPosterior(observations[i]));
+      }
+
+      if (calc_likelihoods_) {
+        double ll = 0;
+        for (size_t i = 0; i < observations.size(); ++i) {
+          const double x = observations[i];
+          ll += mixture.LogLikelihood(x);
+        }
+        INFO("LogLikelihood: " << ll);
+      }
+
+      if (iter == 0 || iter == (max_iterations_ - 1)) {
+        const double llFirst = mixture.GetFirst().LogLikelihoodFromStats(
+            stats.sum_left_, stats.sum2_left_, stats.weight_left_);
+        INFO("Likelihood first: " << llFirst);
+        const double llSecond = mixture.GetSecond().LogLikelihoodFromStats(
+            stats.sum_right_, stats.sum2_right_,
+            (double)observations.size() - stats.weight_left_);
+        INFO("Likelihood second: " << llSecond);
+        INFO("First weights: " << mixture.GetFirstWeight());
+      }
+    }
+    return mixture;
+  };
+};
+
+// This class estimates the prior distribution.
+class ModelEstimator {
+ private:
+  const KMerData& data_;
+  uint num_threads_;
+  size_t max_iterations_;
+  bool is_calc_likelihood_;
+
+ public:
+  ModelEstimator(const KMerData& data,
+                 uint num_threads = 16,
+                 size_t maxIterations = 40,
+                 bool calc_likelihood = false)
+      : data_(data),
+        num_threads_(num_threads),
+        max_iterations_(maxIterations),
+        is_calc_likelihood_(calc_likelihood) {}
+
+  NormalClusterModel Estimate(
+      const std::vector<std::vector<size_t> >& clusters) {
+    QualityTransform trans;
+
+    std::vector<size_t> cluster_center;
+    {
+      cluster_center.resize(clusters.size());
+#pragma omp parallel for num_threads(num_threads_)
+      for (uint i = 0; i < clusters.size(); ++i) {
+        auto& cluster = clusters[i];
+
+        double best_qual =
+            trans.Apply(data_[cluster[0]].qual, data_[cluster[0]].count);
+        size_t bestIdx = cluster[0];
+
+        for (auto idx : cluster) {
+          const auto qual = trans.Apply(data_[idx].qual, data_[idx].count);
+          if (qual < best_qual ||
+              (qual == best_qual &&
+               data_[idx].kmer.size() < data_[bestIdx].kmer.size())) {
+            best_qual = qual;
+            bestIdx = idx;
+          }
+          cluster_center[i] = bestIdx;
+        }
+      }
+    }
+
+    std::vector<std::vector<double> > qualities;
+    qualities.reserve(16);
+    const size_t sampleMaxThreshold = (size_t)1e9;
+    const size_t min_sample_size = (size_t)1e4;
+
+    {
+      double skip_threshold = cfg::get().noise_filter_count_threshold;  // threshold >= 10 ? 1 : 0;
+
+      for (size_t i = 0; i < cluster_center.size(); ++i) {
+        const auto& stat = data_[cluster_center[i]];
+
+        if (stat.count <= skip_threshold) {
+          continue;
+        }
+        const size_t bin = NormalClusterModel::GetKmerBinIdx(stat.kmer);
+
+        if (bin >= qualities.size()) {
+          qualities.resize(bin + 1);
+        }
+
+        if (qualities[bin].size() > sampleMaxThreshold) {
+          continue;
+        }
+        auto trans_qual = trans.Apply(stat.qual, stat.count);
+        qualities[bin].push_back(trans_qual);
+      }
+    }
+
+    std::vector<NormalMixture> models;
+    std::vector<double> borders;
+    std::vector<double> median_qualities;
+
+    size_t total_count = 0;
+    for (const auto& qual : qualities) {
+      total_count += qual.size();
+    }
+    assert(qualities[1].size() == 0);
+
+    {
+      auto model = NormalMixtureEstimator(num_threads_, max_iterations_, is_calc_likelihood_).Estimate(qualities[0]);
+
+      const double median_quality = FindHighQualityThreshold(qualities[0], model);
+      INFO("For kmer length <= 21");
+      INFO("Median quality " << median_quality);
+      INFO("Sample size " << qualities[0].size());
+      INFO("Genomic dist: " << model.GetFirst().GetMean() << " "
+                            << model.GetFirst().GetSigmaSqr());
+      INFO("NonGenomic dist: " << model.GetSecond().GetMean() << " "
+                               << model.GetSecond().GetSigmaSqr());
+      models.push_back(model);
+      median_qualities.push_back(median_quality);
+      borders.push_back(0);
+      total_count -= qualities[0].size();
+    }
+
+    const auto len_limit = std::min(qualities.size(), 7UL);
+    for (uint max_run_len = 2; max_run_len < len_limit; ++max_run_len) {
+      if (total_count < min_sample_size) {
+        break;
+      }
+
+      const size_t bin = max_run_len + 1;
+      auto bin_qualities = qualities[bin];
+      total_count -= bin_qualities.size();
+
+      if (bin_qualities.size() < min_sample_size) {
+        if (bin + 1 < qualities.size()) {
+          qualities[bin + 1].insert(qualities[bin + 1].end(),
+                                    bin_qualities.begin(),
+                                    bin_qualities.end());
+        }
+        continue;
+      }
+
+      auto model = NormalMixtureEstimator(num_threads_, max_iterations_, is_calc_likelihood_).Estimate(bin_qualities);
+
+      const double median_quality = FindHighQualityThreshold(bin_qualities, model);
+
+      INFO("Sample size " << bin_qualities.size());
+      INFO("Median quality " << median_quality);
+      INFO("For max run length >= " << max_run_len);
+      INFO("Genomic dist: " << model.GetFirst().GetMean() << " "
+                            << model.GetFirst().GetSigmaSqr());
+      INFO("NonGenomic dist: " << model.GetSecond().GetMean() << " "
+                               << model.GetSecond().GetSigmaSqr());
+      median_qualities.push_back(median_quality);
+      models.push_back(model);
+      borders.push_back((double)bin);
+    }
+    borders.resize(borders.size() - 1);
+
+    return NormalClusterModel(models, Binarizer(borders), median_qualities,
+                               trans);
+  }
+
+  double FindHighQualityThreshold(const std::vector<double>& bin_quality,
+                                  const NormalMixture& model) const {
+    std::vector<double> good_samples;
+    good_samples.reserve(bin_quality.size());
+    for (size_t i = 0; i < bin_quality.size(); ++i) {
+      if (model.FirstComponentPosterior(bin_quality[i]) > -0.69) {
+        good_samples.push_back(bin_quality[i]);
+      }
+    }
+
+    const size_t quantile = (size_t)((double)good_samples.size() * cfg::get().dist_one_subcluster_alpha);
+    std::nth_element(good_samples.begin(), good_samples.begin() + quantile,
+                     good_samples.end());
+    return good_samples[quantile];
+  }
+};
+
+}  // namespace n_normal_model
+
+#endif  // PROJECT_NORMAL_QUALITY_MODEL_HPP
diff --git a/src/projects/ionhammer/penalty_estimator.hpp b/src/projects/ionhammer/penalty_estimator.hpp
new file mode 100644
index 0000000..b3c0870
--- /dev/null
+++ b/src/projects/ionhammer/penalty_estimator.hpp
@@ -0,0 +1,326 @@
+//***************************************************************************
+//* Copyright (c) 2015 Saint Petersburg State University
+//* Copyright (c) 2011-2014 Saint Petersburg Academic University
+//* All Rights Reserved
+//* See file LICENSE for details.
+//***************************************************************************
+
+#ifndef __HAMMER_IT_PENALTY_ESTIMATOR_HPP__
+#define __HAMMER_IT_PENALTY_ESTIMATOR_HPP__
+
+#include "HSeq.hpp"
+#include "config_struct.hpp"
+#include "consensus.hpp"
+#include "flow_space_read.hpp"
+#include "hkmer_distance.hpp"
+#include "valid_hkmer_generator.hpp"
+
+#include <boost/numeric/ublas/matrix.hpp>
+#include <boost/numeric/ublas/storage.hpp>
+#include <boost/optional.hpp>
+
+#include <bamtools/api/BamAlignment.h>
+#include <bamtools/api/SamHeader.h>
+#include "seqeval/BaseHypothesisEvaluator.h"
+
+#include <algorithm>
+#include <cassert>
+#include <deque>
+#include <fstream>
+#include <iterator>
+#include <limits>
+#include <list>
+#include <string>
+#include <vector>
+
+#if 1
+#include <iomanip>
+#include <iostream>
+#endif
+
+#include <atomic>
+#include <fstream>
+#include "gamma_poisson_model.hpp"
+#include "read_corrector_structs_new.h"
+
+namespace hammer {
+namespace correction {
+
+struct Interval {
+  size_t left_;
+  size_t right_;
+};
+
+class GammaPoissonLikelihoodCalcer {
+ private:
+  const n_gamma_poisson_model::GammaDistribution prior_;
+  n_gamma_poisson_model::PoissonGammaDistribution count_distribution_;
+  double upper_quantile_;
+  double lower_quantile_;
+  size_t noise_quantiles_lower_;
+  size_t noise_quantile_upper_;
+  double correction_penalty_;
+  double bad_kmer_penalty_;
+  const KMerData& data_;
+
+ public:
+  class PenaltyState {
+    friend class GammaPoissonLikelihoodCalcer;
+
+   private:
+    double likelihood_ = 0;
+    size_t last_correction_distances_ = 0;
+    size_t hkmer_distance_to_read_ = 0;
+    HKMer read_kmer_;
+
+   public:
+    double Penalty() const { return likelihood_; }
+  };
+
+ public:
+  class Factory {
+   private:
+    KMerData& data_;
+
+   public:
+    Factory(KMerData& data) : data_(data) {}
+
+    GammaPoissonLikelihoodCalcer operator()(const std::string& read) const {
+      ValidHKMerGenerator<hammer::K> generator(read.data(), nullptr,
+                                               read.length());
+
+      std::vector<size_t> counts;
+      double sum_count = 0;
+      double sum_weight = 0;
+
+      while (generator.HasMore()) {
+        size_t idx = data_.checking_seq_idx(generator.kmer());
+
+        if (idx != -1ULL) {
+          const auto& kmer_stat = data_[idx];
+          if (kmer_stat.skip()) {
+            counts.push_back(data_[idx].count);
+          }
+          const double p = exp(kmer_stat.posterior_genomic_ll);
+          sum_count += p * data_[idx].count;
+          sum_weight += p;
+        }
+        generator.Next();
+      }
+
+      n_gamma_poisson_model::GammaDistribution read_prior =
+          [&]() -> n_gamma_poisson_model::GammaDistribution {
+        if (counts.size() < 10) {
+          return n_gamma_poisson_model::GammaDistribution(sum_count + 0.1,
+                                                        sum_weight + 0.1);
+        } else {
+          return n_gamma_poisson_model::TClusterModelEstimator::EstimatePrior(
+              counts);
+        }
+      }();
+
+      return GammaPoissonLikelihoodCalcer(read_prior, data_);
+    }
+  };
+
+  using PenaltyCalcerFactory = Factory;
+
+  GammaPoissonLikelihoodCalcer(
+      const n_gamma_poisson_model::GammaDistribution& prior, const KMerData& data)
+      : prior_(prior), count_distribution_(prior_), data_(data) {
+    upper_quantile_ = count_distribution_.Quantile(1.0 - cfg::get().count_dist_skip_quantile);
+    lower_quantile_ = count_distribution_.Quantile(cfg::get().count_dist_skip_quantile);
+
+    const double eps = cfg::get().count_dist_eps;
+    noise_quantiles_lower_ = (size_t)max(count_distribution_.Quantile(eps), 1.0);
+    noise_quantile_upper_ = (size_t)count_distribution_.Quantile(1.0 - eps);
+
+    correction_penalty_ = cfg::get().correction_penalty;
+    bad_kmer_penalty_ = cfg::get().bad_kmer_penalty;
+    assert(lower_quantile_ < upper_quantile_);
+  }
+
+  inline void UpdateInitial(PenaltyState& state, const IonEvent& event,
+                            const hammer::KMerStat* const) const {
+    state.read_kmer_ <<= event.FixedHRun();
+  }
+
+  inline void Update(PenaltyState& state, const IonEvent& event,
+                     const hammer::KMerStat* const last_kmer_stats) const {
+    assert(event.fixed_size_ >= 0);
+
+    if (std::isinf(state.likelihood_)) {
+      return;
+    }
+
+    const size_t last_kmer_count = last_kmer_stats ? last_kmer_stats->count : 0;
+
+    const int bits = 4;
+    const uint dist =
+        min((const uint)std::abs(event.fixed_size_ - event.overserved_size_),
+            (uint)(1 << bits) - 1);
+
+    {
+      state.hkmer_distance_to_read_ += dist;
+      state.hkmer_distance_to_read_ -=
+          (state.last_correction_distances_ >> (bits * (hammer::K - 1))) &
+          ((1 << bits) - 1);
+      state.last_correction_distances_ =
+          ((state.last_correction_distances_ << bits) | (dist));
+      state.read_kmer_ <<= event.ObservedHRun();
+    }
+
+    if (state.hkmer_distance_to_read_ > hammer::K / 2) {
+      state.likelihood_ = -std::numeric_limits<double>::infinity();
+      return;
+    }
+
+    const bool is_good = last_kmer_stats ? last_kmer_stats->good() : false;
+
+    if (!is_good || (dist)) {
+      const size_t cnt = min(max(noise_quantiles_lower_, last_kmer_count), noise_quantile_upper_);
+
+      // state.Likelihood += dist * log(Model.ErrorRate(event.FixedSize));
+      state.likelihood_ += (double)state.hkmer_distance_to_read_ * correction_penalty_;
+      state.likelihood_ += count_distribution_.LogLikelihood(cnt);
+    }
+
+    if (!is_good) {
+      state.likelihood_ += bad_kmer_penalty_;
+    } else {
+      state.likelihood_ += std::max((double)(last_kmer_stats->posterior_genomic_ll), bad_kmer_penalty_);
+    }
+  }
+
+  inline bool Skip(const HKMer& kmer) const {
+    size_t idx = data_.checking_seq_idx(kmer);
+    if (idx == -1ULL) {
+      return false;
+    }
+    const auto& stat = data_[idx];
+
+    return stat.good() && (stat.count <= upper_quantile_) && (stat.count >= lower_quantile_) && !stat.dist_one_subcluster;
+  }
+
+  inline bool IsGood(const HKMer& kmer) const {
+    size_t idx = data_.checking_seq_idx(kmer);
+    if (idx == -1ULL) {
+      return false;
+    }
+
+    return data_[idx].good();
+  }
+
+  inline std::function<bool(const hammer::HKMer&)> Good() const {
+    return [this](const HKMer& hkMer) { return this->IsGood(hkMer); };
+  }
+
+  static PenaltyState CreateState(const bool, const uint) {
+    return PenaltyState();
+  }
+
+  std::string TrimLeft(const std::string& read) const {
+
+    ValidHKMerGenerator<K> generator(read.data(), nullptr, read.size());
+    size_t offset = 0;
+    while (generator.HasMore()) {
+      const auto& hkmer = generator.kmer();
+      if (IsGood(hkmer)) {
+        break;
+      }
+      offset += hkmer[0].len;
+      generator.Next();
+    }
+    const auto from = offset;//generator.pos() - generator.kmer().size();
+    if (from > 0) {
+      if (read[from - 1] == read[from])
+      {
+        assert(read[from - 1] != read[from]);
+      }
+    }
+    return read.substr(from);
+  }
+
+  std::string TrimBadQuality(const std::string& read) const {
+    return TrimLeft(ReverseComplement(TrimLeft(ReverseComplement(read))));
+  }
+
+  inline Interval SolidIsland(
+      ValidHKMerGenerator<K>& generator,
+      std::function<bool(const HKMer& kmer)> is_good_predicate) const {
+    size_t bestLeft = (size_t)-1ULL;
+    size_t bestRight = (size_t)-1ULL;
+    size_t solidLength = 0;
+
+    size_t leftPos = 0;
+    size_t rightPos = 0;
+
+    while (generator.HasMore()) {
+      const auto& hkmer = generator.kmer();
+      bool isGood = is_good_predicate(hkmer);
+
+      if (isGood) {
+        const auto lastHRunSize = hkmer[K - 1].len;
+        const auto hkmerSize = hkmer.size();
+        const auto hkmerStartPosition = generator.pos() - hkmerSize;
+        const auto prevEndPosition = generator.pos() - lastHRunSize;
+
+        if (prevEndPosition != rightPos) {
+          leftPos = hkmerStartPosition;
+        }
+        rightPos = generator.pos();
+
+        if (rightPos - leftPos > solidLength) {
+          bestLeft = leftPos;
+          bestRight = rightPos;
+          solidLength = rightPos - leftPos;
+        }
+      }
+      generator.Next();
+    }
+    return {bestLeft, bestRight};
+  }
+
+  inline Interval SolidIslandGood(ValidHKMerGenerator<K>& generator) const {
+    return SolidIsland(generator,
+                       [&](const HKMer& kmer) -> bool { return IsGood(kmer); });
+  }
+
+  inline Interval SolidIslandConservative(
+      ValidHKMerGenerator<K>& generator) const {
+    return SolidIsland(generator,
+                       [&](const HKMer& kmer) -> bool { return Skip(kmer); });
+  }
+
+  inline Interval SolidIsland(const std::string& read) const {
+    {
+      ValidHKMerGenerator<K> generator(&read[0], nullptr, read.size());
+      auto conservative = SolidIslandConservative(generator);
+      if (conservative.left_ != conservative.right_) {
+        return conservative;
+      }
+    }
+    {
+      ValidHKMerGenerator<K> generator(&read[0], nullptr, read.size());
+      return SolidIslandGood(generator);
+    }
+  }
+
+  inline Interval SolidIsland(const io::SingleRead& read) const {
+    {
+      ValidHKMerGenerator<K> generator(read);
+      auto conservative = SolidIslandConservative(generator);
+      if (conservative.left_ != conservative.right_) {
+        return conservative;
+      }
+    }
+    {
+      ValidHKMerGenerator<K> generator(read);
+      return SolidIslandGood(generator);
+    }
+  }
+};
+
+};  // namespace correction
+};  // namespace hammer
+#endif
diff --git a/src/projects/ionhammer/quality_metrics.cpp b/src/projects/ionhammer/quality_metrics.cpp
new file mode 100644
index 0000000..50338dc
--- /dev/null
+++ b/src/projects/ionhammer/quality_metrics.cpp
@@ -0,0 +1,5 @@
+//
+// Created by Vasiliy Ershov on 10/07/16.
+//
+
+#include "quality_metrics.h"
diff --git a/src/projects/ionhammer/quality_metrics.h b/src/projects/ionhammer/quality_metrics.h
new file mode 100644
index 0000000..c9eb512
--- /dev/null
+++ b/src/projects/ionhammer/quality_metrics.h
@@ -0,0 +1,189 @@
+//
+// Created by Vasiliy Ershov on 10/07/16.
+//
+
+#ifndef PROJECT_QUALITY_METRICS_H
+#define PROJECT_QUALITY_METRICS_H
+
+#include "kmer_data.hpp"
+#include "reference.h"
+#include "subcluster.hpp"
+
+namespace hammer {
+
+struct TKmerQualitySample {
+  double quality_ = 0;
+  double posterior_ = 0;
+  size_t count_ = 0;
+  size_t idx_ = 0;
+
+  TKmerQualitySample(double quality, double posterior, size_t count, size_t idx)
+      : quality_(quality), posterior_(posterior), count_(count), idx_(idx) {}
+};
+
+class TKmerQualitySamples {
+ private:
+  std::vector<TKmerQualitySample> Samples;
+
+ public:
+  void Add(const TKmerQualitySample& sample) { Samples.push_back(sample); }
+
+  void PrintInfo(const std::string& message) const {
+    if (Samples.size() == 0) {
+      return;
+    }
+
+    std::vector<double> quality;
+    for (const auto& sample : Samples) {
+      quality.push_back(sample.quality_);
+    }
+
+    double sum = 0;
+    double sum2 = 0;
+    for (double q : quality) {
+      sum += q;
+      sum2 += q * q;
+    }
+    double mean = sum / ((double)quality.size());
+    double sd = sum2 / ((double)quality.size()) - mean * mean;
+
+    std::sort(quality.begin(), quality.end());
+
+    const size_t quantile99 = (size_t)((double)quality.size() * 0.99);
+    const size_t quantile001 = (size_t)((double)quality.size() * 0.001);
+    const auto quantile01 = (size_t)((double)quality.size() * 0.01);
+    const auto quantile999 = (size_t)((double)quality.size() * 0.999);
+    INFO(message << "\nmean\tmedian\tsd\t0.01\t0.99\t0.001\t0.999\n"
+                 << mean << "\t" << quality[quality.size() / 2] << "\t" << sd
+                 << "\t" << quality[quantile01] << "\t"
+                 << quality[quantile99] << "\t"
+                 << quality[quantile001] << "\t"
+                 << quality[quantile999]);
+  }
+
+  std::vector<TKmerQualitySample>::const_iterator begin() {
+    return Samples.begin();
+  }
+
+  std::vector<TKmerQualitySample>::const_iterator end() {
+    return Samples.end();
+  }
+};
+
+class ClusteringQuality {
+ private:
+  const TGenomReferenceOracle& oracle_;
+  const KMerData& data_;
+
+  HKMerSet singleton_kmers_;
+  HKMerSet non_singleton_kmers_;
+  HKMerSet center_cluster_kmers_;
+
+  HKMerSet good_kmers_;
+  HKMerSet bad_kmers_;
+
+  TKmerQualitySamples genomic_centers_;
+  TKmerQualitySamples non_genomic_centers_;
+
+ private:
+  static inline void AddKMer(const HKMer& kmer, HKMerSet& set) {
+    set.insert(kmer);
+    //    set.insert(!kmer);
+  }
+
+  void AddSingleton(const std::vector<size_t>& indices) {
+    assert(indices.size() == 1);
+    const auto& kmer = data_[indices[0]].kmer;
+    AddKMer(kmer, singleton_kmers_);
+  }
+
+  void AddNonSingleton(const std::vector<size_t>& indices) {
+    for (auto idx : indices) {
+      AddKMer(data_[idx].kmer, non_singleton_kmers_);
+    }
+  }
+
+ public:
+  ClusteringQuality(const TGenomReferenceOracle& oracle,
+                     const KMerData& kMerData)
+      : oracle_(oracle), data_(kMerData) {}
+
+  void AddCluster(const std::vector<size_t>& indices) {
+    HKMer center;
+    if (indices.size() == 1) {
+      AddSingleton(indices);
+      center = data_[indices[0]].kmer;
+    } else {
+      AddNonSingleton(indices);
+      center = TGenomicHKMersEstimator::Center(data_, indices);
+    }
+    AddKMer(center, center_cluster_kmers_);
+  }
+
+  void AddKMer(size_t idx) {
+    const KMerStat& kmerStat = data_[idx];
+    const auto& kmer = kmerStat.kmer;
+    bool isGood = kmerStat.good();
+
+#pragma omp critical
+    {
+      if (isGood) {
+        AddKMer(kmer, good_kmers_);
+      } else {
+        AddKMer(kmer, bad_kmers_);
+      }
+
+      TKmerQualitySample qualitySample = {kmerStat.qual,
+                                          exp(kmerStat.posterior_genomic_ll),
+                                          (size_t)kmerStat.count, idx};
+
+      if (oracle_.IsGenomic(kmer)) {
+        genomic_centers_.Add(qualitySample);
+      } else {
+        non_genomic_centers_.Add(qualitySample);
+      }
+    }
+  }
+
+  void Info() {
+    { oracle_.KMerSetStats(singleton_kmers_, "Singletons"); }
+    { oracle_.KMerSetStats(non_singleton_kmers_, "NonSingletons"); }
+    { oracle_.KMerSetStats(center_cluster_kmers_, "Center cluster kmers"); }
+
+    { oracle_.KMerSetStats(good_kmers_, "Good kmers"); }
+
+    { oracle_.KMerSetStats(bad_kmers_, "Bad not-filtered by clustering kmers"); }
+
+    {
+      //      GenomicCenters.PrintInfo("Genomic centers");
+      //      NonGenomicCenters.PrintInfo("Non genomic centers");
+
+      std::ofstream out("quality_samples.tsv");
+      out << "is_genomic\tlength\tmax_run_length\tquality\tposterior\tcount"
+          << std::endl;
+
+      for (const auto& entry : genomic_centers_) {
+        out << "1\t" << data_[entry.idx_].kmer.size() << "\t"
+            << MaxRunLength(data_[entry.idx_].kmer) << "\t" << entry.quality_
+            << "\t" << entry.posterior_ << "\t" << entry.count_ << "\n";
+      }
+
+      for (const auto& entry : non_genomic_centers_) {
+        out << "0\t" << data_[entry.idx_].kmer.size() << "\t"
+            << MaxRunLength(data_[entry.idx_].kmer) << "\t" << entry.quality_
+            << "\t" << entry.posterior_ << "\t" << entry.count_ << "\n";
+      }
+    }
+  }
+
+  int MaxRunLength(const HKMer& kmer) const {
+    int max_len = kmer[0].len;
+    for (uint i = 0; i < hammer::K; ++i) {
+      max_len = std::max(max_len, (int)kmer[i].len);
+    }
+    return max_len;
+  }
+};
+
+}  // namespace hammer
+#endif  // PROJECT_QUALITY_METRICS_H
\ No newline at end of file
diff --git a/src/projects/ionhammer/quality_thresholds_estimator.cpp b/src/projects/ionhammer/quality_thresholds_estimator.cpp
new file mode 100644
index 0000000..7a20aab
--- /dev/null
+++ b/src/projects/ionhammer/quality_thresholds_estimator.cpp
@@ -0,0 +1,5 @@
+//
+// Created by Vasiliy Ershov on 16/07/16.
+//
+
+#include "quality_thresholds_estimator.h"
diff --git a/src/projects/ionhammer/quality_thresholds_estimator.h b/src/projects/ionhammer/quality_thresholds_estimator.h
new file mode 100644
index 0000000..29775b7
--- /dev/null
+++ b/src/projects/ionhammer/quality_thresholds_estimator.h
@@ -0,0 +1,101 @@
+//
+// Created by Vasiliy Ershov on 16/07/16.
+//
+
+#ifndef PROJECT_QUALITY_THRESHOLDS_ESTIMATOR_H
+#define PROJECT_QUALITY_THRESHOLDS_ESTIMATOR_H
+
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
// Greedy 1-D two-class clustering of scalar samples (e.g. log-likelihoods).
// The threshold is found by exhaustively scanning every split point of the
// sorted samples and minimizing an impurity-style score -- effectively a
// depth-1 decision tree ("decision stump") on the sample values.
class SimpleTwoClassClustering {
 private:
  std::vector<double> samples_;

  // Impurity-style score of a leaf holding `weight` samples with total `sum`.
  // Leaves of three or fewer samples score 0 so tiny leaves never win;
  // the log term favors larger, more reliable leaves. Lower is better.
  static inline double Score(double sum, double weight) {
    return weight > 3 ? -sum * sum * (1 + 2 * std::log(weight + 1)) / weight
                      : 0;
  }

  // Result of a split search: score and threshold of the best split, plus
  // the sum/weight statistics of the two resulting leaves.
  struct BestSplit {
    double score_;
    double split_;
    double left_sum_;
    double left_weight_;
    double right_sum_;
    double right_weight_;
  };

 public:
  SimpleTwoClassClustering(size_t maxSize = 100) { samples_.reserve(maxSize); }

  void Add(double sample) { samples_.push_back(sample); }

  // Sort the collected samples and return the best split threshold.
  // NOTE: kept under its original (typo'd) name for source compatibility;
  // new code should call FindBestSplit().
  double FindBestSpit() {
    std::sort(samples_.begin(), samples_.end());
    return SimpleThresholdEstimation(samples_.begin(), samples_.end()).split_;
  }

  // Correctly spelled alias for FindBestSpit().
  double FindBestSplit() { return FindBestSpit(); }

  // Estimate the rate parameter of an exponential model such that
  // P(sample <= split) == p with p = 0.5, i.e. alpha = log(1 - p) / split.
  // (A dead computation of the minimum sample was removed; it was never used.)
  double EstimateAlpha() {
    std::sort(samples_.begin(), samples_.end());
    auto bestSplit = SimpleThresholdEstimation(samples_.begin(), samples_.end());
    const double p = 0.5;
    return std::log(1.0 - p) / bestSplit.split_;
  }

  // Exhaustive split search over the sorted range [from, to): a depth-1
  // decision tree minimizing Score(left leaf) + Score(right leaf).
  // On ties (<=) the right-most candidate wins, matching the original scan.
  // If no split scores below 0, the returned split_ is 0 with empty
  // left-leaf statistics.
  static BestSplit SimpleThresholdEstimation(
      std::vector<double>::const_iterator from,
      std::vector<double>::const_iterator to) {
    double total_sum = 0;
    for (auto it = from; it != to; ++it) {
      total_sum += *it;
    }
    const double total_weight = (double)(to - from);

    double best_score = 0;
    double best_left_sum = 0;
    double best_left_weight = 0;
    double best_split = 0;

    double sum = 0;
    double weight = 0;

    for (auto it = from; it != to; ++it) {
      const double sample = *it;
      sum += sample;
      ++weight;

      const double split_score =
          Score(sum, weight) + Score(total_sum - sum, total_weight - weight);

      if (split_score <= best_score) {
        best_score = split_score;
        best_left_weight = weight;
        best_left_sum = sum;
        best_split = sample;
      }
    }

    return {best_score,
            best_split,
            best_left_sum,
            best_left_weight,
            total_sum - best_left_sum,
            total_weight - best_left_weight};
  }
};
+
+#endif  // PROJECT_QUALITY_THRESHOLDS_ESTIMATOR_H
diff --git a/src/projects/ionhammer/read_corrector.hpp b/src/projects/ionhammer/read_corrector.hpp
index e06df5b..25c5bd2 100644
--- a/src/projects/ionhammer/read_corrector.hpp
+++ b/src/projects/ionhammer/read_corrector.hpp
@@ -9,35 +9,35 @@
 #define __HAMMER_IT_READ_CORRECTOR_HPP__
 
 #include "HSeq.hpp"
+#include "config_struct.hpp"
+#include "consensus.hpp"
 #include "flow_space_read.hpp"
 #include "hkmer_distance.hpp"
-#include "consensus.hpp"
-#include "valid_hkmer_generator.hpp"
-#include "config_struct.hpp"
 #include "io/reads/single_read.hpp"
+#include "valid_hkmer_generator.hpp"
 
-#include <boost/optional.hpp>
 #include <boost/numeric/ublas/matrix.hpp>
 #include <boost/numeric/ublas/storage.hpp>
+#include <boost/optional.hpp>
 
 #include <bamtools/api/BamAlignment.h>
 #include <bamtools/api/SamHeader.h>
 #include "seqeval/BaseHypothesisEvaluator.h"
 
+#include <algorithm>
+#include <cassert>
 #include <deque>
-#include <vector>
+#include <fstream>
 #include <iterator>
 #include <limits>
-#include <cassert>
 #include <list>
 #include <string>
-#include <algorithm>
-#include <fstream>
+#include <vector>
 
 #if 1
-#include "sequence/nucl.hpp"
-#include <iostream>
 #include <iomanip>
+#include <iostream>
+#include "sequence/nucl.hpp"
 #endif
 
 namespace hammer {
@@ -50,16 +50,14 @@ typedef std::vector<ScoreMatrix> ScoreStorage;
 
 template <typename It1, typename It2>
 static bool exactAlignH(It1 a_begin, It1 a_initial_pos, It1 a_end,
-                        It2 b_initial_pos, It2 /*b_end*/,
-                        uint8_t max_offset, uint8_t n_cmp, int* p_offset)
-{
+                        It2 b_initial_pos, It2 /*b_end*/, uint8_t max_offset,
+                        uint8_t n_cmp, int *p_offset) {
   int M = max_offset * 2 + 1;
   for (int i = 0; i < M; i++) {
-    int offset = (i / 2) * ((i & 1) ? 1 : -1); // 0, -1, 1, -2, 2, ...
+    int offset = (i / 2) * ((i & 1) ? 1 : -1);  // 0, -1, 1, -2, 2, ...
     auto a_it = a_initial_pos + offset;
     auto b_it = b_initial_pos;
-    if (a_it < a_begin || a_it + n_cmp > a_end)
-      continue;
+    if (a_it < a_begin || a_it + n_cmp > a_end) continue;
     bool match = true;
     for (size_t j = 0; j < n_cmp; j++)
       if ((a_it + j)->raw != (b_it + j)->raw) {
@@ -76,22 +74,19 @@ static bool exactAlignH(It1 a_begin, It1 a_initial_pos, It1 a_end,
 
 template <typename It1, typename It2>
 static int overlapAlignH(It1 a_begin, It1 a_end, It2 b_begin, It2 b_end,
-                         uint8_t max_offset)
-{
+                         uint8_t max_offset) {
   // TODO: use dynamic programming
   int M = max_offset * 2 + 1;
   int best_offset = 0;
   int best_score = 0;
   for (int i = 0; i < M; i++) {
-    int offset = (i / 2) * ((i & 1) ? 1 : -1); // 0, -1, 1, -2, 2, ...
+    int offset = (i / 2) * ((i & 1) ? 1 : -1);  // 0, -1, 1, -2, 2, ...
     auto a_it = offset < 0 ? a_begin : a_begin + offset;
     auto b_it = offset < 0 ? b_begin - offset : b_begin;
-    if (b_it < b_begin || a_it >= a_end)
-        continue;
+    if (b_it < b_begin || a_it >= a_end) continue;
     int score = 0;
-    for ( ; a_it != a_end && b_it != b_end; ++a_it, ++b_it)
-      if (a_it->nucl == b_it->nucl)
-        score += std::min(a_it->len, b_it->len);
+    for (; a_it != a_end && b_it != b_end; ++a_it, ++b_it)
+      if (a_it->nucl == b_it->nucl) score += std::min(a_it->len, b_it->len);
     score -= i / 4;
     if (score > best_score) {
       best_offset = offset;
@@ -101,7 +96,6 @@ static int overlapAlignH(It1 a_begin, It1 a_end, It2 b_begin, It2 b_end,
   return best_offset;
 }
 
-
 struct Score {
   short value;
   short dir;
@@ -110,16 +104,15 @@ struct Score {
 
 #if 1
 template <typename It1, typename It2>
-static void dump(boost::numeric::ublas::matrix<Score> &scores,
-                 It1 x_begin, It1 x_end, It2 y_begin, It2 y_end) {
+static void dump(boost::numeric::ublas::matrix<Score> &scores, It1 x_begin,
+                 It1 x_end, It2 y_begin, It2 y_end) {
   std::cerr << "        ";
   for (auto it = x_begin; it != x_end; ++it)
     std::cerr << std::setw(3) << int(it->len) << nucl(it->nucl);
   std::cerr << "\n    ";
   auto m = x_end - x_begin;
   auto n = y_end - y_begin;
-  for (int i = 0; i <= m; i++)
-    std::cerr << std::setw(4) << scores(i, 0).value;
+  for (int i = 0; i <= m; i++) std::cerr << std::setw(4) << scores(i, 0).value;
   std::cerr << '\n';
   for (int i = 1; i <= n; i++) {
     auto run = *(y_begin + i - 1);
@@ -132,33 +125,29 @@ static void dump(boost::numeric::ublas::matrix<Score> &scores,
 #endif
 
 template <typename It1, typename It2>
-static int alignH(It1 read_begin, It1 read_end,
-                  It2 consensus_begin, It2 consensus_end,
-                  int approx_read_offset, size_t n_skip_consensus,
-                  uint8_t n_side = 5, uint8_t n_cmp = 8) {
-
+static int alignH(It1 read_begin, It1 read_end, It2 consensus_begin,
+                  It2 consensus_end, int approx_read_offset,
+                  size_t n_skip_consensus, uint8_t n_side = 5,
+                  uint8_t n_cmp = 8) {
   int left_offset = n_side;
   int read_len = int(read_end - read_begin);
   int consensus_len = int(consensus_end - consensus_begin);
 
   It1 x_begin = read_begin + std::max(approx_read_offset - n_side, 0);
-  if (x_begin == read_begin)
-    left_offset = approx_read_offset;
+  if (x_begin == read_begin) left_offset = approx_read_offset;
 
   if (approx_read_offset - n_side + n_cmp >= read_len) {
     x_begin = read_end - std::min(n_cmp + 2 * n_side, read_len);
     left_offset = int(read_begin + approx_read_offset - x_begin);
   }
 
-  auto x_end = x_begin + std::min(int(2 * n_side + n_cmp),
-                                  int(read_end - x_begin));
+  auto x_end =
+      x_begin + std::min(int(2 * n_side + n_cmp), int(read_end - x_begin));
 
-  auto y_begin = consensus_begin +
-    std::min(int(n_skip_consensus), consensus_len);
-  if (y_begin == consensus_end)
-      return 0; // weird situation
-  auto y_end = y_begin + std::min(int(n_cmp),
-                                  int(consensus_end - y_begin));
+  auto y_begin =
+      consensus_begin + std::min(int(n_skip_consensus), consensus_len);
+  if (y_begin == consensus_end) return 0;  // weird situation
+  auto y_end = y_begin + std::min(int(n_cmp), int(consensus_end - y_begin));
 
   // glocal alignment of homopolymer runs
   const short kDirUpLeft = 0;
@@ -256,10 +245,9 @@ static int alignH(It1 read_begin, It1 read_end,
 
   int min_acceptable_score = ((kNuclMatch + kFullMatch) * n_cmp * 4) / 5;
   if (scores(highest_x, highest_y).value < min_acceptable_score && n_cmp < 16U)
-    return alignH(read_begin, read_end,
-                  consensus_begin, consensus_end,
-                  approx_read_offset, n_skip_consensus,
-                  n_side, uint8_t(n_cmp * 2));
+    return alignH(read_begin, read_end, consensus_begin, consensus_end,
+                  approx_read_offset, n_skip_consensus, n_side,
+                  uint8_t(n_cmp * 2));
 
   int x = int(highest_x);
   int y = int(highest_y);
@@ -267,11 +255,14 @@ static int alignH(It1 read_begin, It1 read_end,
     int dir = scores(x, y).dir;
     switch (dir) {
       case kDirUp:
-        --x; break;
+        --x;
+        break;
       case kDirLeft:
-        --y; break;
+        --y;
+        break;
       case kDirUpLeft:
-        --x, --y; break;
+        --x, --y;
+        break;
       default:
         break;
     }
@@ -287,43 +278,43 @@ static int alignH(It1 read_begin, It1 read_end,
 
 // Not used now
 class HKMerProlonger {
-  const KMerData& kmer_data_;
+  const KMerData &kmer_data_;
 
  public:
   struct RightSide {
     static size_t changingPosition() { return hammer::K - 1; }
     static hammer::HKMer shift(const hammer::HKMer &kmer) {
       hammer::HKMer res;
-      for (size_t i = 1; i < hammer::K; ++i)
-        res[i - 1] = kmer[i];
+      for (size_t i = 1; i < hammer::K; ++i) res[i - 1] = kmer[i];
       return res;
     }
     template <typename T, typename U>
-    static void append(T& cont, U obj) { cont.push_back(obj); }
+    static void append(T &cont, U obj) {
+      cont.push_back(obj);
+    }
   };
 
   struct LeftSide {
     static size_t changingPosition() { return 0; }
     static hammer::HKMer shift(const hammer::HKMer &kmer) {
       hammer::HKMer res;
-      for (size_t i = 1; i < hammer::K; ++i)
-        res[i] = kmer[i - 1];
+      for (size_t i = 1; i < hammer::K; ++i) res[i] = kmer[i - 1];
       return res;
     }
     template <typename T, typename U>
-    static void append(T& cont, U obj) { cont.push_front(obj); }
+    static void append(T &cont, U obj) {
+      cont.push_front(obj);
+    }
   };
 
  public:
-
   /// @param[in] seed               kmer to prolong
   /// @param[in] bases_to_recover   maximum number of bases to recover
   template <typename Side>
   std::deque<hammer::HomopolymerRun> prolong(const hammer::HKMer &seed,
                                              size_t bases_to_recover) {
     std::deque<hammer::HomopolymerRun> good_runs(hammer::K);
-    for (size_t i = 0; i < hammer::K; ++i)
-      good_runs[i] = seed[i];
+    for (size_t i = 0; i < hammer::K; ++i) good_runs[i] = seed[i];
 
     auto good_kmer = seed;
     auto changing_pos = Side::changingPosition();
@@ -338,8 +329,7 @@ class HKMerProlonger {
       auto kmer = Side::shift(good_kmer);
 
       for (size_t nucl = 0; nucl < 4; ++nucl) {
-        if (nucl == good_kmer[changing_pos].nucl)
-          continue;
+        if (nucl == good_kmer[changing_pos].nucl) continue;
         for (size_t len = 1; len <= 4; ++len) {
           kmer[changing_pos] = hammer::HomopolymerRun(nucl, len);
           auto &k = kmer_data_[kmer];
@@ -366,14 +356,14 @@ class HKMerProlonger {
   }
 
  public:
-  HKMerProlonger(const KMerData& kmer_data) : kmer_data_(kmer_data) {}
+  HKMerProlonger(const KMerData &kmer_data) : kmer_data_(kmer_data) {}
 };
 
 static const double kLowScoreThreshold = 1.0;
 
 class CorrectedRead {
-  FlowSpaceRead raw_read_; // Uncorrected read
-  const KMerData& kmer_data_;
+  FlowSpaceRead raw_read_;  // Uncorrected read
+  const KMerData &kmer_data_;
   bool debug_mode_;
 
   // Stores runs after joining chunks
@@ -381,19 +371,15 @@ class CorrectedRead {
 
   // Contiguous part of read with strong consensus
   struct ConsensusChunk {
-    int approx_read_offset; // in the vector of raw read runs
+    int approx_read_offset;  // in the vector of raw read runs
     int approx_end_read_offset_;
     unsigned rollback_end;  // remove if don't align well
 
     int initial_read_offset_;
 
-    enum {
-      kChunkLeftAligned,
-      kChunkRightAligned,
-      kChunkNotAligned
-    } alignment;
+    enum { kChunkLeftAligned, kChunkRightAligned, kChunkNotAligned } alignment;
 
-    const FlowSpaceRead& raw_read;
+    const FlowSpaceRead &raw_read;
     size_t trimmed_left;
     size_t trimmed_right;
     bool debug_mode;
@@ -401,24 +387,21 @@ class CorrectedRead {
     std::vector<hammer::HomopolymerRun> consensus;
     std::vector<double> consensus_scores;
 
-    int raw_start_offset() const {
-      return initial_read_offset_;
-    }
+    int raw_start_offset() const { return initial_read_offset_; }
 
-    ConsensusChunk(int initial_read_offset,
-                   int approximate_read_offset,
-                   int approximate_end_read_offset,
-                   const ScoreStorage &scores,
-                   unsigned rollback_end,
-                   const FlowSpaceRead &read,
+    ConsensusChunk(int initial_read_offset, int approximate_read_offset,
+                   int approximate_end_read_offset, const ScoreStorage &scores,
+                   unsigned rollback_end, const FlowSpaceRead &read,
                    bool debug_mode)
-      :   approx_read_offset(approximate_read_offset),
+        : approx_read_offset(approximate_read_offset),
           approx_end_read_offset_(approximate_end_read_offset),
           rollback_end(rollback_end),
           initial_read_offset_(initial_read_offset),
-          alignment(kChunkNotAligned), raw_read(read),
-          trimmed_left(0), trimmed_right(0), debug_mode(debug_mode)
-    {
+          alignment(kChunkNotAligned),
+          raw_read(read),
+          trimmed_left(0),
+          trimmed_right(0),
+          debug_mode(debug_mode) {
       bool left_trim = true;
       for (size_t i = 0; i < scores.size(); ++i) {
         auto run = hammer::iontorrent::consensus(scores[i]);
@@ -431,8 +414,10 @@ class CorrectedRead {
         }
 
         if (debug_mode && left_trim) {
-            std::cerr << "[ConsensusChunk] trimmed from left: " << trimmed_left << std::endl;
-            std::cerr << "[ConsensusChunk] approx. read offset: " << approx_read_offset << std::endl;
+          std::cerr << "[ConsensusChunk] trimmed from left: " << trimmed_left
+                    << std::endl;
+          std::cerr << "[ConsensusChunk] approx. read offset: "
+                    << approx_read_offset << std::endl;
         }
 
         left_trim = false;
@@ -442,13 +427,11 @@ class CorrectedRead {
       }
 
       size_t right_end = consensus_scores.size();
-      if (right_end == 0)
-        return;
+      if (right_end == 0) return;
 
       while (consensus_scores[right_end - 1] <= kLowScoreThreshold) {
         --right_end;
-        if (right_end == 0)
-          break;
+        if (right_end == 0) break;
       }
 
       trimmed_right = consensus.size() - right_end;
@@ -456,39 +439,38 @@ class CorrectedRead {
       consensus_scores.resize(right_end);
     }
 
-    void AlignLeftEndAgainstRead(size_t skip=0) {
-      const auto& data = raw_read.data();
+    void AlignLeftEndAgainstRead(size_t skip = 0) {
+      const auto &data = raw_read.data();
 
-      int offset = alignH(data.begin(), data.end(),
-                          consensus.begin(), consensus.end(),
-                          approx_read_offset, skip);
+      int offset = alignH(data.begin(), data.end(), consensus.begin(),
+                          consensus.end(), approx_read_offset, skip);
 
       if (debug_mode) {
-        std::cerr << "[approx. read offset (left)] before: " << approx_read_offset << "; after: "
-                  << approx_read_offset + offset << std::endl;
+        std::cerr << "[approx. read offset (left)] before: "
+                  << approx_read_offset
+                  << "; after: " << approx_read_offset + offset << std::endl;
       }
 
       approx_read_offset += offset;
       alignment = kChunkLeftAligned;
     }
 
-    void AlignRightEndAgainstRead(size_t skip=0) {
-      const auto& data = raw_read.data();
+    void AlignRightEndAgainstRead(size_t skip = 0) {
+      const auto &data = raw_read.data();
       int position_on_read = approx_end_read_offset_ - 1;
-      int offset = alignH(data.rbegin(), data.rend(),
-                          consensus.rbegin(), consensus.rend(),
+      int offset = alignH(data.rbegin(), data.rend(), consensus.rbegin(),
+                          consensus.rend(),
                           int(data.size()) - 1 - position_on_read, skip);
       if (debug_mode) {
-        std::cerr << "[approx. read offset (right)] before: " << approx_read_offset << "; after: "
-                  << approx_read_offset - offset << std::endl;
+        std::cerr << "[approx. read offset (right)] before: "
+                  << approx_read_offset
+                  << "; after: " << approx_read_offset - offset << std::endl;
       }
       approx_read_offset -= offset;
       alignment = kChunkRightAligned;
     }
 
-    int approx_end_read_offset() const {
-      return approx_end_read_offset_;
-    }
+    int approx_end_read_offset() const { return approx_end_read_offset_; }
 
     int approx_end_read_offset_untrimmed() const {
       return approx_end_read_offset() + int(trimmed_right);
@@ -505,38 +487,37 @@ class CorrectedRead {
       rollback_end = 0;
     }
 
-    bool DoMerge(ConsensusChunk& chunk) {
+    bool DoMerge(ConsensusChunk &chunk) {
       int right_end_offset = approx_end_read_offset();
 
       if (debug_mode) {
-        std::cerr << "============== Merging chunks ===============" << std::endl;
-        std::cerr << "(" << approx_read_offset << " .. " << right_end_offset << ")";
-        std::cerr << " -- (" << chunk.approx_read_offset << " .. " << chunk.approx_end_read_offset() << ")" << std::endl;
+        std::cerr << "============== Merging chunks ==============="
+                  << std::endl;
+        std::cerr << "(" << approx_read_offset << " .. " << right_end_offset
+                  << ")";
+        std::cerr << " -- (" << chunk.approx_read_offset << " .. "
+                  << chunk.approx_end_read_offset() << ")" << std::endl;
 
         int white_l = 0;
         for (int i = right_end_offset - 1; i >= 0; --i)
           white_l += raw_read[i].len;
         for (size_t i = 0; i < consensus.size(); ++i)
           white_l -= consensus[i].len;
-        for (int i = 0; i < white_l; ++i)
-          std::cerr << ' ';
+        for (int i = 0; i < white_l; ++i) std::cerr << ' ';
         for (size_t i = std::max(-white_l, 0); i < consensus.size(); ++i)
           std::cerr << consensus[i].str();
         std::cerr << std::endl;
 
         for (int i = 0; i < chunk.approx_read_offset; ++i)
-          for (int j = 0; j < raw_read[i].len; ++j)
-            std::cerr << ' ';
+          for (int j = 0; j < raw_read[i].len; ++j) std::cerr << ' ';
         for (size_t i = 0; i < chunk.consensus.size(); ++i)
           std::cerr << chunk.consensus[i].str();
         std::cerr << std::endl;
       }
 
       if (right_end_offset <= chunk.approx_read_offset) {
-
         for (int i = right_end_offset; i < chunk.approx_read_offset; ++i) {
-          if (i >= static_cast<int>(raw_read.size()))
-            return false;
+          if (i >= static_cast<int>(raw_read.size())) return false;
           consensus.push_back(raw_read[i]);
           alignment = kChunkNotAligned;
 
@@ -544,8 +525,8 @@ class CorrectedRead {
           consensus_scores.push_back(0);
         }
 
-        consensus.insert(consensus.end(),
-                         chunk.consensus.begin(), chunk.consensus.end());
+        consensus.insert(consensus.end(), chunk.consensus.begin(),
+                         chunk.consensus.end());
 
         consensus_scores.insert(consensus_scores.end(),
                                 chunk.consensus_scores.begin(),
@@ -553,14 +534,11 @@ class CorrectedRead {
 
       } else {
         int overlap = right_end_offset - chunk.approx_read_offset;
-        overlap -= overlapAlignH(consensus.end() - overlap,
-                                 consensus.end(),
+        overlap -= overlapAlignH(consensus.end() - overlap, consensus.end(),
                                  chunk.consensus.begin(),
-                                 chunk.consensus.begin() + overlap,
-                                 5);
+                                 chunk.consensus.begin() + overlap, 5);
 
-        if (overlap > static_cast<int>(chunk.consensus.size()))
-          return false;
+        if (overlap > static_cast<int>(chunk.consensus.size())) return false;
 
         if (overlap < 0) {
           chunk.approx_read_offset = right_end_offset - overlap;
@@ -573,7 +551,8 @@ class CorrectedRead {
         // FIXME
         if (overlap > 0 && rollback_end > 0) {
           for (int i = 0; i < overlap; i++) {
-            if (n_runs - overlap + i < 0 || n_runs - overlap + i >= consensus.size())
+            if (n_runs - overlap + i < 0 ||
+                n_runs - overlap + i >= consensus.size())
               continue;
             auto left_run = consensus[n_runs - overlap + i];
             auto right_run = chunk.consensus[i];
@@ -586,11 +565,10 @@ class CorrectedRead {
         }
 
         if (overlap >= 3 && n_runs > overlap) {
-          for ( ; n_trim < overlap / 3; ++n_trim) {
+          for (; n_trim < overlap / 3; ++n_trim) {
             auto score1 = consensus_scores[n_runs - n_trim - 1];
             auto score2 = chunk.consensus_scores[overlap - n_trim - 1];
-            if (score1 > score2)
-              break;
+            if (score1 > score2) break;
           }
 
           consensus.resize(consensus.size() - n_trim);
@@ -601,43 +579,37 @@ class CorrectedRead {
                          chunk.consensus.begin() + overlap - n_trim,
                          chunk.consensus.end());
 
-        consensus_scores.insert(consensus_scores.end(),
-                                chunk.consensus_scores.begin() + overlap - n_trim,
-                                chunk.consensus_scores.end());
+        consensus_scores.insert(
+            consensus_scores.end(),
+            chunk.consensus_scores.begin() + overlap - n_trim,
+            chunk.consensus_scores.end());
       }
 
       approx_end_read_offset_ = chunk.approx_end_read_offset();
       return true;
     }
 
-    bool MergeWithDisjointChunk(ConsensusChunk& chunk) {
-      if (debug_mode)
-        std::cerr << "[MergeWithDisjointChunk]" << std::endl;
+    bool MergeWithDisjointChunk(ConsensusChunk &chunk) {
+      if (debug_mode) std::cerr << "[MergeWithDisjointChunk]" << std::endl;
       AlignRightEndAgainstRead();
-      if (chunk.alignment != kChunkLeftAligned)
-        chunk.AlignLeftEndAgainstRead();
+      if (chunk.alignment != kChunkLeftAligned) chunk.AlignLeftEndAgainstRead();
       return DoMerge(chunk);
     }
 
-    bool MergeWithOverlappingChunk(ConsensusChunk& chunk) {
-      if (debug_mode)
-        std::cerr << "[MergeWithOverlappingChunk]" << std::endl;
+    bool MergeWithOverlappingChunk(ConsensusChunk &chunk) {
+      if (debug_mode) std::cerr << "[MergeWithOverlappingChunk]" << std::endl;
       int right_end_offset = approx_end_read_offset_;
       size_t overlap = right_end_offset - chunk.approx_read_offset;
-      if (overlap > chunk.consensus_scores.size())
-        return false;
+      if (overlap > chunk.consensus_scores.size()) return false;
 
       AlignRightEndAgainstRead();
-      if (chunk.alignment != kChunkLeftAligned)
-        chunk.AlignLeftEndAgainstRead();
+      if (chunk.alignment != kChunkLeftAligned) chunk.AlignLeftEndAgainstRead();
       return DoMerge(chunk);
     }
 
    public:
-
-    bool TryMergeWith(ConsensusChunk& chunk) {
-      if (chunk.consensus.empty())
-        return true;
+    bool TryMergeWith(ConsensusChunk &chunk) {
+      if (chunk.consensus.empty()) return true;
 
       alignment = kChunkNotAligned;
       int right_end_offset = approx_end_read_offset_;
@@ -647,17 +619,14 @@ class CorrectedRead {
       else
         return MergeWithOverlappingChunk(chunk);
     }
-
   };
 
   // Chunks where strong consensus was obtained
   std::list<ConsensusChunk> chunks_;
   int trimmed_by_gen_;
 
-  void PushChunk(const ScoreStorage &scores,
-                 int initial_read_offset,
-                 int approx_read_offset,
-                 int approx_end_read_offset,
+  void PushChunk(const ScoreStorage &scores, int initial_read_offset,
+                 int approx_read_offset, int approx_end_read_offset,
                  unsigned rollback_end) {
     chunks_.push_back(ConsensusChunk(initial_read_offset, approx_read_offset,
                                      approx_end_read_offset, scores,
@@ -666,8 +635,7 @@ class CorrectedRead {
       auto &consensus = chunks_.back().consensus;
       size_t len = consensus.size();
       size_t nucl_len = 0;
-      for (size_t i = 0; i < len; ++i)
-        nucl_len += consensus[i].len;
+      for (size_t i = 0; i < len; ++i) nucl_len += consensus[i].len;
     }
 
     chunks_.back().AlignLeftEndAgainstRead();
@@ -675,9 +643,7 @@ class CorrectedRead {
       trimmed_by_gen_ = chunks_.back().raw_start_offset();
   }
 
-  const ConsensusChunk& LastChunk() const {
-    return chunks_.back();
-  }
+  const ConsensusChunk &LastChunk() const { return chunks_.back(); }
 
   class ChunkCollector {
     CorrectedRead &cread_;
@@ -716,17 +682,16 @@ class CorrectedRead {
       k[1] = kmer_data_[kmer_data_[!seq].changeto];
       k[1].kmer = !k[1].kmer;
 
-      if (k[0].qual > k[1].qual)
-        std::swap(k[0], k[1]);
+      if (k[0].qual > k[1].qual) std::swap(k[0], k[1]);
       using namespace hammer;
       for (size_t i = 0; i < 2; ++i) {
         auto &kmer = k[i].kmer;
         int end_diff;
-        auto dist = distanceHKMer(kmer.begin(), kmer.end(), seq.begin(), seq.end(), 3, &end_diff);
+        auto dist = distanceHKMer(kmer.begin(), kmer.end(), seq.begin(),
+                                  seq.end(), 3, &end_diff);
         if (debug_mode_) {
-          std::cerr << "[GetCenterOfCluster] distance("
-                    << seq << ", " << kmer << ") = " << dist << std::endl;
-
+          std::cerr << "[GetCenterOfCluster] distance(" << seq << ", " << kmer
+                    << ") = " << dist << std::endl;
         }
         if (dist <= 2) {
           return Center{kmer, start_pos + int(hammer::K) + end_diff};
@@ -736,8 +701,7 @@ class CorrectedRead {
     }
 
     bool IsInconsistent(const Center &center) const {
-      if (!last_good_center_is_defined)
-        return false;
+      if (!last_good_center_is_defined) return false;
 
       for (size_t i = 0; i < hammer::K - skipped - 1; ++i)
         if (last_good_center.seq[i + skipped + 1].nucl != center.seq[i].nucl)
@@ -750,17 +714,16 @@ class CorrectedRead {
       unsigned rollback_end = 0;
 
       if (replacing) {
-        if (rollback_size < 0)
-          rollback_size = 0;
+        if (rollback_size < 0) rollback_size = 0;
         if (rollback_size < int(scores.size()))
-            rollback_end = int(scores.size()) - rollback_size;
+          rollback_end = int(scores.size()) - rollback_size;
         replacing = false;
         rollback_size = 0;
       }
 
       if (scores.size() > hammer::K) {
-        cread_.PushChunk(scores, raw_chunk_start_pos,
-                         approx_read_offset, approx_end_read_offset, rollback_end);
+        cread_.PushChunk(scores, raw_chunk_start_pos, approx_read_offset,
+                         approx_end_read_offset, rollback_end);
         pos = cread_.LastChunk().approx_end_read_offset_untrimmed() - hammer::K;
         pos += skipped;
       } else {
@@ -779,12 +742,12 @@ class CorrectedRead {
 
     // side effect: changes chunk_pos, pos, and approx_n_insertions
     bool TryToAlignCurrentCenter(const Center &center) {
-      if (!last_good_center_is_defined)
-        return true;
+      if (!last_good_center_is_defined) return true;
 
       if (debug_mode_) {
         std::cerr << "[TryToAlignCurrentCenter] " << center.seq.str()
-                  << " (previous good center is " << last_good_center.seq.str() << ","
+                  << " (previous good center is " << last_good_center.seq.str()
+                  << ","
                   << " skipped " << skipped << " centers)" << std::endl;
       }
 
@@ -793,15 +756,15 @@ class CorrectedRead {
       int offset;
       bool aligned = exactAlignH(last_good_center.seq.begin(),
                                  last_good_center.seq.begin() + skipped + 1,
-                                 last_good_center.seq.end(),
-                                 center.seq.begin(), center.seq.end(), 3, 8, &offset);
+                                 last_good_center.seq.end(), center.seq.begin(),
+                                 center.seq.end(), 3, 8, &offset);
 
       bool result = aligned && chunk_pos + offset >= 0;
       if (result) {
         if (debug_mode_)
-          std::cerr << "[TryToAlignCurrentCenter] offset = " << offset << std::endl;
-        if (offset < 0)
-          approx_n_insertions -= offset;
+          std::cerr << "[TryToAlignCurrentCenter] offset = " << offset
+                    << std::endl;
+        if (offset < 0) approx_n_insertions -= offset;
         pos += offset;
         chunk_pos += offset;
       }
@@ -820,12 +783,12 @@ class CorrectedRead {
       auto k = kmer_data_[center.seq];
 
       for (size_t i = 0; i < hammer::K; ++i)
-        scores[chunk_pos + i](center.seq[i].nucl, center.seq[i].len) += double(k.count) * (1.0 - k.qual);
+        scores[chunk_pos + i](center.seq[i].nucl, center.seq[i].len) +=
+            double(k.count) * (1.0 - k.qual);
 
       last_good_center = center;
       last_good_center_is_defined = true;
-      if (raw_chunk_start_pos == -1)
-        raw_chunk_start_pos = raw_pos;
+      if (raw_chunk_start_pos == -1) raw_chunk_start_pos = raw_pos;
       approx_end_read_offset = center.end_offset;
       if (debug_mode_) {
         std::cerr << "e.o. = " << approx_end_read_offset << std::endl;
@@ -834,20 +797,27 @@ class CorrectedRead {
       skipped = 0;
     }
 
-  public:
-    ChunkCollector(const io::SingleRead& r, CorrectedRead &cread,
-                   const KMerData &kmer_data, bool debug_mode) :
-       cread_(cread), kmer_data_(kmer_data), debug_mode_(debug_mode),
-      gen(r), pos(int(gen.trimmed_left())), skipped(0),
-      last_good_center(), last_good_center_is_defined(false),
-      is_first_center(true),
-      replacing(false), rollback_size(0),
-      need_to_align(false),
-      approx_read_offset(0), approx_end_read_offset(0),
-      scores(), chunk_pos(0),
-      raw_chunk_start_pos(-1),
-      approx_n_insertions(0)
-    {
+   public:
+    ChunkCollector(const io::SingleRead &r, CorrectedRead &cread,
+                   const KMerData &kmer_data, bool debug_mode)
+        : cread_(cread),
+          kmer_data_(kmer_data),
+          debug_mode_(debug_mode),
+          gen(r),
+          pos(int(gen.trimmed_left())),
+          skipped(0),
+          last_good_center(),
+          last_good_center_is_defined(false),
+          is_first_center(true),
+          replacing(false),
+          rollback_size(0),
+          need_to_align(false),
+          approx_read_offset(0),
+          approx_end_read_offset(0),
+          scores(),
+          chunk_pos(0),
+          raw_chunk_start_pos(-1),
+          approx_n_insertions(0) {
       --pos;
       --chunk_pos;
     }
@@ -858,7 +828,7 @@ class CorrectedRead {
       raw_pos = int(gen.trimmed_left()) - 1;
 
       if (debug_mode_) {
-          std::cerr << "gen. trimmed = " << gen.trimmed_left() << std::endl;
+        std::cerr << "gen. trimmed = " << gen.trimmed_left() << std::endl;
       }
 
       while (gen.HasMore()) {
@@ -869,9 +839,10 @@ class CorrectedRead {
         ++raw_pos;
         if (debug_mode_) {
           std::cerr << "=================================" << std::endl;
-          std::cerr << "pos = " << pos << ", raw_pos = " << raw_pos <<
-            ", last_good_center_is_defined = " << last_good_center_is_defined <<
-            ", skipped = " << skipped << std::endl;
+          std::cerr << "pos = " << pos << ", raw_pos = " << raw_pos
+                    << ", last_good_center_is_defined = "
+                    << last_good_center_is_defined << ", skipped = " << skipped
+                    << std::endl;
         }
         ++chunk_pos;
 
@@ -884,21 +855,25 @@ class CorrectedRead {
           qual = kmer_data_[center.seq].qual;
         }
 
-        if (qual > lowQualThreshold && last_good_center_is_defined && skipped == 0) {
+        if (qual > lowQualThreshold && last_good_center_is_defined &&
+            skipped == 0) {
           if (debug_mode_) {
-            std::cerr << "raw_pos + hammer::K = " << raw_pos + hammer::K << std::endl;
-            std::cerr << "last_good_center.end_offset + 1 = " <<  last_good_center.end_offset + 1 << std::endl;
+            std::cerr << "raw_pos + hammer::K = " << raw_pos + hammer::K
+                      << std::endl;
+            std::cerr << "last_good_center.end_offset + 1 = "
+                      << last_good_center.end_offset + 1 << std::endl;
           }
           // Finding a center by means of clustering failed.
           // Let's try the following: take last good center and make a new one
-          // from it by appending next homopolymer run; if its quality is high, we use it.
+          // from it by appending next homopolymer run; if its quality is high,
+          // we use it.
           if (raw_pos + hammer::K < last_good_center.end_offset + 1) {
             --pos;
             --chunk_pos;
             if (debug_mode_) {
               std::cerr << "skipping low-quality hk-mer" << std::endl;
             }
-            continue; // move to next hk-mer
+            continue;  // move to next hk-mer
           } else if (raw_pos + hammer::K == last_good_center.end_offset + 1) {
             auto seq_corr = last_good_center.seq;
             for (size_t i = 0; i < hammer::K - 1; ++i)
@@ -907,7 +882,8 @@ class CorrectedRead {
             center = Center{seq_corr, last_good_center.end_offset + 1};
             qual = kmer_data_[center.seq].qual;
             if (debug_mode_) {
-              std::cerr << "seq_corr = " << seq_corr.str() << " , qual = " << qual << std::endl;
+              std::cerr << "seq_corr = " << seq_corr.str()
+                        << " , qual = " << qual << std::endl;
             }
 
             if (qual > lowQualThreshold && can_be_changed) {
@@ -922,9 +898,9 @@ class CorrectedRead {
         bool inconsistent = IsInconsistent(center);
 
         if (debug_mode_ && !low_qual && seq != center.seq) {
-          std::cerr << "replaced " << seq.str()
-                    << " (quality " << kmer_data_[seq].qual
-                    << ", count " << kmer_data_[seq].count << ")"
+          std::cerr << "replaced " << seq.str() << " (quality "
+                    << kmer_data_[seq].qual << ", count "
+                    << kmer_data_[seq].count << ")"
                     << " with " << center.seq.str() << std::endl;
         }
 
@@ -954,7 +930,8 @@ class CorrectedRead {
           }
 
           if (debug_mode_) {
-              std::cerr << "[include into consensus] raw_pos = " << raw_pos << std::endl;
+            std::cerr << "[include into consensus] raw_pos = " << raw_pos
+                      << std::endl;
           }
           IncludeIntoConsensus(center);
         }
@@ -964,34 +941,29 @@ class CorrectedRead {
     }
   };
 
-  void CollectChunks(const io::SingleRead& r) {
+  void CollectChunks(const io::SingleRead &r) {
     ChunkCollector chunk_collector(r, *this, kmer_data_, debug_mode_);
     chunk_collector.Run();
   }
 
  public:
-  CorrectedRead(const io::SingleRead& read, const KMerData& kmer_data,
-                bool debug_mode = false) :
-    raw_read_(read),
-    kmer_data_(kmer_data),
-    debug_mode_(debug_mode)
-  {
+  CorrectedRead(const io::SingleRead &read, const KMerData &kmer_data,
+                bool debug_mode = false)
+      : raw_read_(read), kmer_data_(kmer_data), debug_mode_(debug_mode) {
     CollectChunks(read);
   }
 
   void MergeChunks() {
-    if (chunks_.empty())
-      return;
+    if (chunks_.empty()) return;
 
     auto iter = chunks_.begin();
-    ConsensusChunk& merged = *iter;
+    ConsensusChunk &merged = *iter;
 
     if (debug_mode_) {
       if (chunks_.size() == 1) {
         iter->AlignLeftEndAgainstRead();
         for (int i = 0; i < iter->approx_read_offset; ++i)
-          for (int j = 0; j < raw_read_[i].len; ++j)
-            std::cerr << ' ';
+          for (int j = 0; j < raw_read_[i].len; ++j) std::cerr << ' ';
         for (size_t i = 0; i < iter->consensus.size(); ++i)
           std::cerr << iter->consensus[i].str();
         std::cerr << std::endl;
@@ -1000,8 +972,7 @@ class CorrectedRead {
 
     ++iter;
     while (iter != chunks_.end()) {
-      if (iter->consensus.size() > hammer::K)
-        merged.TryMergeWith(*iter);
+      if (iter->consensus.size() > hammer::K) merged.TryMergeWith(*iter);
       iter = chunks_.erase(iter);
     }
 
@@ -1010,16 +981,16 @@ class CorrectedRead {
 
   void AttachUncorrectedRuns() {
     // attach runs from the right
-    const auto& data = raw_read_.data();
+    const auto &data = raw_read_.data();
     int n_raw = int(raw_read_.size());
     int end_read_offset = LastChunk().approx_end_read_offset();
     if (end_read_offset < n_raw && end_read_offset >= 0) {
       corrected_runs_.insert(corrected_runs_.end(),
-                             data.begin() + end_read_offset,
-                             data.end());
+                             data.begin() + end_read_offset, data.end());
     }
     if (debug_mode_) {
-      std::cerr << "n_raw = " << n_raw << ", end_read_offset = " << end_read_offset << std::endl;
+      std::cerr << "n_raw = " << n_raw
+                << ", end_read_offset = " << end_read_offset << std::endl;
     }
 
     // attach runs from the left
@@ -1033,16 +1004,14 @@ class CorrectedRead {
   }
 
   std::string GetSequenceString() const {
-    if (chunks_.empty() && corrected_runs_.empty())
-      return "";
+    if (chunks_.empty() && corrected_runs_.empty()) return "";
     std::string res;
     if (!corrected_runs_.empty()) {
       for (auto it = corrected_runs_.begin(); it != corrected_runs_.end(); ++it)
         res += it->str();
     } else {
-      auto& runs = chunks_.front().consensus;
-      for (auto it = runs.begin(); it != runs.end(); ++it)
-        res += it->str();
+      auto &runs = chunks_.front().consensus;
+      for (auto it = runs.begin(); it != runs.end(); ++it) res += it->str();
     }
     return res;
   }
@@ -1052,7 +1021,6 @@ class SingleReadCorrector {
   const KMerData &kmer_data_;
 
  public:
-
   struct ReadSelectionPredicate {
     virtual bool operator()(const io::SingleRead &read) = 0;
   };
@@ -1060,76 +1028,70 @@ class SingleReadCorrector {
   struct DebugOutputPredicate : public ReadSelectionPredicate {};
 
   struct NoDebug : public DebugOutputPredicate {
-    virtual bool operator()(const io::SingleRead &) {
-      return false;
-    }
+    virtual bool operator()(const io::SingleRead &) { return false; }
   };
 
   struct FullDebug : public DebugOutputPredicate {
-    virtual bool operator()(const io::SingleRead &) {
-      return true;
-    }
+    virtual bool operator()(const io::SingleRead &) { return true; }
   };
 
   class DebugIfContains : public DebugOutputPredicate {
     Sequence needle_;
     Sequence needle_rc_;
-  public:
-    DebugIfContains(const Sequence &seq) :
-      needle_(seq), needle_rc_(!seq) {}
+
+   public:
+    DebugIfContains(const Sequence &seq) : needle_(seq), needle_rc_(!seq) {}
 
     virtual bool operator()(const io::SingleRead &read) {
       auto read_seq = read.sequence();
-      if (read_seq.size() < needle_.size())
-          return false;
-      if (read_seq.find(needle_, 0) != -1ULL)
-        return true;
-      if (read_seq.find(needle_rc_, 0) != -1ULL)
-        return true;
+      if (read_seq.size() < needle_.size()) return false;
+      if (read_seq.find(needle_, 0) != -1ULL) return true;
+      if (read_seq.find(needle_rc_, 0) != -1ULL) return true;
       return false;
     }
   };
 
   struct SelectPredicate : public ReadSelectionPredicate {};
   struct SelectAll : public SelectPredicate {
-    virtual bool operator()(const io::SingleRead &) {
-      return true;
-    }
+    virtual bool operator()(const io::SingleRead &) { return true; }
   };
 
   class SelectByName : public SelectPredicate {
     std::set<std::string> names_;
-  public:
-    SelectByName(const std::set<std::string>& names) :
-      names_(names) {}
+
+   public:
+    SelectByName(const std::set<std::string> &names) : names_(names) {}
     virtual bool operator()(const io::SingleRead &r) {
       return names_.find(r.name()) != names_.end();
     }
   };
 
-private:
-  BamTools::SamHeader* sam_header_;
+ private:
+  BamTools::SamHeader *sam_header_;
   DebugOutputPredicate &debug_pred_;
   SelectPredicate &select_pred_;
 
-public:
+ public:
   SingleReadCorrector(const KMerData &kmer_data,
                       BamTools::SamHeader *sam_header,
-                      DebugOutputPredicate &debug,
-                      SelectPredicate &select) :
-    kmer_data_(kmer_data), sam_header_(sam_header),
-    debug_pred_(debug), select_pred_(select) {}
+                      DebugOutputPredicate &debug, SelectPredicate &select)
+      : kmer_data_(kmer_data),
+        sam_header_(sam_header),
+        debug_pred_(debug),
+        select_pred_(select) {}
 
-  SingleReadCorrector(const KMerData &kmer_data,
-                      DebugOutputPredicate &debug,
-                      SelectPredicate &select) :
-    kmer_data_(kmer_data), sam_header_(NULL),
-    debug_pred_(debug), select_pred_(select) {}
+  SingleReadCorrector(const KMerData &kmer_data, DebugOutputPredicate &debug,
+                      SelectPredicate &select)
+      : kmer_data_(kmer_data),
+        sam_header_(NULL),
+        debug_pred_(debug),
+        select_pred_(select) {}
 
-  std::unique_ptr<io::SingleRead> operator()(std::unique_ptr<io::SingleRead> r) {
+  std::unique_ptr<io::SingleRead> operator()(
+      std::unique_ptr<io::SingleRead> r) {
     return operator()(*r);
   }
-  
+
   std::unique_ptr<io::SingleRead> operator()(const io::SingleRead &r) {
     if (!select_pred_(r)) return nullptr;
     bool debug_mode = debug_pred_(r);
@@ -1142,32 +1104,28 @@ public:
 
     CorrectedRead read(r, kmer_data_, debug_mode);
     read.MergeChunks();
-    if (cfg::get().keep_uncorrected_ends)
-      read.AttachUncorrectedRuns();
+    if (cfg::get().keep_uncorrected_ends) read.AttachUncorrectedRuns();
 
     if (debug_mode) {
       std::cerr << "final result: " << read.GetSequenceString() << std::endl;
     }
 
     auto seq = read.GetSequenceString();
-    if (seq.empty())
-      return nullptr;
+    if (seq.empty()) return nullptr;
 
     return std::unique_ptr<io::SingleRead>(new io::SingleRead(r.name(), seq));
   }
 
-  std::unique_ptr<io::BamRead>
-  operator()(std::unique_ptr<BamTools::BamAlignment> alignment) {
+  std::unique_ptr<io::BamRead> operator()(
+      std::unique_ptr<BamTools::BamAlignment> alignment) {
     VERIFY(sam_header_);
     io::SingleRead r(alignment->Name, alignment->QueryBases);
     // reverse strand means we're working with a mapped BAM, might be
     // the case for datasets downloaded from IonCommunity
-    if (alignment->IsReverseStrand())
-      r = !r;
+    if (alignment->IsReverseStrand()) r = !r;
     auto corrected_r = operator()(r);
     std::string rg;
-    if (!alignment->GetTag("RG", rg) || !corrected_r)
-      return nullptr;
+    if (!alignment->GetTag("RG", rg) || !corrected_r) return nullptr;
     auto flow_order = sam_header_->ReadGroups[rg].FlowOrder;
 
     float delta_score, fit_score;
@@ -1176,17 +1134,26 @@ public:
       std::reverse(seq.begin(), seq.end());
       for (auto it = seq.begin(); it != seq.end(); ++it) {
         switch (*it) {
-        case 'A': *it = 'T'; break;
-        case 'C': *it = 'G'; break;
-        case 'G': *it = 'C'; break;
-        case 'T': *it = 'A'; break;
-        default: break;
+          case 'A':
+            *it = 'T';
+            break;
+          case 'C':
+            *it = 'G';
+            break;
+          case 'G':
+            *it = 'C';
+            break;
+          case 'T':
+            *it = 'A';
+            break;
+          default:
+            break;
         }
       }
     }
 
-    BaseHypothesisEvaluator(*alignment, flow_order, seq,
-                            delta_score, fit_score, 0);
+    BaseHypothesisEvaluator(*alignment, flow_order, seq, delta_score, fit_score,
+                            0);
     std::stringstream ss;
     ss << alignment->Name << "_" << delta_score << "_" << fit_score;
     alignment->Name = ss.str();
@@ -1201,22 +1168,22 @@ public:
 
 class PairedReadCorrector : public SingleReadCorrector {
  public:
-  PairedReadCorrector(const KMerData &kmer_data,
-                      DebugOutputPredicate &debug,
+  PairedReadCorrector(const KMerData &kmer_data, DebugOutputPredicate &debug,
                       SelectPredicate &select)
-    : SingleReadCorrector(kmer_data, debug, select) {}
+      : SingleReadCorrector(kmer_data, debug, select) {}
 
-  std::unique_ptr<io::PairedRead> operator()(std::unique_ptr<io::PairedRead> r) {
+  std::unique_ptr<io::PairedRead> operator()(
+      std::unique_ptr<io::PairedRead> r) {
     auto corrected_r = SingleReadCorrector::operator()(r->first());
     auto corrected_l = SingleReadCorrector::operator()(r->second());
 
-    if (!corrected_r || !corrected_l)
-      return nullptr;
+    if (!corrected_r || !corrected_l) return nullptr;
 
-    return std::unique_ptr<io::PairedRead>(new io::PairedRead(*corrected_r, *corrected_l, 0));
+    return std::unique_ptr<io::PairedRead>(
+        new io::PairedRead(*corrected_r, *corrected_l, 0));
   }
 };
 
-}; // namespace correction
-}; // namespace hammer
-#endif // __HAMMER_IT_READ_CORRECTOR_HPP__
+};      // namespace correction
+};      // namespace hammer
+#endif  // __HAMMER_IT_READ_CORRECTOR_HPP__
diff --git a/src/projects/ionhammer/read_corrector_new.hpp b/src/projects/ionhammer/read_corrector_new.hpp
new file mode 100644
index 0000000..e1f706b
--- /dev/null
+++ b/src/projects/ionhammer/read_corrector_new.hpp
@@ -0,0 +1,252 @@
+//***************************************************************************
+//* Copyright (c) 2015 Saint Petersburg State University
+//* Copyright (c) 2011-2014 Saint Petersburg Academic University
+//* All Rights Reserved
+//* See file LICENSE for details.
+//***************************************************************************
+
+#ifndef __HAMMER_IT_READ_CORRECTOR_HPP__
+#define __HAMMER_IT_READ_CORRECTOR_HPP__
+
+#include "HSeq.hpp"
+#include "config_struct.hpp"
+#include "consensus.hpp"
+#include "flow_space_read.hpp"
+#include "hkmer_distance.hpp"
+#include "valid_hkmer_generator.hpp"
+
+#include <boost/numeric/ublas/matrix.hpp>
+#include <boost/numeric/ublas/storage.hpp>
+#include <boost/optional.hpp>
+
+#include <bamtools/api/BamAlignment.h>
+#include <bamtools/api/SamHeader.h>
+#include "seqeval/BaseHypothesisEvaluator.h"
+
+#include <algorithm>
+#include <cassert>
+#include <deque>
+#include <fstream>
+#include <iterator>
+#include <limits>
+#include <list>
+#include <string>
+#include <vector>
+
+#if 1
+#include <iomanip>
+#include <iostream>
+#endif
+
+#include "read_corrector_structs_new.h"
+
+namespace hammer {
+namespace correction {
+
+template <class CorrectionsLikelihoodCalcer>
+class ReadCorrector {
+ public:
+  using PenaltyCalcer = CorrectionsLikelihoodCalcer;
+ private:
+  using State = CorrectionState<typename PenaltyCalcer::PenaltyState>;
+  const KMerData& data;
+  using PenaltyCalcerFactory = typename CorrectionsLikelihoodCalcer::PenaltyCalcerFactory;
+  const PenaltyCalcerFactory& penalty_calcer_factory;
+
+  mutable size_t skipped_reads = 0;
+  mutable size_t queue_overflow_reads = 0;
+
+  inline bool Flush(std::priority_queue<State>& candidates,
+                    std::priority_queue<State>& corrections,
+                    size_t limit,
+                    size_t readSize) const {
+
+    if (corrections.size() > limit) {
+      auto top = pop_queue(candidates);
+      if (!std::isinf(top.Penalty())) {
+        corrections.emplace(std::move(top));
+      }
+      std::priority_queue<State>().swap(candidates);
+      return true;
+    } else {
+      while (!candidates.empty()) {
+        auto top = pop_queue(candidates);
+        if (top.TotalCorrections() > std::max(readSize / 10, (size_t)3)) {
+          continue;
+        }
+        if (!std::isinf(top.Penalty())) {
+          corrections.emplace(std::move(top));
+        }
+      }
+      return false;
+    }
+  }
+
+  std::string CorrectRight(const PenaltyCalcer& penalty_calcer,
+                           const std::string& read,
+                           const size_t offset,
+                           bool reverse,
+                           bool& is_too_many_corrections,
+                           bool make_only_simple_corrections = false) const {
+    if (offset >= read.size()) {
+      return read;
+    }
+
+    std::priority_queue<State> corrections;
+    std::priority_queue<State> candidates;
+
+    CorrectionContext context(data, read, reverse);
+    {
+      corrections.emplace(StateBuilder<PenaltyCalcer>::Initial(
+          context, penalty_calcer, (uint)offset));
+    }
+
+    std::map<uint, std::set<size_t> > visited;
+    const size_t queue_limit =  (const size_t)(cfg::get().queue_limit_multiplier * log2(read.size() - offset + 1));//(const size_t)(100 * read.size());
+
+    bool queue_overflow = false;
+
+    while (!corrections.empty()) {
+
+      auto state = std::pop_queue(corrections);
+      assert(state.Position() <= read.size());
+
+      {
+        size_t hash = state.GetHKMer().GetHash();
+        if (visited[state.Position()].count(hash) && corrections.size()) {
+          continue;
+        }
+        visited[state.Position()].insert(hash);
+      }
+
+      if (state.Position() < read.size()) {
+        MoveToNextDivergence<PenaltyCalcer> mover(state,
+                                                  context,
+                                                  penalty_calcer);
+        if (mover.FindNextDivergence()) {
+          mover.Move();
+        }
+      }
+
+      if (state.Position() == read.size()) {
+        return state.Read()->ToString();
+      }
+
+      //      //don't correct last kmer
+      if ((state.Position() + context.GetHRun(state.Position()).len) ==
+          read.size()) {
+        auto result = state.Read()->ToString();
+        result += (context.GetHRun(state.Position()).str());
+        return result;
+      }
+
+      {
+        SkipMayBeBadHRun<PenaltyCalcer> skipHRun(state,
+                                                 context,
+                                                 penalty_calcer);
+        candidates.emplace(skipHRun.State());
+      }
+
+      {
+        CorrectLastHRun<PenaltyCalcer> hrun_corrector(state,
+                                                      context,
+                                                      penalty_calcer);
+        if (make_only_simple_corrections) {
+          hrun_corrector.AddOnlySimpleCorrections(candidates);
+        } else {
+          hrun_corrector.AddPossibleCorrections(candidates);
+        }
+        queue_overflow |= Flush(candidates, corrections, queue_limit, read.size());
+      }
+    }
+    is_too_many_corrections = queue_overflow;
+
+    return read;
+  }
+
+ public:
+
+  ReadCorrector(const KMerData& kmer_data,
+                 const PenaltyCalcerFactory& factory)
+      : data(kmer_data)
+      , penalty_calcer_factory(factory) {}
+
+  ~ReadCorrector() {
+    INFO("Skipped reads count: " << skipped_reads);
+    if (queue_overflow_reads) {
+      WARN("Too many possible corrections in some reads (" << queue_overflow_reads << "), something may be wrong");
+    }
+  }
+
+  std::string Correct(const io::SingleRead& read,
+                      bool keep_uncorrected_ends = true,
+                      bool debug = false,
+                      uint simple_passes_count = 0,
+                      uint complex_passes_count = 1) const {
+
+    std::string current_read = read.GetSequenceString();
+
+    PenaltyCalcer penalty_calcer = penalty_calcer_factory(current_read);
+
+    bool overflow = false;
+
+    for (uint pass = 0; pass < 2 * (simple_passes_count + complex_passes_count); ++pass) {
+      const bool reverse = pass % 2 == 0;  // tail has more errors, so let's start with "simple" part
+      const bool only_simple = pass < 2 * simple_passes_count;
+      if (reverse) {
+        current_read = ReverseComplement(current_read);
+      }
+      const auto solid_island = penalty_calcer.SolidIsland(current_read);
+      const size_t solid_length = solid_island.right_ - solid_island.left_;
+
+      if (debug) {
+#pragma omp critical
+        {
+          std::cerr << "Solid length: " << solid_length << " / "
+                    << current_read.size() << std::endl;
+          std::cerr << "Position: " << solid_island.left_ << " / "
+                    << solid_island.right_ << std::endl;
+        }
+      }
+
+      if (solid_length == 0 || solid_length == current_read.size()) {
+        if (pass == 0) {
+          if (solid_length == 0) {
+#pragma omp atomic
+            skipped_reads++;
+          }
+        }
+
+        break;
+      }
+
+      bool pass_overflow = false;
+      current_read = CorrectRight(penalty_calcer,
+                                  current_read,
+                                  solid_island.right_,
+                                  reverse,
+                                  overflow,
+                                  only_simple);
+
+      overflow  |= pass_overflow;
+
+      if (reverse) {
+        current_read = ReverseComplement(current_read);
+      }
+    }
+
+    if (overflow) {
+        #pragma omp atomic
+        queue_overflow_reads++;
+    }
+
+    if (!keep_uncorrected_ends) {
+      return penalty_calcer.TrimBadQuality(current_read);
+    }
+    return current_read;
+  }
+};
+
+};      // namespace correction
+};      // namespace hammer
+#endif  // __HAMMER_IT_READ_CORRECTOR_HPP__
diff --git a/src/projects/ionhammer/read_corrector_structs_new.h b/src/projects/ionhammer/read_corrector_structs_new.h
new file mode 100644
index 0000000..05ef303
--- /dev/null
+++ b/src/projects/ionhammer/read_corrector_structs_new.h
@@ -0,0 +1,740 @@
+//
+// Created by Vasily Ershov on 19.03.16.
+//
+
+#ifndef PROJECT_READ_CORRECTOR_INFO_H
+#define PROJECT_READ_CORRECTOR_INFO_H
+
+#include <functional>
+#include <queue>
+#include "hkmer.hpp"
+
+namespace hammer {
+namespace correction {
+
+namespace numeric = boost::numeric::ublas;
+using HRun = HomopolymerRun;
+
+template <class Moveable>
+inline Moveable pop_queue(std::priority_queue<Moveable>& queue) {
+  Moveable result(std::move(const_cast<Moveable&&>(queue.top())));
+  queue.pop();
+  return result;
+}
+
+struct IonEvent {
+  
+  IonEvent(const char nucl = 0, const char observed_size = 0,
+            const char fixed_size = 0, const bool to_good_correction = false)
+      : nucl_(nucl),
+        overserved_size_(observed_size),
+        fixed_size_(fixed_size),
+        is_to_good_correction_(to_good_correction) {}
+
+  IonEvent(const IonEvent& other) = default;
+
+  char nucl_;
+  char overserved_size_;
+  char fixed_size_;
+  bool is_to_good_correction_;
+
+  inline HRun FixedHRun() const {
+    return HRun((uint8_t)nucl_, (uint8_t)fixed_size_);
+  }
+
+  inline HRun ObservedHRun() const {
+    return HRun((uint8_t)nucl_, (uint8_t)overserved_size_);
+  }
+};
+
+class CorrectedRead {
+private:
+  std::vector<HRun> runs_;
+  std::shared_ptr<CorrectedRead> previous_;
+
+ public:
+  CorrectedRead() : previous_(nullptr) {}
+
+  CorrectedRead(std::shared_ptr<CorrectedRead> previous)
+      : previous_(previous) {}
+
+  CorrectedRead(std::vector<HRun>&& runs,
+                 std::shared_ptr<CorrectedRead> previous)
+      : runs_(std::move(runs)), previous_(previous) {}
+
+  inline void Add(const HRun hrun) { runs_.push_back(hrun); }
+
+  size_t Size() const {
+    size_t size = previous_ != nullptr ? previous_->Size() : 0;
+    for (auto hrun : runs_) {
+      size += hrun.len;
+    }
+    return size;
+  }
+
+  inline void Fill(std::string& result) const {
+    if (previous_ != nullptr) {
+      previous_->Fill(result);
+    }
+
+    for (auto hrun : runs_) {
+      result += hrun.str();
+    }
+  }
+
+  inline std::string ToString() const {
+    std::string result;
+    result.reserve(Size() + 10);
+    Fill(result);
+    return result;
+  }
+};
+
+template <class PenaltyState>
+class CorrectionState {
+  template <class>
+  friend class MoveToNextDivergence;
+  template <class>
+  friend class StateBuilder;
+
+ private:
+  PenaltyState penalty_state;
+  HKMer kmer_;
+  std::shared_ptr<CorrectedRead> current_read_ = std::shared_ptr<CorrectedRead>(nullptr);
+  int16_t cursor_ = 0;
+  int16_t corrections_ = 0;
+
+ public:
+  const HKMer& GetHKMer() const { return kmer_; }
+
+  inline double Penalty() const { return penalty_state.Penalty(); }
+
+  inline size_t TotalCorrections() const { return (size_t)corrections_; }
+
+  const CorrectedRead* Read() const { return current_read_.get(); }
+
+  unsigned Position() const { return (unsigned)cursor_; }
+};
+
+class CorrectionContext {
+ private:
+  std::vector<char> read_;
+  std::vector<uint8_t> hrun_sizes_;
+  const KMerData& data_;
+  bool reversed_;
+
+  inline void FillHRunSizes(const std::vector<char>& read,
+                            std::vector<uint8_t>& hrun_sizes) const {
+    size_t offset = 0;
+    hrun_sizes.resize(read.size());
+
+    while (offset < read.size()) {
+      size_t cursor = offset;
+      while (cursor < read.size() && read[cursor] == read[offset]) {
+        ++cursor;
+      };
+      uint8_t sz = (uint8_t)(cursor - offset);
+      while (sz > 0) {
+        hrun_sizes[offset++] = sz;
+        --sz;
+      }
+    }
+  }
+
+ public:
+  CorrectionContext(const KMerData& data, const std::string& read,
+                     bool reverse)
+      : data_(data)
+      , reversed_(reverse) {
+    read_.resize(read.size());
+    for (size_t i = 0; i < read.size(); ++i) {
+      read_[i] = dignucl(read[i]);
+    }
+
+    FillHRunSizes(read_, hrun_sizes_);
+  }
+
+  inline const std::vector<char>& GetRead() const { return read_; }
+
+  inline size_t GetOriginalOffset(const size_t offset) const {
+    if (reversed_) {
+      return read_.size() - offset;
+    }
+    return offset;
+  }
+
+  inline bool IsReversed() const { return reversed_; }
+
+  inline HRun GetHRun(size_t offset) const {
+    return HRun((uint8_t)read_[offset], (uint8_t)hrun_sizes_[offset]);
+  }
+
+  inline KMerStat const* TryGetKMerStats(const HKMer& kmer) const {
+    auto idx = data_.checking_seq_idx(kmer);
+    return idx == -1ULL ? nullptr : &data_[kmer];
+  }
+
+  inline bool Skip(const HKMer& kmer) const {
+    auto stat = TryGetKMerStats(kmer);
+    return stat != nullptr ? stat->skip() : false;
+  }
+};
+
+//
+template <class PenaltyCalcer>
+class StateBuilder {
+  using State = CorrectionState<typename PenaltyCalcer::PenaltyState>;
+  const State& previous_;
+  const PenaltyCalcer& penalty_calcer_;
+  const CorrectionContext& context_;
+  State next_;
+
+ public:
+  StateBuilder(const State& previous,
+               const PenaltyCalcer& penalty_calcer,
+                const CorrectionContext& context)
+      : previous_(previous),
+        penalty_calcer_(penalty_calcer),
+        context_(context),
+        next_() {
+    next_.current_read_.reset(new CorrectedRead(previous_.current_read_));
+    next_.kmer_ = previous_.kmer_;
+    next_.penalty_state = previous_.penalty_state;
+    next_.cursor_ = previous_.cursor_;
+    next_.corrections_ = previous_.corrections_;
+  }
+
+  inline void AddEvent(const IonEvent& event) {
+    if (event.fixed_size_ != 0) {
+      const HRun run = event.FixedHRun();
+      next_.kmer_ <<= run;
+      next_.current_read_->Add(run);
+    }
+
+    next_.cursor_ = (int16_t)(next_.cursor_ + event.overserved_size_);
+    penalty_calcer_.Update(next_.penalty_state, event,
+                         context_.TryGetKMerStats(next_.kmer_));
+
+    if (event.fixed_size_ != event.overserved_size_) {
+      next_.corrections_++;
+    }
+  }
+
+  inline State Build() { return next_; }
+
+  static State Initial(const CorrectionContext& context,
+                       const PenaltyCalcer& penalty,
+                       unsigned skip) {
+    State state;
+    state.penalty_state = PenaltyCalcer::CreateState(
+        context.IsReversed(), (unsigned)context.GetRead().size());
+    state.current_read_.reset(new CorrectedRead());
+    size_t offset = 0;
+    size_t minSkip = 0;
+
+    for (unsigned i = 0; i < hammer::K; ++i) {
+      minSkip += context.GetHRun(minSkip).len;
+      if (minSkip >= context.GetRead().size()) {
+        break;
+      }
+    }
+
+    if (minSkip > skip) {
+      skip = (unsigned)minSkip;
+    }
+    state.cursor_ = (int16_t)skip;
+
+    while (offset < skip) {
+      HRun run = context.GetHRun(offset);
+      state.kmer_ <<= run;
+      state.current_read_->Add(context.GetHRun(offset));
+      penalty.UpdateInitial(state.penalty_state,
+                            IonEvent(run.nucl, run.len, run.len, true),
+                            context.TryGetKMerStats(state.kmer_));
+      offset += run.len;
+    }
+    return state;
+  }
+};
+
+// Scans forward from the current state over homopolymer runs that the
+// penalty calcer says may be skipped (need no correction), buffering them
+// in `Proceeded`; Move() then commits the buffered runs to the state.
+template <class PenaltyCalcer>
+class MoveToNextDivergence {
+  using State = CorrectionState<typename PenaltyCalcer::PenaltyState>;
+  std::vector<IonEvent> Proceeded;  // runs accepted so far, not yet committed
+  State& state_;
+  const CorrectionContext& context_;
+  const PenaltyCalcer& calcer_;
+  unsigned cursor_;  // scan position; state_.cursor_ only changes in Move()
+
+ public:
+  MoveToNextDivergence(State& state,
+                       const CorrectionContext& context,
+                       const PenaltyCalcer& calcer)
+      : state_(state),
+        context_(context),
+        calcer_(calcer),
+        cursor_((unsigned)state.cursor_) {}
+
+  // Advance over runs while the extended k-mer is skippable.  Returns true
+  // iff at least one run was consumed (the cursor moved).
+  inline bool FindNextDivergence() {
+    const auto& context = context_;
+    const size_t readSize = context.GetRead().size();
+    HKMer currentHKMer = state_.kmer_;
+
+    while (cursor_ < readSize) {
+      const HRun hrun = context.GetHRun(cursor_);
+      currentHKMer <<= hrun;
+
+      if (calcer_.Skip(currentHKMer)) {
+        Proceeded.push_back({hrun.Nucl(), hrun.Len(), hrun.Len(), true});
+        cursor_ += hrun.len;
+      } else {
+        break;
+      }
+    }
+    return cursor_ != (unsigned)state_.cursor_;
+  }
+
+  // Commit the buffered runs into the state (read, k-mer, penalty, cursor).
+  // Only meaningful after FindNextDivergence() returned true — used only
+  // while moving along a branch.
+  inline void Move() {
+    for (unsigned i = 0; i < Proceeded.size(); ++i) {
+      state_.current_read_->Add(Proceeded[i].FixedHRun());
+      state_.kmer_ <<= Proceeded[i].FixedHRun();
+      calcer_.Update(state_.penalty_state, Proceeded[i],
+                    context_.TryGetKMerStats(state_.kmer_));
+    }
+    state_.cursor_ = (int16_t)cursor_;
+  }
+};
+
+// Builds the successor state that keeps the current homopolymer run as-is
+// but flags the event as "not a good correction", letting the penalty
+// calcer charge for passing through a suspicious run.
+template <class PenaltyCalcer>
+class SkipMayBeBadHRun {
+private:
+  using TState = CorrectionState<typename PenaltyCalcer::PenaltyState>;
+  const TState& previous_;
+  const CorrectionContext& context_;
+  const PenaltyCalcer& calcer_;
+
+ public:
+  SkipMayBeBadHRun(const TState& previous,
+                   const CorrectionContext& context,
+                    const PenaltyCalcer& calcer)
+      : previous_(previous)
+        , context_(context)
+        , calcer_(calcer) {}
+
+  // Produce the next state: the run at the current position is consumed
+  // unchanged, with is_to_good_correction_ == false.
+  inline TState State() {
+    StateBuilder<PenaltyCalcer> nextBuilder(previous_, calcer_, context_);
+    const auto hrun = context_.GetHRun(previous_.Position());
+    nextBuilder.AddEvent(IonEvent(hrun.nucl, hrun.len, hrun.len, false));
+    return nextBuilder.Build();
+  }
+};
+
+// Enumerates candidate sizes for the homopolymer run appended at position
+// K - 1 of a k-mer: keep the observed size, or try insertions (longer run)
+// and deletions (shorter run) until the resulting k-mer is "good" by the
+// supplied predicate.
+class HRunSizeSearcher {
+ private:
+  HKMer hkmer_;                   // k-mer with the run under test at K - 1
+  const uint8_t observed_nucl_;
+  const char observed_size_;
+  // NOTE(review): reference member — the caller's predicate object must
+  // outlive this searcher.
+  const std::function<bool(const hammer::HKMer&)>& is_good_func;
+
+ public:
+  HRunSizeSearcher(const HKMer& prev,
+                    HRun run,
+                    std::function<bool(const hammer::HKMer&)>& good)
+      : hkmer_(prev),
+        observed_nucl_(run.nucl),
+        observed_size_(run.len),
+        is_good_func(good) {
+    assert(hkmer_[K - 1].nucl != run.nucl);
+    hkmer_ <<= run;
+  }
+
+  // Keep the run at its observed size; the event's "good" flag records
+  // whether the unmodified k-mer passes the predicate.
+  inline IonEvent WithoutCorrection() {
+    hkmer_[K - 1].len = observed_size_ & 0x3F;  // len is a 6-bit field
+    return IonEvent(observed_nucl_, observed_size_, observed_size_, is_good_func(hkmer_));
+  }
+
+  // All insertion fixes (observed + 1 .. observed + max_error_size) that
+  // yield a good k-mer; with greedy == true only the first hit is kept.
+  inline std::vector<IonEvent> TryFindInsertions(char max_error_size = 3,
+                                                  const bool greedy = true) {
+    std::vector<IonEvent> results;
+    results.reserve(max_error_size);
+
+    const char nucl = hkmer_[K - 1].nucl;
+    for (char i = 1; i <= max_error_size; ++i) {
+      hkmer_[K - 1].len = (observed_size_ + i) & 0x3F;
+      if (is_good_func(hkmer_)) {
+        results.push_back(
+            IonEvent(nucl, observed_size_, (uint8_t)(observed_size_ + i), true));
+        if (greedy) {
+          break;
+        }
+      }
+    }
+    return results;
+  }
+
+  // All deletion fixes (observed - 1 down to max(1, observed - max_error_size))
+  // that yield a good k-mer; with greedy == true only the first hit is kept.
+  inline std::vector<IonEvent> TryFindAllDeletions(const char max_error_size = 3,
+                                                    const bool greedy = true) {
+    std::vector<IonEvent> results;
+    results.reserve(max_error_size);
+
+    const char nucl = hkmer_[K - 1].nucl;
+
+    const char start = (const char)std::max(1, observed_size_ - max_error_size);
+
+    for (char i = (char)(observed_size_ - 1); i >= start; --i) {
+      hkmer_[K - 1].len = i & 0x3F;
+      if (is_good_func(hkmer_)) {
+        results.push_back(IonEvent(nucl, observed_size_, i, true));
+        if (greedy) {
+          break;
+        }
+      }
+    }
+    return results;
+  }
+
+  // First insertion size that makes the k-mer good; when none is found the
+  // returned event carries observed_size_ + 1 and good == false.
+  inline IonEvent TryFindInsertion(char max_error_size = 3) {
+    const char nucl = hkmer_[K - 1].nucl;
+    bool found = false;
+    for (char i = 1; i <= max_error_size; ++i) {
+      hkmer_[K - 1].len = (observed_size_ + i) & 0x3F;
+      if (is_good_func(hkmer_)) {
+        found = true;
+        break;
+      }
+    }
+    return IonEvent(nucl, observed_size_,
+                     (const char)(found ? hkmer_[K - 1].len : observed_size_ + 1),
+                     found);
+  }
+
+  // First deletion size that makes the k-mer good; when none is found the
+  // returned event carries observed_size_ - 1 and good == false.
+  inline IonEvent TryFindDeletion(const char max_error_size = 3) {
+    const char nucl = hkmer_[K - 1].nucl;
+    bool found = false;
+
+    const char start = (const char)std::max(1, observed_size_ - max_error_size);
+
+    for (char i = (char)(observed_size_ - 1); i >= start; --i) {
+      hkmer_[K - 1].len = i & 0x3F;
+      if (is_good_func(hkmer_)) {
+        found = true;
+        break;
+      }
+    }
+    return IonEvent(nucl, observed_size_,
+                     (const char)(found ? hkmer_[K - 1].len : observed_size_ - 1),
+                     found);
+  }
+
+  // Combined search: if the observed size is already good return just that
+  // event; otherwise collect the first good insertion and the first good
+  // deletion (either, both, or neither).
+  inline std::vector<IonEvent> Find(const char max_error_size = 3) {
+    std::vector<IonEvent> events;
+
+    IonEvent without = WithoutCorrection();
+    if (without.is_to_good_correction_) {
+      events.push_back(without);
+      return events;
+    }
+
+    IonEvent insertion = TryFindInsertion(max_error_size);
+    if (insertion.is_to_good_correction_) {
+      events.push_back(insertion);
+    }
+
+    IonEvent deletion = TryFindDeletion(max_error_size);
+    if (deletion.is_to_good_correction_) {
+      events.push_back(deletion);
+    }
+
+    return events;
+  }
+};
+
+// Generates candidate corrections for the homopolymer run at the current
+// state's position and pushes the resulting successor states into a
+// priority queue.  Search bounds come from the global config.
+// NOTE(review): from AddEvent's use of fixed_size_/overserved_size_, the
+// IonEvent argument order appears to be (nucl, observed_len, fixed_len,
+// is_good) — confirm against IonEvent's declaration.
+template <class PenaltyCalcer>
+class CorrectLastHRun {
+  using TState = CorrectionState<typename PenaltyCalcer::PenaltyState>;
+  const TState& previous_;
+  const CorrectionContext& context_;
+  const PenaltyCalcer& calcer_;
+  std::function<bool(const hammer::HKMer&)> is_good_function_;
+
+  // Correction limits pulled once from the global config.
+  const unsigned kMaxFulldel = cfg::get().max_full_del;
+  const unsigned kMaxInDel = cfg::get().max_indel;
+  const unsigned kMaxFromZeroInsertion = cfg::get().max_from_zero_insertion;
+  const unsigned kMaxSecondIndel = cfg::get().max_second_indel;
+
+ private:
+  // Model the insertion of a brand-new run of a different nucleotide
+  // (length 1..kMaxFromZeroInsertion) before `run`, then re-fit run's own
+  // size with a second searcher; push every two-event combination that
+  // yields a good k-mer.  Returns true if anything was pushed.
+  inline bool AddAnotherNuclInsertions(const HRun run,
+                                       const TState& previous,
+                                       std::priority_queue<TState>& corrections) {
+    bool found = false;
+    const auto& kmer = previous.GetHKMer();
+
+    for (uint8_t c = 0; c < 4; ++c) {
+      // Inserted nucleotide must differ from both neighbours, otherwise
+      // it would merge with an adjacent run.
+      if (c == run.nucl || c == kmer[K - 1].nucl) {
+        continue;
+      }
+
+      HKMer another_nucl_insertion = kmer;
+      another_nucl_insertion <<= HRun(c, 1);
+
+      for (unsigned i = 1; i <= kMaxFromZeroInsertion; ++i) {
+        another_nucl_insertion[K - 1].len = i & 0x3F;
+        if (is_good_function_(another_nucl_insertion)) {
+          HRunSizeSearcher rest_searcher(another_nucl_insertion, run, is_good_function_);
+          auto events = rest_searcher.Find((const char)kMaxSecondIndel);
+          for (auto& event : events) {
+            if (event.is_to_good_correction_) {
+              StateBuilder<PenaltyCalcer> builder(previous, calcer_, context_);
+              builder.AddEvent(IonEvent(c, 0, (const char)i, true));  // new insertion
+              builder.AddEvent(event);
+              corrections.emplace(builder.Build());
+              found = true;
+            }
+          }
+          break;
+        }
+      }
+    }
+    return found;
+  }
+
+ public:
+  CorrectLastHRun(const TState& previous,
+                  const CorrectionContext& context,
+                   const PenaltyCalcer& calcer)
+      : previous_(previous),
+        context_(context),
+        calcer_(calcer),
+        is_good_function_(calcer_.Good()) {}
+
+  // Conservative candidate set: +/- indel_size on the current run, plus
+  // substitution-style fixes (drop the run, insert a different nucleotide)
+  // for 1-base runs, and split-run fixes (aa..a -> a..a X a..a) for runs
+  // longer than 2.  Does nothing if the prefix k-mer itself is not good.
+  inline void AddOnlySimpleCorrections(std::priority_queue<TState>& corrections,
+                                       unsigned indel_size = 1) {
+    const unsigned cursor = previous_.Position();
+    const HRun run = context_.GetHRun(cursor);
+
+    if (!is_good_function_(previous_.GetHKMer())) {
+      return;
+    }
+
+    {
+      HRunSizeSearcher searcher(previous_.GetHKMer(), run, is_good_function_);
+      auto insertion = searcher.TryFindInsertion((char)indel_size);
+      if (insertion.is_to_good_correction_) {
+        StateBuilder<PenaltyCalcer> builder(previous_, calcer_, context_);
+        builder.AddEvent(insertion);
+        corrections.emplace(builder.Build());
+      }
+
+      auto deletion = searcher.TryFindDeletion((const char)indel_size);
+      if (deletion.is_to_good_correction_) {
+        StateBuilder<PenaltyCalcer> builder(previous_, calcer_, context_);
+        builder.AddEvent(deletion);
+        corrections.emplace(builder.Build());
+      }
+    }
+    // Substitution of a single-base run, modeled as full deletion followed
+    // by a 1-base insertion of a different nucleotide.
+    if (run.len == 1 && (cursor + 1 < context_.GetRead().size())) {
+      auto nextRun = context_.GetHRun(cursor + 1);
+      {
+        for (char c = 0; c < 4; ++c) {
+          if (c == run.nucl || c == nextRun.nucl) {
+            continue;
+          }
+
+          HKMer kmer = previous_.GetHKMer();
+          kmer <<= HRun((uint8_t)c, 1);
+
+          if (is_good_function_(kmer)) {
+            StateBuilder<PenaltyCalcer> builder(previous_, calcer_, context_);
+            builder.AddEvent(IonEvent(run.nucl, run.len, 0, true));
+            builder.AddEvent(IonEvent(c, 0, 1, true));
+            corrections.emplace(builder.Build());
+          }
+        }
+      }
+    } else if (run.len > 2) {
+      // Split a long run around a substituted middle base: try every split
+      // point i, replacing one base with nucleotide c.
+      for (char c = 0; c < 4; ++c) {
+        if (c == run.nucl) {
+          continue;
+        }
+
+        HKMer kmer = previous_.GetHKMer();
+        kmer <<= HRun(run.nucl, (uint8_t)(run.len - 1));
+        kmer <<= HRun(c, 1);
+        kmer <<= HRun(run.nucl, 1);
+
+        const unsigned maxLen = (unsigned)(run.len - 2);
+        for (unsigned i = 0; i < maxLen; ++i) {
+          kmer[K - 3].len = (i + 1) & 0x3F;
+          kmer[K - 1].len = (maxLen - i) & 0x3F;
+          if (is_good_function_(kmer)) {
+            StateBuilder<PenaltyCalcer> builder(previous_, calcer_, context_);
+            builder.AddEvent(IonEvent(run.nucl, (char)(i + 2), (char)(i + 1), true));
+            builder.AddEvent(IonEvent(c, (char)0, (char)1, true));
+            builder.AddEvent(
+                IonEvent(run.nucl, (char)(maxLen - i), (char)(maxLen - i), true));
+            corrections.emplace(builder.Build());
+          }
+        }
+      }
+    }
+  }
+
+  // Full candidate set.  When the prefix k-mer is good: size fixes, partial
+  // deletion + new-nucleotide insertion, and (as a last resort) full
+  // deletion of the run.  When even the prefix k-mer is bad: tentatively
+  // over-/under-call the run by one and look ahead up to K - 1 runs for a
+  // good k-mer, pushing the tentative (not-good) event if one appears.
+  // Returns true if any candidate was pushed.
+  inline bool AddPossibleCorrections(std::priority_queue<TState>& corrections) {
+    const unsigned cursor = previous_.Position();
+    const HRun run = context_.GetHRun(cursor);
+    bool found = false;
+
+    if (is_good_function_(previous_.GetHKMer())) {
+      HRunSizeSearcher searcher(previous_.GetHKMer(), run, is_good_function_);
+      {
+        auto insertions = searcher.TryFindInsertions((char)kMaxInDel);
+        for (const auto& insertion : insertions) {
+          if (insertion.is_to_good_correction_) {
+            StateBuilder<PenaltyCalcer> builder(previous_, calcer_, context_);
+            builder.AddEvent(insertion);
+            corrections.emplace(builder.Build());
+            found = true;
+          }
+        }
+      }
+
+      {
+        auto deletions = searcher.TryFindAllDeletions((const char)std::max((int)run.len, 1));
+        if (deletions.size()) {
+          for (const auto& deletion : deletions) {
+            const uint8_t restSize =
+                (uint8_t)(deletion.overserved_size_ - deletion.fixed_size_);
+            if (restSize <= kMaxInDel) {
+              StateBuilder<PenaltyCalcer> builder(previous_, calcer_, context_);
+              builder.AddEvent(deletion);
+              corrections.emplace(builder.Build());
+            }
+
+            // Try an insertion after part of the hrun, i.e. errors of the
+            // form "aaaaa" -> "aaa g aa".
+            if (restSize > 1) {
+              StateBuilder<PenaltyCalcer> indel_builder(previous_,
+                                                        calcer_,
+                                                        context_);
+              const IonEvent partDel = IonEvent(
+                  deletion.nucl_, deletion.fixed_size_, deletion.fixed_size_, true);
+              indel_builder.AddEvent(partDel);
+              const TState state = indel_builder.Build();
+              found |= AddAnotherNuclInsertions(HRun(deletion.nucl_, restSize),
+                                                state, corrections);
+            }
+          }
+          found = true;
+        }
+      }
+
+      if (!found) {
+        found |= AddAnotherNuclInsertions(run, previous_, corrections);
+
+        int read_size = (int)context_.GetRead().size();
+        const int next_cursor = cursor + run.len;
+
+        if (next_cursor >= read_size) {
+          return found;
+        }
+        const HRun next_run = context_.GetHRun((size_t)next_cursor);
+
+        // try full deletion of hrun.
+        if (run.len <= kMaxFulldel) {
+          if (next_run.nucl != previous_.GetHKMer()[K - 1].nucl) {
+            StateBuilder<PenaltyCalcer> builder(previous_, calcer_, context_);
+            builder.AddEvent(IonEvent(run.nucl, run.len, 0, true));  // full deletion
+            corrections.emplace(builder.Build());
+            found = true;
+          } else {
+            // Deleting the run merges its neighbours; also consider
+            // deleting the following run, or re-inserting a different
+            // nucleotide in its place.
+            {
+              StateBuilder<PenaltyCalcer> builder(previous_, calcer_, context_);
+              builder.AddEvent(IonEvent(run.nucl, run.len, 0, true));  // full deletion
+              builder.AddEvent(IonEvent(next_run.nucl, next_run.len, 0, true));  // full deletion
+              corrections.emplace(builder.Build());
+            }
+            {
+              StateBuilder<PenaltyCalcer> builder(previous_, calcer_, context_);
+              builder.AddEvent(IonEvent(run.nucl, run.len, 0, true));  // full deletion
+              auto state = builder.Build();
+              found |= AddAnotherNuclInsertions(next_run, state, corrections);
+            }
+          }
+        }
+      }
+    } else {
+      // Prefix k-mer already bad: probe an over-call (+1) of this run and
+      // scan ahead — if any downstream k-mer becomes good, keep the
+      // tentative event with is_to_good_correction_ == false.
+      {
+        HKMer test = previous_.GetHKMer();
+        HRun fixed = run;
+        fixed.len = (fixed.len + 1) & 0x3F;
+        test <<= fixed;
+        size_t local_cursor = cursor + run.len;
+
+        for (unsigned i = 0; i < (K - 1); ++i) {
+          if (local_cursor >= context_.GetRead().size()) {
+            break;
+          }
+          const HRun cursorRun = context_.GetHRun(local_cursor);
+          test <<= cursorRun;
+          local_cursor += cursorRun.len;
+
+          if (is_good_function_(test)) {
+            found = true;
+            StateBuilder<PenaltyCalcer> builder(previous_, calcer_, context_);
+            builder.AddEvent(IonEvent(run.nucl, run.len, (char) (run.len + 1), false));
+            corrections.emplace(builder.Build());
+            break;
+          }
+        }
+      }
+
+      // Same probe with an under-call (-1), when the run is long enough.
+      if (run.len > 1) {
+        HKMer test = previous_.GetHKMer();
+        HRun fixed = run;
+        fixed.len = (fixed.len - 1) & 0x3F;
+        test <<= fixed;
+
+        size_t local_cursor = cursor + run.len;
+
+        for (unsigned i = 0; i < (K - 1); ++i) {
+          if (local_cursor >= context_.GetRead().size()) {
+            break;
+          }
+          const HRun cursorRun = context_.GetHRun(local_cursor);
+          test <<= cursorRun;
+          local_cursor += cursorRun.len;
+
+          if (is_good_function_(test)) {
+            found = true;
+            StateBuilder<PenaltyCalcer> builder(previous_, calcer_, context_);
+            builder.AddEvent(IonEvent(run.nucl, run.len, (uint8_t)(run.len - 1), false));
+            corrections.emplace(builder.Build());
+            break;
+          }
+        }
+      }
+    }
+    return found;
+  }
+};
+}  // namespace correction
+}  // namespace hammer
+
+namespace std {
+using namespace hammer::correction;
+
+// Ordering used by std::priority_queue<CorrectionState<...>>: compare by
+// penalty first, then by position, so top() is the state that is greatest
+// under (penalty, position).  Specializing std::less for a program-defined
+// type is permitted by the standard.
+template <class PenaltyState>
+struct less<CorrectionState<PenaltyState> > {
+  bool operator()(const CorrectionState<PenaltyState>& left,
+                  const CorrectionState<PenaltyState>& right) const {
+    return left.Penalty() < right.Penalty() ||
+           (left.Penalty() == right.Penalty() &&
+            left.Position() < right.Position());
+  }
+};
+
+}  // namespace std
+
+#endif  // PROJECT_READ_CORRECTOR_INFO_H
diff --git a/src/projects/ionhammer/reference.cpp b/src/projects/ionhammer/reference.cpp
new file mode 100644
index 0000000..054401f
--- /dev/null
+++ b/src/projects/ionhammer/reference.cpp
@@ -0,0 +1 @@
+#include "reference.h"
diff --git a/src/projects/ionhammer/reference.h b/src/projects/ionhammer/reference.h
new file mode 100644
index 0000000..2c48ee6
--- /dev/null
+++ b/src/projects/ionhammer/reference.h
@@ -0,0 +1,59 @@
+//
+// Created by Vasiliy Ershov on 10/07/16.
+//
+
+#ifndef PROJECT_REFERENCE_H
+#define PROJECT_REFERENCE_H
+#include "utils/logger/log_writers.hpp"
+
+#include "hkmer.hpp"
+
+#include "io/reads/file_reader.hpp"
+#include "io/reads/read_processor.hpp"
+#include "kmer_helpers.h"
+
+#include <cstddef>
+#include <iostream>
+#include <mutex>
+#include <unordered_set>
+
+// Oracle backed by a reference genome: loads the reference HK-mer set from
+// a file and answers membership queries, plus simple coverage statistics
+// for a candidate k-mer set.
+class TGenomReferenceOracle {
+ private:
+  const std::string FilePath;
+  HKMerSet ReferenceKMers;
+
+ public:
+  TGenomReferenceOracle(const std::string& filePath) : FilePath(filePath) {
+    FillSet(ReferenceKMers, filePath.data());
+    INFO("Reference kmers:    " << ReferenceKMers.size());
+  }
+
+  // True iff the k-mer occurs in the reference set.
+  bool IsGenomic(const hammer::HKMer& kmer) const {
+    return ReferenceKMers.count(kmer) > 0;
+  }
+
+  // Log how many reference k-mers appear in `kmers`.
+  // NOTE(review): the logged "Genomic" percentage is relative to the
+  // reference size (recall-style), not to the candidate set size — confirm
+  // this is intended.
+  void KMerSetStats(const HKMerSet& kmers, std::string setName) const {
+    INFO("Stats for " << setName);
+
+    size_t total_genomic = ReferenceKMers.size();
+    size_t total_set = kmers.size();
+
+    size_t set_genomic = 0;
+
+    for (auto it = ReferenceKMers.cbegin(), et = ReferenceKMers.cend();
+         it != et; ++it) {
+      if (kmers.count(*it) > 0) {
+        set_genomic += 1;
+      }
+    }
+
+    long set_non_genomic = total_set - set_genomic;
+
+    INFO("Set kmers:       " << total_set);
+    INFO("Genomic: " << set_genomic << " ("
+                     << ((double)set_genomic * 100.0 / (double)total_genomic) << "%)");
+    INFO("NonGenomic: " << set_non_genomic);
+  }
+};
+
+#endif  // PROJECT_REFERENCE_H
diff --git a/src/projects/ionhammer/subcluster.cpp b/src/projects/ionhammer/subcluster.cpp
index 1b27e2f..4eaa425 100644
--- a/src/projects/ionhammer/subcluster.cpp
+++ b/src/projects/ionhammer/subcluster.cpp
@@ -14,25 +14,42 @@
 
 #include <boost/numeric/ublas/matrix.hpp>
 
-#include <vector>
 #include <iostream>
+#include <vector>
+#include "quality_metrics.h"
+#include <boost/math/special_functions/gamma.hpp>
+
+using namespace hammer;
+using namespace hammer_config;
+using namespace n_gamma_poisson_model;
+
+// Log-likelihood of observing HK-mer `to` given source `from`, summed
+// per position from the cluster model's run-length error model.  Only run
+// lengths are compared; nucleotides are ignored, so callers are expected
+// to pass k-mers with matching nucleotide sequences.
+double TGenomicHKMersEstimator::GenerateLikelihood(const HKMer& from,
+                                                   const HKMer& to) const {
+  double llGenerate = 0;
+  for (size_t i = 0; i < hammer::K; ++i) {
+    llGenerate += cluster_model_.ErrorLogLikelihood(from[i].len, to[i].len);
+  }
+  return llGenerate;
+}
 
-hammer::HKMer center(const KMerData &data, const std::vector<size_t>& kmers) {
+HKMer TGenomicHKMersEstimator::Center(const KMerData& data,
+                                      const std::vector<size_t>& kmers) {
   hammer::HKMer res;
   namespace numeric = boost::numeric::ublas;
 
   for (unsigned i = 0; i < hammer::K; ++i) {
     numeric::matrix<double> scores(4, 64, 0);
     for (size_t j = 0; j < kmers.size(); ++j) {
-      const hammer::KMerStat &k = data[kmers[j]];
-      // FIXME: switch to MLE when we'll have use per-run quality values
+      const hammer::KMerStat& k = data[kmers[j]];
+// FIXME: switch to MLE when we'll have use per-run quality values
 #if 1
-      scores(k.kmer[i].nucl, k.kmer[i].len) += double(k.count) * (1 - k.qual);
+      scores(k.kmer[i].nucl, k.kmer[i].len) += k.count * (1.0 - exp(k.qual));
 #else
       for (unsigned n = 0; n < 4; ++n)
         for (unsigned l = 1; l < 64; ++l)
-          scores(n, l) += k.count * (n == k.kmer[i].nucl && l == k.kmer[i].len ?
-                                     log(1 - k.qual) : log(k.qual) - log(4*63 - 1));
+          scores(n, l) += k.count * (n == k.kmer[i].nucl && l == k.kmer[i].len
+                                         ? log(1 - k.qual)
+                                         : log(k.qual) - log(4 * 63 - 1));
 #endif
     }
 
@@ -42,94 +59,210 @@ hammer::HKMer center(const KMerData &data, const std::vector<size_t>& kmers) {
   return res;
 }
 
-bool assign(KMerData &kmer_data, const std::vector<size_t> &cluster) {
-  hammer::HKMer c = center(kmer_data, cluster);
-  bool nonread = false;
+// Consensus center of a set of k-mers where each k-mer's vote at every
+// position is weighted by count * exp(genomic log-likelihood), i.e. by its
+// posterior probability of being genomic.
+HKMer TGenomicHKMersEstimator::ByPosteriorQualCenter(
+    const std::vector<size_t>& kmers) {
+  hammer::HKMer res;
+  namespace numeric = boost::numeric::ublas;
 
-  size_t idx = kmer_data.seq_idx(c);
-  if (kmer_data[idx].kmer != c) {
-#   pragma omp critical
-    {
-      idx = kmer_data.push_back(hammer::KMerStat(0, c, 1.0));
+  for (unsigned i = 0; i < hammer::K; ++i) {
+    numeric::matrix<double> scores(4, 64, 0);
+    for (size_t j = 0; j < kmers.size(); ++j) {
+      const hammer::KMerStat& kmerStat = data_[kmers[j]];
+      scores(kmerStat.kmer[i].nucl, kmerStat.kmer[i].len) +=
+          kmerStat.count * exp(cluster_model_.GenomicLogLikelihood(kmerStat));
     }
-    nonread = true;
-  }
 
-  for (size_t j = 0; j < cluster.size(); ++j)
-    kmer_data[cluster[j]].changeto = unsigned(idx);
+    res[i] = hammer::iontorrent::consensus(scores).first;
+  }
 
-  return nonread;
+  return res;
 }
 
-void dump(const KMerData &kmer_data, const std::vector<size_t> &cluster) {
-  std::cerr << "{ \n\"kmers\": {";
-  for (size_t j = 0; j < cluster.size(); ++j) {
-    if (j > 0) std::cerr << ", ";
-    std::cerr << '"' << kmer_data[cluster[j]].kmer << "\": ["
-              << kmer_data[cluster[j]].count << ", " 
-              << 1 - kmer_data[cluster[j]].qual << "] \n";
+// Subcluster one connected cluster of HK-mers: select high-count /
+// high-posterior center candidates, filter them by count thresholds and
+// dist-one quality dominance, optionally refine via consensus, then commit
+// posterior qualities back into data_ under per-kmer locks.
+// NOTE(review): the "#pragma omp atomic" counters and per-entry locks
+// suggest this runs inside an OpenMP parallel region — confirm at the
+// call site.
+void TGenomicHKMersEstimator::ProceedCluster(std::vector<size_t>& cluster) {
+  std::sort(cluster.begin(), cluster.end(), CountCmp(data_));
+
+  std::vector<double> qualities;
+  std::vector<size_t> candidates;
+
+  // Candidate selection: descending by count; the top k-mer is always kept,
+  // the rest must clear the count and posterior thresholds.
+  for (size_t i = 0; i < cluster.size(); ++i) {
+
+    const auto idx = cluster[i];
+    const auto& stat = data_[idx];
+
+    if ((uint)stat.count < cfg::get().subcluster_min_count && (i > 0)) {
+      break;
+    }
+
+    const double qual = cluster_model_.StatTransform(stat);
+    const double posterior = cluster_model_.GenomicLogLikelihood(stat);
+
+    if (!std::isfinite(posterior)) {
+      continue;
+    }
+
+    if (posterior > cfg::get().subcluster_threshold || i == 0) {
+      candidates.push_back(idx);
+      qualities.push_back(qual);
+    }
   }
-  std::cerr << "}, \"center\": { \"status\": ";
-  hammer::HKMer c = center(kmer_data, cluster);
-  size_t idx = kmer_data.seq_idx(c);
-  if (kmer_data[idx].kmer == c) {
-    std::cerr << "\"ok\", \"center\": \"" << c << "\"}\n";
-  } else {
-    std::cerr << "\"not\", \"kmer\": \"" << kmer_data[idx].kmer 
-              << "\", \"center\": \"" << c << "\"}\n";
+
+  std::vector<double> distOneBestQualities(qualities);
+  std::vector<double> countThreshold(qualities.size());
+
+
+  std::vector<double> kmerErrorRates;
+
+  // For each candidate, accumulate a count threshold from higher-ranked
+  // "parents" (scaled by edit distance) and the best quality among parents
+  // within Levenshtein distance 1.
+  {
+    for (size_t i = 0; i < candidates.size(); ++i) {
+      const auto& centerCandidate = data_[candidates[i]];
+      kmerErrorRates.push_back(exp(GenerateLikelihood(centerCandidate.kmer, centerCandidate.kmer)));
+
+      for (size_t j = 0; j < i; ++j) {
+        const auto& parent = data_[candidates[j]];
+
+        if (cfg::get().subcluster_filter_by_count_enabled) {
+          const double mult = pow(cfg::get().subcluster_count_mult, hammer::hkmerDistance(parent.kmer, centerCandidate.kmer).levenshtein_);
+          countThreshold[i] +=  mult * parent.count / kmerErrorRates[j];
+        }
+
+        if (hammer::hkmerDistance(parent.kmer, centerCandidate.kmer).levenshtein_ <= 1) {
+          distOneBestQualities[i] = std::min(distOneBestQualities[i], qualities[j]);
+        }
+      }
+
+      auto distOneParents = FindDistOneFullDels(centerCandidate);
+      for (auto distOneParent : distOneParents) {
+        const auto& parent = data_[distOneParent];
+        distOneBestQualities[i] = std::min(distOneBestQualities[i], cluster_model_.StatTransform(parent));
+
+        if (cfg::get().subcluster_filter_by_count_enabled) {
+          countThreshold[i] += cfg::get().subcluster_count_mult * parent.count / 10 / exp(GenerateLikelihood(parent.kmer, parent.kmer));
+        }
+      }
+    }
   }
-  std::cerr << "}" << std::endl;
-}
 
-size_t subcluster(KMerData &kmer_data, std::vector<size_t> &cluster) {
-  size_t nonread = 0;
+  std::vector<size_t> centerCandidates;
 
-  // First, sort the kmer indicies wrt count
-  std::sort(cluster.begin(), cluster.end(), CountCmp(kmer_data));
+  const double qualMult = cfg::get().subcluster_qual_mult;
+  //  const double alpha = cfg::get().dist_one_subcluster_alpha;
 
-  // The number of subclusters for now is really dumb: we assume that the quality should be 1.
-  size_t k = 0;
-  for (size_t i = 0; i < cluster.size(); ++i)
-      k += kmer_data[cluster[i]].qual < cfg::get().center_qual_threshold;
+  for (size_t i = 0; i < candidates.size(); ++i) {
+    const auto& candidate = data_[candidates[i]];
 
-  if (k <= 1) {
-#if 0
-    dump(kmer_data, cluster);
-#endif
-    return assign(kmer_data, cluster);
+    // Don't subcluster low-coverage hkmers with long runs: a dist-one error
+    // cannot be distinguished from noise there.
+    if (cfg::get().subcluster_filter_by_count_enabled) {
+         double upperCountThreshold = boost::math::gamma_q_inva(countThreshold[i] + 1, 0.99) - 1;
+         if (candidate.count <= upperCountThreshold) {
+            continue;
+        }
+    }
+
+    if (i != 0 && distOneBestQualities[i] * qualMult < qualities[i]) {
+      if (!cluster_model_.IsHighQuality(candidate)) {
+        continue;
+      }
+    }
+    centerCandidates.push_back(candidates[i]);
+  }
+
+  if (!centerCandidates.size()) {
+    return;
   }
 
-  // Find the closest center
-  std::vector<std::vector<size_t> > idx(k, std::vector<size_t>());
-  for (size_t i = 0; i < k; ++i)
-    idx[i].push_back(cluster[i]);
-  for (size_t i = k; i < cluster.size(); ++i) {
-    unsigned dist = std::numeric_limits<unsigned>::max();
-    size_t cidx = k;
-    hammer::HKMer kmerx = kmer_data[cluster[i]].kmer;
-    for (size_t j = 0; j < k; ++j) {
-      hammer::HKMer kmery = kmer_data[cluster[j]].kmer;
-      unsigned cdist = hammer::distanceHKMer(kmerx.begin(), kmerx.end(),
-                                             kmery.begin(), kmery.end());
-      if (cdist < dist) {
-        cidx = j;
-        dist = cdist;
+  // First consensus (it's also filtering step)
+  if (consensus_type_ != CenterType::COUNT_ARGMAX) {
+    std::set<size_t> centerCandidatesSet;
+    const size_t k = centerCandidates.size();
+    // Find the closest center
+    std::vector<HKMer> centralKmers;
+    std::vector<std::vector<size_t> > subclusters(k, std::vector<size_t>());
+
+    for (size_t i = 0; i < k; ++i) {
+      auto centerId = centerCandidates[i];
+      centralKmers.push_back(data_[centerId].kmer);
+    }
+
+    for (size_t i = 0; i < cluster.size(); ++i) {
+      double dist = std::numeric_limits<double>::infinity();
+      size_t cidx = k;
+      size_t count = 0;
+
+      size_t kmerIdx = cluster[i];
+      hammer::HKMer kmerx = data_[kmerIdx].kmer;
+
+      for (size_t j = 0; j < k; ++j) {
+        hammer::HKMer kmery = centralKmers[j];
+        double cdist = hammer::hkmerDistance(kmerx, kmery).levenshtein_;
+        // NOTE(review): data_[kmery] indexes KMerData by the k-mer itself
+        // while every other lookup here uses an integer index — verify this
+        // overload exists and that data_[centerCandidates[j]] wasn't meant.
+        if (cdist < dist || (cdist == dist && count < (size_t)data_[kmery].count)) {
+          cidx = j;
+          dist = cdist;
+          count = data_[kmery].count;
+        }
+      }
+      VERIFY(cidx < k);
+      subclusters[cidx].push_back(cluster[i]);
+    }
+
+    for (size_t i = 0; i < k; ++i) {
+      const auto& subcluster = subclusters[i];
+
+      HKMer center;
+      if (consensus_type_ == CenterType::CONSENSUS) {
+        center = Center(data_, subcluster);
+      } else if (consensus_type_ == CenterType::BY_POSTERIOR_QUALITY) {
+        center = ByPosteriorQualCenter(subcluster);
+      } else {
+        INFO("Unsupported center type: will use mean instead");
+        center = Center(data_, subcluster);
+      }
+
+      auto centerIdx = data_.checking_seq_idx(center);
+
+      // Keep the consensus only when it exists in the data and (for k > 1)
+      // agrees with the original candidate.
+      if ((k == 1 && centerIdx != -1ULL) || (centerIdx == centerCandidates[i])) {
+        centerCandidatesSet.insert(centerIdx);
       }
     }
-    VERIFY(cidx < k);
-    idx[cidx].push_back(cluster[i]);
+
+    centerCandidates = std::vector<size_t>(centerCandidatesSet.begin(),
+                                           centerCandidatesSet.end());
   }
 
-  for (auto it = idx.begin(), et = idx.end(); it != et; ++it) {
-    const std::vector<size_t> &subcluster = *it;
+  std::vector<double> posteriorQualities;
+  // Now let's "estimate" quality
+  std::vector<char> distOneGoodCenters(centerCandidates.size());
 
-    if (assign(kmer_data, subcluster)) {
-      nonread += 1;
-#if 0
-      dump(kmer_data, subcluster);
-#endif
+  for (uint k = 0; k < centerCandidates.size(); ++k) {
+    const auto idx = centerCandidates[k];
+    const KMerStat& centerCandidate = data_[idx];
+    for (uint j = 0; j < centerCandidates.size(); ++j) {
+      if (hammer::hkmerDistance(centerCandidate.kmer, data_[centerCandidates[j]].kmer).hamming_ == 1) {
+        distOneGoodCenters[k] = 1;
+      }
     }
+    double quality = cluster_model_.GenomicLogLikelihood(centerCandidate);
+    quality = std::isfinite(quality) ? quality : -1000;
+    posteriorQualities.push_back(max(quality, -1000.0));
   }
 
-  return nonread;
+  // Commit: under per-kmer lock, raise the stored posterior and mark
+  // dist-one centers; counters are updated atomically.
+  for (size_t i = 0; i < posteriorQualities.size(); ++i) {
+    const auto idx = centerCandidates[i];
+    data_[idx].lock();
+    const bool wasGood = data_[idx].good();
+    data_[idx].posterior_genomic_ll = (float)max(posteriorQualities[i], (double)data_[idx].posterior_genomic_ll);
+    data_[idx].dist_one_subcluster |= distOneGoodCenters[i];
+    data_[idx].unlock();
+    if (!wasGood && data_[idx].good()) {
+#pragma omp atomic
+      GoodKmers++;
+    }
+    if (!wasGood && data_[idx].skip()) {
+#pragma omp atomic
+      SkipKmers++;
+    }
+    if (wasGood) {
+#pragma omp atomic
+      ReasignedByConsenus++;
+    }
+  }
 }
diff --git a/src/projects/ionhammer/subcluster.hpp b/src/projects/ionhammer/subcluster.hpp
index a30c09f..ff05682 100644
--- a/src/projects/ionhammer/subcluster.hpp
+++ b/src/projects/ionhammer/subcluster.hpp
@@ -8,16 +8,102 @@
 #ifndef __SUBCLUSTER_HPP__
 #define __SUBCLUSTER_HPP__
 
+#include "hkmer.hpp"
+#include "kmer_data.hpp"
+#include "quality_thresholds_estimator.h"
+#include "reference.h"
+
+#include <common/adt/concurrent_dsu.hpp>
 #include <vector>
-#include <cstddef>
+#include "gamma_poisson_model.hpp"
+#include "normal_quality_model.hpp"
+#include "utils/logger/logger.hpp"
 
-#include "hkmer.hpp"
+namespace hammer {
+
+class ClusteringQuality;
+
+
+class TGenomicHKMersEstimator {
+ private:
+  KMerData& data_;
+  const n_normal_model::NormalClusterModel& cluster_model_;
+  hammer_config::CenterType consensus_type_;
+  size_t GoodKmers = 0;
+  size_t SkipKmers = 0;
+  size_t ReasignedByConsenus = 0;
+
+ public:
+  TGenomicHKMersEstimator(KMerData& data, const n_normal_model::NormalClusterModel& clusterModel,
+      hammer_config::CenterType consensusType = hammer_config::CenterType::CONSENSUS)
+      : data_(data), cluster_model_(clusterModel), consensus_type_(consensusType) {}
+
+  ~TGenomicHKMersEstimator() {
+    INFO("Good kmers: " << GoodKmers);
+    INFO("Perfect kmers: " << SkipKmers);
+    INFO("Reasigned by consensus: " << ReasignedByConsenus);
+  }
+
+  // We are trying to find center candidates, not error candidates,
+  // so we try an insertion at every "center" position.
+  std::vector<size_t> FindDistOneFullDels(const KMerStat& kmerStat) const {
+    std::vector<size_t> indices;
+    const auto& source = kmerStat.kmer;
+    for (uint k = 1; k < K; ++k) {
+      auto fixed = source;
+      for (uint j = k + 1; j < K; ++j) {
+        fixed[j] = fixed[j - 1];
+      }
+
+      auto prev = source[k - 1];
+      auto next = source[k];
+
+      for (int i = 0; i < 4; ++i) {
+        if (i == prev.nucl || i == next.nucl) {
+          continue;
+        }
+        fixed[k] = HomopolymerRun((uint8_t)i, 1);
+        auto idx = data_.checking_seq_idx(fixed);
+        if (idx != -1ULL) {
+          indices.push_back(idx);
+        }
+      }
+    }
+    return indices;
+  }
+
+  void ProceedCluster(std::vector<size_t>& cluster);
+
+  static size_t GetCenterIdx(const KMerData& kmerData,
+                             const std::vector<size_t>& cluster) {
+    if (cluster.size() == 1) {
+      return cluster[0];
+    }
+
+    hammer::HKMer center = Center(kmerData, cluster);
+    size_t idx = kmerData.checking_seq_idx(center);
+
+    if (idx == -1ULL) {
+      double bestQual = kmerData[cluster[0]].qual;
+      idx = cluster[0];
+
+      for (auto i : cluster) {
+        if (kmerData[i].qual < bestQual) {
+          bestQual = kmerData[i].qual;
+          idx = i;
+        }
+      }
+    }
+    return idx;
+  }
+
+  double GenerateLikelihood(const HKMer& from, const HKMer& to) const;
 
-class KMerData;
+  static HKMer Center(const KMerData& data, const std::vector<size_t>& kmers);
 
-size_t subcluster(KMerData &kmer_data, std::vector<size_t> &cluster);
+  HKMer ByPosteriorQualCenter(const std::vector<size_t>& kmers);
+};
 
-// for debug purposes
-hammer::HKMer center(const KMerData &data, const std::vector<size_t>& kmers);
+}  // namespace hammer
 
-#endif // __SUBCLUSTER_HPP__
+#endif  // __SUBCLUSTER_HPP__
diff --git a/src/projects/ionhammer/thread_utils.h b/src/projects/ionhammer/thread_utils.h
new file mode 100644
index 0000000..7624214
--- /dev/null
+++ b/src/projects/ionhammer/thread_utils.h
@@ -0,0 +1,57 @@
+//
+// Created by Vasiliy Ershov on 02/12/2016.
+//
+
+#ifndef PROJECT_THREAD_UTILS_H
+#define PROJECT_THREAD_UTILS_H
+
+namespace n_computation_utils {
+
+template <class AdditiveStat>
+class ParallelStatisticsCalcer {
+ private:
+  size_t num_threads_;
+
+ public:
+  ParallelStatisticsCalcer(size_t num_threads) : num_threads_(num_threads) {}
+
+  template <class TFunction>
+  AdditiveStat Calculate(size_t n, std::function<AdditiveStat()>&& factory,
+                          TFunction&& func) const {
+    std::vector<AdditiveStat> aggregated_stats;
+    for (uint i = 0; i < num_threads_; ++i) {
+      aggregated_stats.push_back(factory());
+    }
+
+#pragma omp parallel for num_threads(num_threads_)
+    for (size_t i = 0; i < n; ++i) {
+      const auto tid = omp_get_thread_num();
+      func(aggregated_stats[tid], i);
+    }
+
+    for (size_t i = 1; i < aggregated_stats.size(); ++i) {
+      aggregated_stats[0] += aggregated_stats[i];
+    }
+    return aggregated_stats[0];
+  }
+};
+
+template <class TStat, class TAdditiveStat>
+class TAdditiveStatisticsCalcer {
+ private:
+  const std::vector<TStat>& stats_;
+  size_t num_threads_;
+
+ public:
+  TAdditiveStatisticsCalcer(const std::vector<TStat>& stats, size_t num_threads)
+      : stats_(stats), num_threads_(num_threads) {}
+
+  TAdditiveStat Calculate(std::function<TAdditiveStat()>&& factory) const {
+    ParallelStatisticsCalcer<TAdditiveStat> parallel_calcer(num_threads_);
+    return parallel_calcer.Calculate(
+        stats_.size(), std::move(factory),
+        [&](TAdditiveStat& stat, size_t i) { stat.Add(stats_[i]); });
+  }
+};
+}  // namespace n_computation_utils
+#endif  // PROJECT_THREAD_UTILS_H
diff --git a/src/projects/ionhammer/valid_hkmer_generator.hpp b/src/projects/ionhammer/valid_hkmer_generator.hpp
index 468bee6..84822c8 100644
--- a/src/projects/ionhammer/valid_hkmer_generator.hpp
+++ b/src/projects/ionhammer/valid_hkmer_generator.hpp
@@ -12,13 +12,13 @@
 #include <string>
 #include <vector>
 
-#include "io/reads/single_read.hpp"
 #include "HSeq.hpp"
+#include "io/reads/single_read.hpp"
 
-#include <cstdint>
 #include <cmath>
+#include <cstdint>
 
-template<size_t kK>
+template <size_t kK>
 class ValidHKMerGenerator {
  public:
   /**
@@ -29,11 +29,9 @@ class ValidHKMerGenerator {
    */
   // FIXME: Switch to delegating ctor.
   explicit ValidHKMerGenerator(const io::SingleRead &read,
-                               unsigned bad_quality_threshold = 2) {
-    Reset(read.GetSequenceString().data(),
-          read.GetQualityString().data(),
-          read.GetSequenceString().size(),
-          bad_quality_threshold);
+                               unsigned bad_quality_threshold = 5) {
+    Reset(read.GetSequenceString().data(), read.GetQualityString().data(),
+          read.GetSequenceString().size(), bad_quality_threshold);
   }
 
   /**
@@ -43,21 +41,26 @@ class ValidHKMerGenerator {
    * nucleotides with quality lower the threshold from the ends of the
    * read.
    */
-  explicit ValidHKMerGenerator(const char *seq, const char *qual,
-                               size_t len,
-                               unsigned bad_quality_threshold = 2) {
+  explicit ValidHKMerGenerator(const char *seq, const char *qual, size_t len,
+                               unsigned bad_quality_threshold = 5) {
     Reset(seq, qual, len, bad_quality_threshold);
   }
 
   ValidHKMerGenerator()
-      : kmer_(), seq_(0), qual_(0),
-        pos_(-1), nlen_(-1), end_(-1), len_(0),
-        correct_probability_(1), bad_quality_threshold_(2),
-        has_more_(false), first_(true) {}
-
-  void Reset(const char *seq, const char *qual,
-             size_t len,
-             unsigned bad_quality_threshold = 2) {
+      : kmer_(),
+        seq_(0),
+        qual_(0),
+        pos_(-1),
+        nlen_(-1),
+        end_(-1),
+        len_(0),
+        correct_probability_(0),
+        bad_quality_threshold_(5),
+        has_more_(false),
+        first_(true) {}
+
+  void Reset(const char *seq, const char *qual, size_t len,
+             unsigned bad_quality_threshold = 5) {
     kmer_ = hammer::HSeq<kK>();
     seq_ = seq;
     qual_ = qual;
@@ -65,12 +68,14 @@ class ValidHKMerGenerator {
     nlen_ = -1;
     end_ = -1;
     len_ = len;
-    correct_probability_ = 1.0;
+    correct_probability_ = 0.0;
     bad_quality_threshold_ = bad_quality_threshold;
     has_more_ = true;
     first_ = true;
     last_ = false;
     probs_.resize(0);
+    runlens_.resize(0);
+    length = 0;
 
     TrimBadQuality();
     Next();
@@ -80,47 +85,35 @@ class ValidHKMerGenerator {
    * @result true if Next() succeed while generating new k-mer, false
    * otherwise.
    */
-  bool HasMore() const {
-    return has_more_;
-  }
+  bool HasMore() const { return has_more_; }
 
   /**
    * @result last k-mer generated by Next().
    */
-  const hammer::HSeq<kK>& kmer() const {
-    return kmer_;
-  }
+  const hammer::HSeq<kK> &kmer() const { return kmer_; }
 
   /**
    * @result last k-mer position in initial read.
    */
-  size_t pos() const {
-    return pos_;
-  }
+  size_t pos() const { return pos_; }
 
-  size_t nlen() const {
-    return nlen_;
-  }
+  size_t nlen() const { return nlen_; }
 
   /**
    * @result number of nucleotides trimmed from left end
    */
-  size_t trimmed_left() const {
-    return beg_;
-  }
+  size_t trimmed_left() const { return beg_; }
 
   /**
    * @result number of nucleotides trimmed from right end
    */
-  size_t trimmed_right() const {
-    return len_ - end_;
-  }
+  size_t trimmed_right() const { return len_ - end_; }
 
   /**
    * @result probability that last generated k-mer is correct.
    */
   double correct_probability() const {
-    return correct_probability_;
+    return exp(correct_probability_ / (double)length);
   }
 
   /**
@@ -128,27 +121,30 @@ class ValidHKMerGenerator {
    * if succeeded. You can access k-mer read with kmer().
    */
   void Next();
+
  private:
   void TrimBadQuality();
 
   double Prob(unsigned qual) {
-    return (qual < 3 ? 0.25 : 1 - pow(10.0, -(qual / 10.0)));
-    // return Globals::quality_probs[qual];
+    return max(1 - pow(10.0, -(qual / 10.0)),
+               1e-40);  //(qual < 3 ? 0.25 : 1 - pow(10.0, -(qual / 10.0)));
+                        //     return Globals::quality_probs[qual];
   }
 
   unsigned GetQual(size_t pos) {
     if (pos >= len_) {
-      return 2;
+      return 1;
     } else {
       return qual_[pos];
     }
   }
 
   hammer::HSeq<kK> kmer_;
-  const char* seq_;
-  const char* qual_;
+  const char *seq_;
+  const char *qual_;
   size_t pos_;
   size_t nlen_;
+  size_t length = 0;
   size_t beg_;
   size_t end_;
   size_t len_;
@@ -158,30 +154,29 @@ class ValidHKMerGenerator {
   bool first_;
   bool last_;
   std::deque<double> probs_;
+  std::deque<double> runlens_;
 
   // Disallow copy and assign
-  ValidHKMerGenerator(const ValidHKMerGenerator&) = delete;
-  void operator=(const ValidHKMerGenerator&) = delete;
+  ValidHKMerGenerator(const ValidHKMerGenerator &) = delete;
+  void operator=(const ValidHKMerGenerator &) = delete;
 };
 
-template<size_t kK>
+template <size_t kK>
 void ValidHKMerGenerator<kK>::TrimBadQuality() {
   pos_ = 0;
   if (qual_)
     for (; pos_ < len_; ++pos_) {
-      if (GetQual(pos_) >= bad_quality_threshold_)
-        break;
+      if (GetQual(pos_) >= bad_quality_threshold_) break;
     }
   beg_ = pos_;
   end_ = len_;
   if (qual_)
     for (; end_ > pos_; --end_) {
-      if (GetQual(end_ - 1) >= bad_quality_threshold_)
-        break;
-  }
+      if (GetQual(end_ - 1) >= bad_quality_threshold_) break;
+    }
 }
 
-template<size_t kK>
+template <size_t kK>
 void ValidHKMerGenerator<kK>::Next() {
   if (last_) {
     has_more_ = false;
@@ -190,7 +185,8 @@ void ValidHKMerGenerator<kK>::Next() {
 
   size_t toadd = (first_ ? kK : 1);
   char pnucl = -1;
-  double cprob = 1.0;
+  double cprob = 0.0;
+  double len = 0.0;
   nlen_ = 0;
   // Build the flow-space kmer looking over homopolymer streches.
   while (toadd) {
@@ -200,7 +196,7 @@ void ValidHKMerGenerator<kK>::Next() {
     if (pos_ >= end_) {
       last_ = true;
       if (toadd > 0) {
-          has_more_ = false;
+        has_more_ = false;
       }
       return;
     }
@@ -212,12 +208,17 @@ void ValidHKMerGenerator<kK>::Next() {
       pnucl = -1;
       pos_ += nlen_ + 1;
       nlen_ = 0;
-      correct_probability_ = 1.0;
+      len = 0;
+      length = 0;
+      correct_probability_ = 0.0;
       probs_.resize(0);
+      runlens_.resize(0);
       continue;
     }
-    if (qual_)
-      cprob *= Prob(GetQual(pos_ + nlen_));
+    if (qual_) {
+      cprob += log(Prob(GetQual(pos_ + nlen_)));
+      ++len;
+    }
 
     // If current nucl differs from previous nucl then either we're starting the
     // k-mer or just finished the homopolymer run.
@@ -225,14 +226,21 @@ void ValidHKMerGenerator<kK>::Next() {
       // If previous nucl was valid then finish the current homopolymer run
       if (pnucl != -1) {
         toadd -= 1;
-        correct_probability_ *= cprob;
+
+        correct_probability_ += cprob;
+        length += (size_t)len;
+
         if (probs_.size() == kK) {
-          correct_probability_ /= probs_[0];
+          correct_probability_ -= probs_[0];
+          length -= (size_t)runlens_[0];
           probs_.pop_front();
+          runlens_.pop_front();
         }
 
         probs_.push_back(cprob);
-        cprob = 1.0;
+        runlens_.push_back(len);
+        cprob = 0.0;
+        len = 0;
       }
       pnucl = cnucl;
     }
diff --git a/src/projects/mph_test/main.cpp b/src/projects/mph_test/main.cpp
index e5421c1..6f79ced 100644
--- a/src/projects/mph_test/main.cpp
+++ b/src/projects/mph_test/main.cpp
@@ -7,8 +7,8 @@
 
 #include "utils/logger/log_writers.hpp"
 #include "utils/segfault_handler.hpp"
-#include "utils/indices/perfect_hash_map.hpp"
-#include "utils/mph_index/kmer_index_builder.hpp"
+#include "utils/ph_map/perfect_hash_map.hpp"
+#include "utils/kmer_mph/kmer_index_builder.hpp"
 
 #include "io/reads/read_processor.hpp"
 #include "io/reads/io_helper.hpp"
@@ -30,18 +30,17 @@ void create_console_logger() {
     attach_logger(lg);
 }
 
-class SimplePerfectHashMap : public debruijn_graph::KeyIteratingMap<RtSeq, uint32_t> {
-    using base = debruijn_graph::KeyIteratingMap<RtSeq, uint32_t>;
+class SimplePerfectHashMap : public utils::KeyIteratingMap<RtSeq, uint32_t> {
+    using base = utils::KeyIteratingMap<RtSeq, uint32_t>;
   public:
     SimplePerfectHashMap(size_t k, const std::string &workdir)
             : base(k, workdir) {}
 };
 
-class ParallelSortingSplitter : public KMerSortingSplitter<RtSeq> {
+class ParallelSortingSplitter : public utils::KMerSortingSplitter<RtSeq> {
   using Seq = RtSeq;
 
   std::vector<std::string> files_;
-  unsigned nthreads_;
   size_t read_buffer_size_;
 
   class BufferFiller {
@@ -78,17 +77,15 @@ class ParallelSortingSplitter : public KMerSortingSplitter<RtSeq> {
 
 
   public:
-    ParallelSortingSplitter(const std::string &workdir, unsigned K, unsigned nthreads, size_t read_buffer_size = 0)
-            : KMerSortingSplitter<Seq>(workdir, K), nthreads_(nthreads), read_buffer_size_(read_buffer_size) {}
+    ParallelSortingSplitter(const std::string &workdir, unsigned K, size_t read_buffer_size = 0)
+            : KMerSortingSplitter<Seq>(workdir, K), read_buffer_size_(read_buffer_size) {}
 
     void push_back(const std::string &filename) {
         files_.push_back(filename);
     }
 
-    path::files_t Split(size_t num_files) override {
-        INFO("Splitting kmer instances into " << num_files << " buckets. This might take a while.");
-
-        path::files_t out = PrepareBuffers(num_files, nthreads_, read_buffer_size_);
+    fs::files_t Split(size_t num_files, unsigned nthreads) override {
+        fs::files_t out = PrepareBuffers(num_files, nthreads, read_buffer_size_);
 
         size_t n = 10;
         BufferFiller filler(*this, K());
@@ -96,7 +93,7 @@ class ParallelSortingSplitter : public KMerSortingSplitter<RtSeq> {
             INFO("Processing " << file);
             auto irs = io::EasyStream(file, true, true);
             while (!irs->eof()) {
-                hammer::ReadProcessor rp(nthreads_);
+                hammer::ReadProcessor rp(nthreads);
                 rp.Run(*irs, filler);
                 DumpBuffers(out);
                 VERIFY_MSG(rp.read() == rp.processed(), "Queue unbalanced");
@@ -116,7 +113,7 @@ class ParallelSortingSplitter : public KMerSortingSplitter<RtSeq> {
 };
 
 int main(int argc, char* argv[]) {
-    perf_counter pc;
+    utils::perf_counter pc;
 
     srand(42);
     srandom(42);
@@ -160,7 +157,7 @@ int main(int argc, char* argv[]) {
         INFO("# of threads to use: " << nthreads);
 
         SimplePerfectHashMap index(K, workdir);
-        ParallelSortingSplitter splitter(workdir, K, nthreads, read_buffer_size);
+        ParallelSortingSplitter splitter(workdir, K, read_buffer_size);
         if (options.count("dataset")) {
             io::DataSet<> idataset;
             idataset.load(dataset);
@@ -170,7 +167,7 @@ int main(int argc, char* argv[]) {
             for (const auto& s : input)
                 splitter.push_back(s);
         }
-        KMerDiskCounter<RtSeq> counter(workdir, splitter);
+        utils::KMerDiskCounter<RtSeq> counter(workdir, splitter);
         counter.CountAll(16, nthreads);
         INFO("K-mer counting done, kmers saved to " << counter.GetFinalKMersFname());
     } catch (std::string const &s) {
diff --git a/src/projects/mts/CMakeLists.txt b/src/projects/mts/CMakeLists.txt
index 1e06d4b..506d3a3 100644
--- a/src/projects/mts/CMakeLists.txt
+++ b/src/projects/mts/CMakeLists.txt
@@ -29,7 +29,8 @@ endif()
 add_executable(prop_binning
                propagate.cpp
                read_binning.cpp
-               prop_binning.cpp)
+               prop_binning.cpp
+               gzstream/gzstream.C)
 
 target_link_libraries(prop_binning common_modules nlopt BamTools ssw getopt_pp ${COMMON_LIBRARIES})
 
diff --git a/src/projects/mts/Common.snake b/src/projects/mts/Common.snake
index 6cd6a50..d1ec91e 100644
--- a/src/projects/mts/Common.snake
+++ b/src/projects/mts/Common.snake
@@ -4,25 +4,36 @@ from itertools import chain
 from functools import partial
 import os.path
 
-from scripts.common import detect_reads
+from scripts.common import detect_reads, fill_default_values
 
 #Config parameters
-IN = config["IN"]
-LOCAL_DIR = config["LOCAL_DIR"]
-SPADES = config.get("SPADES", LOCAL_DIR)
-SPADES_REASSEMBLY = config.get("SPADES_REASSEMBLY", LOCAL_DIR)
-BIN = config.get("BIN", os.path.join(LOCAL_DIR, "build/release/bin"))
-SCRIPTS = config.get("SCRIPTS", os.path.join(LOCAL_DIR, "src/projects/mts/scripts"))
-SOFT = config["SOFT"]
-K = int(config.get("K", 55))
-SMALL_K = int(config.get("SMALL_K", 21))
-MIN_CONTIG_LENGTH = int(config.get("MIN_CONTIG_LENGTH", 2000))
-THREADS = config.get("THREADS", 16)
-BINNER = config.get("BINNER", "canopy")
+fill_default_values(config)
+
+#TODO: check if modern Snakemake allows dictionaries in string formatting
+IN = config["data"]
+ASSEMBLER = config["assembly"]["assembler"]
+ASSEMBLER_DIR = config["assembly"]["dir"]
+REASSEMBLER_DIR = config["reassembly"]["dir"]
+BIN = config["bin"]
+SCRIPTS = config["scripts"]
+SOFT = config["soft"]
+ASSEMBLY_K = config["assembly"]["k"]
+PROFILE_K = config["profile"]["k"]
+PROFILER = config["profile"]["profiler"]
+SPLIT_LENGTH = config["profile"]["split"]
+MIN_CONTIG_LENGTH = config["binning"]["min_length"]
+MIN_NONZEROES = config["binning"]["min_nonzeroes"]
+THREADS = config["threads"]
+BINNER = config["binning"]["binner"]
+
+IS_COASSEMBLY = config["assembly"].get("groups") == ["*"]
+if not IS_COASSEMBLY and BINNER == "metabat":
+    raise WorkflowError("MetaBAT is supported only in the full coassembly mode")
 
 #Autodetect samples and their reads
+#Check that sample names are consecutive and all are present
 SAMPLE_DIRS = set(glob_wildcards(os.path.join(IN, "{sample,sample\d+}"))[0])
-SAMPLE_COUNT = len(SAMPLE_DIRS)
+SAMPLE_COUNT = config.get("count", len(SAMPLE_DIRS))
 SAMPLES = list()
 for i in range(1, SAMPLE_COUNT + 1):
     sample_name = "sample" + str(i)
@@ -32,10 +43,16 @@ for i in range(1, SAMPLE_COUNT + 1):
 
 SAMPLE_READS = dict(map(lambda sample: (sample, detect_reads(os.path.join(IN, sample))), SAMPLES))
 
+NAME_TEMPLATE = "(\w+\.?)?\d+"
+
 #Group samples
-GROUP_SAMPLES = config.get("GROUPS", [])
+GROUP_SAMPLES = config["assembly"]["groups"]
+#Form /N groups
+if type(GROUP_SAMPLES) == str and GROUP_SAMPLES[0] == "/":
+    group_size = SAMPLE_COUNT // int(GROUP_SAMPLES[1:])
+    GROUP_SAMPLES = [["sample"+str(j) for j in range(i, min(i + group_size, SAMPLE_COUNT + 1))]
+                     for i in range(1, SAMPLE_COUNT, group_size)]
 USED_SAMPLES = set(chain(*GROUP_SAMPLES))
-#TODO: double-check
 #Replace the wildcard group with unused samples
 if GROUP_SAMPLES and GROUP_SAMPLES[-1] == "*":
     GROUP_SAMPLES[-1] = [sample for sample in SAMPLES if sample not in USED_SAMPLES]
@@ -45,25 +62,56 @@ else:
         if sample not in USED_SAMPLES:
             GROUP_SAMPLES.append([sample])
 
+#Dictionary: {group name: [samples of the group]}
+#Can be iterated to retrieve all group names
 GROUPS = dict()
 group_id = 1
 for group in GROUP_SAMPLES:
     if len(group) == 1:
-        key = group[0]
+        key = group[0] #Groups of a single sample are simply called sampleXX
     else:
         key = "group" + str(group_id)
-        #SAMPLE_READS[key] = ["reads/{}/{}.fastq".format(key, dir) for dir in ["left", "right"]]
         SAMPLE_READS[key] = ([SAMPLE_READS[s][0] for s in group], [SAMPLE_READS[s][1] for s in group])
         group_id += 1
     GROUPS[key] = group
 
 #Helpers for locating input files
+
+#Returns all filepaths with left/right reads for a sample/group/bin/etc, used as Snakemake input
+def reads_input(dict):
+    return (partial(dict, 0), partial(dict, 1))
+
 def sample_reads(dir, wildcards):
-    res = SAMPLE_READS[wildcards.sample][dir]
-    if res is str:
-        return [res]
-    else:
-        return res
+    return SAMPLE_READS[wildcards["sample"]][dir]
+
+left_sample_reads, right_sample_reads = reads_input(sample_reads)
+
+def group_reads(dir, wildcards):
+    return SAMPLE_READS[wildcards["group"]][dir]
+
+left_reads, right_reads = reads_input(group_reads)
+
+def is_fastq(wildcards):
+    name = getattr(wildcards, "sample", None)
+    if not name:
+        name = GROUPS[wildcards.group][0]
+    for ext in {".fastq", ".fq", ".fastq.gz", "fq.gz"}:
+        if SAMPLE_READS[name][0].endswith(ext):
+            return True
+    return False
+
+rule combine_contigs:
+    input:   expand("assembly/{{frags}}/{group}.fasta", assembler=ASSEMBLER, group=GROUPS)
+    output:  "assembly/{frags}/all.fasta"
+    message: "Combine all {wildcards.frags} contigs"
+    shell:   "{SCRIPTS}/combine_contigs.py {input} > {output}"
 
-left_reads  = partial(sample_reads, 0)
-right_reads = partial(sample_reads, 1)
+rule combine_annotation:
+    input:   expand("{{stage}}/annotation/{group}.ann", group=GROUPS)
+    output:  "{stage}/annotation/all.ann"
+    message: "Combine all annotation on {wildcards.stage}"
+    run:
+        shell("rm -f {output}")
+        for sample_ann in input:
+            sample, _ = os.path.splitext(os.path.basename(sample_ann))
+            shell("sed -e 's/^/{sample}-/' {sample_ann} >> {output}")
diff --git a/src/projects/mts/README b/src/projects/mts/README
index 5e8e6d3..43a9b02 100644
--- a/src/projects/mts/README
+++ b/src/projects/mts/README
@@ -8,14 +8,15 @@ With Miniconda installed, you can issue
 > conda install -c bioconda snakemake
 
 2. Running MTS
-Make a directory for output, place config.yaml there, and configure it. Then run
-> snakemake --directory <output directory>  --cores XX
+To assemble a dataset, you need to prepare a config.yaml. Use the provided template, it's mostly self-descriptive. Then run
+> ./mts.py -c <config> -t XX --directory <output directory>
 
 3. Gathering stats
-To render some interesting info, you need to specify some references in config:
-REFS: path
+To enable stats for some stage(s), add the section to the config:
+stats:
+    stage: [stat1, stat2]
+"gf" stats require references provided with "refs" parameter:
+    refs: path
 or
-REFS: [path1, path2, ...]
+    refs: [path1, path2, ...]
 where path can be either a single reference or a folder with references.
-Then run the stats target manually:
-> snakemake --directory <output directory> stats_all
diff --git a/src/projects/mts/Reassembly.snake b/src/projects/mts/Reassembly.snake
new file mode 100644
index 0000000..2139c6d
--- /dev/null
+++ b/src/projects/mts/Reassembly.snake
@@ -0,0 +1,74 @@
+include: "Common.snake"
+
+import yaml
+
+READ_LENGTH = config.get("read_length", 100) #TODO: auto-detect default value
+
+BINS, = glob_wildcards("binning/{{bin,{}}}.info".format(NAME_TEMPLATE))
+if not BINS:
+    raise WorkflowError("No bins detected!")
+
+rule reassemble_all:
+    input:   expand("reassembly/bins/{bin}.fasta", bin=BINS)
+    message: "Reassembly finished."
+
+rule reassembly_config:
+    input:   "binning/{bin}.info"
+    output:  "reassembly/{bin}.yaml"
+    message: "Generating config file for reassembly of {wildcards.bin}"
+    run:
+        with open(output[0], "w") as outfile:
+            conf = {"k": PROFILE_K, "sample_cnt": SAMPLE_COUNT,
+                    "kmer_mult": "profile/mts/kmers",
+                    "bin": wildcards.bin, "bin_prof": "binning/{}/bins.prof".format(BINNER),
+                    "edges_sqn": "reassembly/{}_edges.fasta".format(wildcards.bin),
+                    "edges_mpl": "reassembly/{}_edges.mpl".format(wildcards.bin),
+                    "edge_fragments_mpl": "reassembly/{}_edges_frag.mpl".format(wildcards.bin),
+                    "frag_size": SPLIT_LENGTH, "min_len": 100}
+            yaml.dump(conf, outfile)
+
+#For bin reassembly, we choose only matching reads from samples which were marked with +
+#in the corresponding {bin}.info file
+BIN_READS = dict()
+for bin in BINS:
+    with open("binning/{}.info".format(bin)) as samples_info:
+        samples = []
+        for line in samples_info:
+            sample_data = line.split()
+            if sample_data[0][0] == "+":
+                sample = sample_data[0][1:]
+                if not os.path.exists("binning/{}/{}_1.fastq.gz".format(bin, sample)):
+                    print("\033[33mWarning: {} contains no reads for {}\033[0m".format(sample, bin))
+                    continue
+                samples.append(sample)
+        BIN_READS[bin] = (["binning/{}/{}_1.fastq.gz".format(bin, sample) for sample in samples],
+                          ["binning/{}/{}_2.fastq.gz".format(bin, sample) for sample in samples])
+
+def bin_reads(dir, wildcards):
+    return BIN_READS[wildcards["bin"]][dir]
+
+#Returns the filepath with left/right reads for a sample or list of them for a group, used as Snakemake input
+left_bin_reads, right_bin_reads = reads_input(bin_reads)
+
+rule reassemble:
+    input:   info="binning/{bin}.info", config="reassembly/{bin}.yaml"
+    output:  "reassembly/bins/{bin}.fasta"
+    params:  out="reassembly/{bin}",
+             left=lambda w: " ".join(expand("--pe1-1 {r}", r=left_bin_reads(w))),
+             right=lambda w: " ".join(expand("--pe1-2 {r}", r=right_bin_reads(w))),
+    log:     "reassembly/{bin}.log"
+    threads: THREADS
+    message: "Reassembling reads for {wildcards.bin}"
+    run:
+        COV_THRESHOLD = 0.3
+        subtotal = 0
+        with open(input.info) as samples_info:
+            for line in samples_info:
+                sample_data = line.split()
+                if sample_data[0][0] == "+":
+                    subtotal += float(sample_data[1])
+        bin_cov = COV_THRESHOLD * subtotal * READ_LENGTH / (READ_LENGTH - PROFILE_K)
+        shell("{REASSEMBLER_DIR}/spades.py --only-assembler -t {threads}"
+             " {params.left} {params.right} --pe1-ff -o {params.out}"
+             " --hidden-cov-cutoff {bin_cov} --series-analysis {input.config}"
+             " >{log} 2>&1 && cp {params.out}/scaffolds.fasta {output}")
diff --git a/src/projects/mts/Snakefile b/src/projects/mts/Snakefile
index 25553b4..75a842e 100644
--- a/src/projects/mts/Snakefile
+++ b/src/projects/mts/Snakefile
@@ -1,175 +1,330 @@
 include: "Common.snake"
 
-import os
-import os.path
-
-from scripts.common import dump_dict
+configfile: "config.yaml"
 
-#Path to saves of necessary assembly stage
-SAVES = "K{0}/saves/01_before_repeat_resolution/graph_pack".format(K)
+import os.path
 
 onstart:
-    try:
-        os.mkdir("tmp")
-    except:
-        pass
-    print("Detected", SAMPLE_COUNT, "samples in", IN)
-    print("They form: ", GROUPS)
+    shell("mkdir -p tmp")
 
-# ---- Main pipeline -----------------------------------------------------------
+def final_stage(w):
+    if config["reassembly"]["enabled"]:
+        return ["propagation.done", "binning/bins_total.prof"] #Stop after the bin choosing
+    if config["propagation"]["enabled"]:
+        return "propagation.done" #Stop on the propagation
+    return "binning/{}/binning.done".format(BINNER) #Stop on the preliminary binning
 
 rule all:
-    input:   dynamic("reassembly/{cag}.fasta")
+    input:   final_stage
     message: "Dataset of {SAMPLE_COUNT} samples from {IN} has been processed."
 
-rule assemble:
+# ---- Assembly ----------------------------------------------------------------
+
+# Assemble with MegaHIT
+rule megahit:
+    input:   left=left_reads, right=right_reads
+    output:  "assembly/megahit/{group}.fasta"
+    params:  left=lambda w: ",".join(left_reads(w)),
+             right=lambda w: ",".join(right_reads(w)),
+             dir="assembly/megahit/{group}"
+    threads: THREADS
+    log:     "assembly/megahit.log"
+    message: "Assembling {wildcards.group} with MegaHIT"
+    shell:   "rm -rf {params.dir} &&"
+             " {SOFT}/megahit/megahit -1 {params.left} -2 {params.right}"
+             " -t {threads} -o {params.dir} >{log} 2>&1 &&"
+             " cp {params.dir}/final.contigs.fa {output}"
+
+# Assemble with SPAdes
+rule spades:
     input:   left=left_reads, right=right_reads
-    output:  "assembly/{sample}.fasta"
-    #TODO: remove this boilerplate
+    output:  "assembly/spades/{group}.fasta"
     params:  left=lambda w: " ".join(expand("-1 {r}", r=left_reads(w))),
              right=lambda w: " ".join(expand("-2 {r}", r=right_reads(w))),
-             dir="assembly/{sample}"
-    log:     "assembly/{sample}.log"
+             dir="assembly/spades/{group}", bh=lambda w: "" if is_fastq(w) else "--only-assembler"
     threads: THREADS
-    message: "Assembling {wildcards.sample} with SPAdes"
-    shell:   "{SPADES}/spades.py --meta -m 400 -t {threads} {params.left} {params.right}"
-             " -o {params.dir} >{log} 2>&1 && "
+    log:     "assembly/{group}.log"
+    message: "Assembling {wildcards.group} with metaSPAdes"
+    shell:   "{ASSEMBLER_DIR}/spades.py {params.bh} --meta -m 400 -t {threads}"
+             " {params.left} {params.right}"
+             " --save-gp -o {params.dir} >{log} 2>&1 && "
              "cp {params.dir}/scaffolds.fasta {output}"
 
-rule assemble_all:
-    input:   expand("assembly/{sample}.fasta", sample=GROUPS)
-    message: "Assembled all samples"
+rule copy_contigs:
+    input:   "assembly/{}/{{group}}.fasta".format(ASSEMBLER)
+    output:  "assembly/full/{group,(sample|group\d+)}.fasta"
+    shell:   "cp {input} {output}"
 
-rule descriptions:
-    output:  expand("profile/{sample}.desc", sample=SAMPLES)
-    message: "Generating sample descriptions"
-    run:
-        for sample in SAMPLES:
-            with open("profile/{}.desc".format(sample), "w") as out:
-                wildcards.sample = sample
-                print(left_reads(wildcards),  file=out)
-                print(right_reads(wildcards), file=out)
+rule split_contigs:
+    input:   "assembly/{}/{{group}}.fasta".format(ASSEMBLER)
+    output:  "assembly/splits/{group,(sample|group)\d+}.fasta"
+    message: "Cutting {wildcards.group} into {SPLIT_LENGTH} bp splits"
+    shell:   "{SCRIPTS}/cut_fasta.py -c {SPLIT_LENGTH} -o 0 -m {input} > {output}"
+
+#---- Generating profiles/depths -----------------------------------------------
+
+# MetaBAT way
+
+rule bowtie_index:
+    input:   "assembly/{frags}/all.fasta"
+    output:  "profile/jgi/{frags}/index.done"
+    log:     "profile/jgi/{frags}/bowtie-build.log"
+    message: "Building bowtie index"
+    shell:   "bowtie2-build {input} profile/jgi/index_{wildcards.frags} >{log} 2>&1 && touch {output}"
 
+rule align:
+    input:   left=left_sample_reads, right=right_sample_reads,
+             index="profile/jgi/{frags}/index.done"
+    output:  "profile/jgi/{frags}/{sample}.bam"
+    threads: THREADS
+    log:     "profile/jgi/{frags}/bowtie-{sample}.log"
+    message: "Aligning {wildcards.sample} with bowtie"
+    shell:   "bowtie2 -x profile/jgi/index_{wildcards.frags} -p {threads}"
+             " -1 {input.left} -2 {input.right} 2>{log} | samtools view -bS - > {output}"
+
+rule depth:
+    input:   expand("profile/jgi/{{frags}}/{sample}.bam", sample=SAMPLES)
+    output:  "profile/jgi/{frags}/depth_metabat.txt"
+    log:     "profile/jgi/{frags}/depths.log"
+    message: "Calculating contig depths"
+    shell:   "{SOFT}/metabat/jgi_summarize_bam_contig_depths --outputDepth {output} {input} >{log} 2>&1"
+
+rule concoct_depth:
+    input:   "profile/jgi/splits/depth_metabat.txt"
+    output:  "binning/concoct/profiles_jgi.in"
+    message: "Converting depth file into CONCOCT format"
+    shell:   "awk 'NR > 1 {{for(x=1;x<=NF;x++) if(x == 1 || (x >= 4 && x % 2 == 0)) printf \"%s\", $x (x == NF || x == (NF-1) ? \"\\n\":\"\\t\")}}' {input} > {output}"
+
+# Our way
 rule kmc:
-    input:   "profile/{sample}.desc"
     output:  temp("tmp/{sample}.kmc_pre"), temp("tmp/{sample}.kmc_suf")
-    params:  min_mult=2, tmp="tmp/{sample}_kmc", out="tmp/{sample}"
+    params:  min_mult=2, tmp="tmp/{sample}_kmc", out="tmp/{sample}",
+             desc="profile/{sample}.desc",
+             left=left_sample_reads, right=right_sample_reads,
+             format=lambda w: "-fq" if is_fastq(w) else "-fa"
     log:     "profile/kmc_{sample}.log"
     threads: THREADS
     message: "Running kmc for {wildcards.sample}"
-    shell:   "mkdir {params.tmp} && "
-             "{SOFT}/kmc -k{SMALL_K} -t{threads} -ci{params.min_mult} -cs65535"
-             " @{input} {params.out} {params.tmp} >{log} 2>&1 && "
+    shell:   "mkdir -p {params.tmp}\n"
+             "echo '{params.left}\n{params.right}' > {params.desc}\n"
+             "{SOFT}/kmc {params.format} -k{PROFILE_K} -t{threads} -ci{params.min_mult}"
+             " -cs65535 @{params.desc} {params.out} {params.tmp} >{log} 2>&1 && "
              "rm -rf {params.tmp}"
 
 rule multiplicities:
     input:   expand("tmp/{sample}.kmc_pre", sample=SAMPLES), expand("tmp/{sample}.kmc_suf", sample=SAMPLES)
-    output:  "profile/kmers.kmm"
-    params:  kmc_files=" ".join(expand("tmp/{sample}", sample=SAMPLES)), out="profile/kmers"
-    log:     "profile/kmers.log"
-    message: "Gathering {SMALL_K}-mer multiplicities from all samples"
-    shell:   "{BIN}/kmer_multiplicity_counter -n {SAMPLE_COUNT} -k {SMALL_K} -s 3"
+    output:  "profile/mts/kmers.kmm"
+    params:  kmc_files=" ".join(expand("tmp/{sample}", sample=SAMPLES)), out="profile/mts/kmers"
+    log:     "profile/mts/kmers.log"
+    message: "Gathering {PROFILE_K}-mer multiplicities from all samples"
+    shell:   "{BIN}/kmer_multiplicity_counter -n {SAMPLE_COUNT} -k {PROFILE_K} -s 2"
              " -f tmp -t {threads} -o {params.out} >{log} 2>&1 && "
              "rm tmp/*.sorted"
 
-rule profile:
-    input:   contigs="assembly/{sample,\w+\d+}.fasta", mpl="profile/kmers.kmm"
-    output:  id="profile/{sample}.id", mpl="profile/{sample}.mpl", splits= "assembly/{sample}_splits.fasta"
-    log:     "profile/{sample}.log"
-    message: "Counting contig abundancies for {wildcards.sample}"
-    shell:   "{BIN}/contig_abundance_counter -k {SMALL_K} -w tmp -c {input.contigs}"
-             " -n {SAMPLE_COUNT} -m profile/kmers -o profile/{wildcards.sample}"
-             " -f {output.splits} -l {MIN_CONTIG_LENGTH} >{log} 2>&1"
+rule abundancies:
+    input:   contigs="assembly/splits/{group}.fasta", mpl="profile/mts/kmers.kmm"
+    output:  "profile/mts/{group}.tsv"
+    log:     "profile/mts/{group}.log"
+    message: "Counting contig abundancies for {wildcards.group}"
+    shell:   "{BIN}/contig_abundance_counter -k {PROFILE_K} -w tmp -c {input.contigs}"
+             " -n {SAMPLE_COUNT} -m profile/mts/kmers -o {output}"
+             " -l {MIN_CONTIG_LENGTH} >{log} 2>&1"
+
+rule combine_profiles:
+    input:   expand("profile/mts/{group}.tsv", group=GROUPS)
+    output:  "profile/mts/all.tsv"
+    message: "Combine all profiles"
+    run:
+        shell("rm -f {output}")
+        for sample_ann in input:
+            sample, _ = os.path.splitext(os.path.basename(sample_ann))
+            shell("sed -e 's/^/{sample}-/' {sample_ann} >> {output}")
 
 rule binning_pre:
-    input:   expand("profile/{sample}.id", sample=GROUPS)
-    output:  "binning/{binner}/profiles.in"
+    input:   "profile/mts/all.tsv"
+    output:  "binning/{binner}/profiles_mts.in"
     params:  " ".join(list(GROUPS.keys()))
+    log:     "binning/input.log"
     message: "Preparing input for {wildcards.binner}"
-    shell:   "{SCRIPTS}/make_input.py -t {wildcards.binner} -d profile -o {output} {params}"
+    shell:   "{SCRIPTS}/make_input.py -n {SAMPLE_COUNT} -t {wildcards.binner}"
+             " -o {output} {input} >{log}"
 
+rule filter_contigs:
+    input:   contigs="assembly/splits/all.fasta", profile="binning/{}/profiles_mts.in".format(BINNER)
+    output:  contigs="assembly/splits/all_filtered.fasta"
+    message: "Leave contigs that have profile information"
+    shell:   "cut -f1 < {input.profile} > tmp/names_tmp.txt && sed -i '1d' tmp/names_tmp.txt && "
+             "{SCRIPTS}/contig_name_filter.py {input.contigs} tmp/names_tmp.txt {output.contigs}"
+
+#---- Binning ------------------------------------------------------------------
+
+# Binning with Canopy
 rule canopy:
-    input:   "binning/canopy/profiles.in"
-    output:  out="binning/canopy/binning.out", prof="binning/canopy/bins.prof"
+    input:   "binning/canopy/profiles_{}.in".format(PROFILER)
+    output:  out="binning/canopy/binning.out", prof="binning/canopy/bins.prof",
+             flag=touch("binning/canopy/binning.done")
     threads: THREADS
+    log:     "binning/canopy.log"
     message: "Running canopy clustering"
-    shell:   "{SOFT}/cc.bin -n {threads} -i {input} -o {output.out} -c {output.prof} >binning/canopy/canopy.log 2>&1"
-
-rule combine_splits:
-    input:   expand("assembly/{sample}_splits.fasta", sample=GROUPS)
-    output:  "assembly/samples_splits.fasta"
-    message: "Combine splitted contigs"
-    shell:   "{SCRIPTS}/combine_contigs.py -r {input} > {output}"
+    shell:   "{SOFT}/cc.bin --filter_max_dominant_obs 1 -n {threads}"
+             " -i {input} -o {output.out} -c binning/canopy/canopy_bins.prof >{log} 2>&1 && "
+             "sed 's/CAG/BIN/g' binning/canopy/canopy_bins.prof >{output.prof}"
 
-#FIXME what does gt1000 mean?
+# Binning with CONCOCT
 rule concoct:
-    input:   contigs=rules.combine_splits.output[0], profiles="binning/concoct/profiles.in"
-    output:  out="binning/concoct/clustering_gt1000.csv"
-    params:  "binning/concoct"
+    input:   contigs="assembly/splits/all.fasta", profiles="binning/concoct/profiles_{}.in".format(PROFILER)
+    output:  "binning/concoct/binning.out"
+    params:  max_clusters=40, out="binning/concoct"
+    threads: THREADS
+    log:     "binning/concoct.log"
     message: "Running CONCOCT clustering"
-    shell:   "mkdir -p {params} && "
-             "set +u; source activate concoct_env; set -u && "
-             "concoct --composition_file {input.contigs} --coverage_file {input.profiles} -b {params}"
+    shell:   "set +u; source activate py27; set -u\n"
+             "concoct -c {params.max_clusters} --composition_file {input.contigs}"
+             " --coverage_file {input.profiles} --length_threshold {MIN_CONTIG_LENGTH}"
+             " -b {params.out} >{log} 2>&1 && "
+             "cp binning/concoct/clustering_gt{MIN_CONTIG_LENGTH}.csv {output}"
+
+rule extract_bins:
+    input:   "assembly/splits/all.fasta", "binning/annotation/all.ann"
+    output:  touch("binning/concoct/binning.done")
+    message: "Extracting CONCOCT bins"
+    shell:   "mkdir -p binning/bins && {SCRIPTS}/split_bins.py {input} binning/bins"
 
-binning_inputs = {"canopy": rules.canopy.output.out, "concoct": rules.concoct.output.out}
+# Binning with MetaBAT
+rule metabat:
+    input:   contigs="assembly/full/all.fasta", profiles="profile/jgi/full/depth_metabat.txt"
+    output:  flag=touch("binning/metabat/binning.done"),
+             out="binning/metabat/binning.out"
+    threads: THREADS
+    params:  "binning/metabat/cluster"
+    log:     "binning/metabat.log"
+    message: "Running MetaBAT clustering"
+    shell:   "{SOFT}/metabat/metabat -t {threads} -m {MIN_CONTIG_LENGTH} "
+             " --minContigByCorr {MIN_CONTIG_LENGTH} --saveCls"
+             " -i {input.contigs} -a {input.profiles}"
+             " -o {params} > {log} && "
+             "sed 's/\t/,/g' {params} > {output.out} && mkdir -p binning/bins && "
+             "for file in binning/metabat/*.fa ; do bin=${{file##*/}}; mv $file binning/bins/${{bin%.*}}.fasta; done"
+
+# Binning with MAXBIN2
+rule maxbin:
+    input:   contigs="assembly/splits/all.fasta", profiles="binning/maxbin/profiles_{}.in".format(PROFILER)
+    output:  "binning/maxbin/binning.out"
+    threads: THREADS
+    params:  out="binning/maxbin/cluster"
+    log:     "binning/maxbin.log"
+    message: "Running MaxBin2 clustering"
+    shell:   "perl {SOFT}/MaxBin/run_MaxBin.pl -thread {threads} -min_contig_length {MIN_CONTIG_LENGTH} "
+             " -contig {input.contigs} -abund {input.profiles}"
+             " -out {params.out} > {log}"
+             "&& {SCRIPTS}/make_maxbincsv.py -o {output} {params.out}"
+
+# Binning with GATTACA
+# conda create -n py27 python=2.7.9 numpy scipy scikit-learn anaconda
+# conda install -c bioconda pysam=0.11.2.2
+rule gattaca:
+    input:   contigs="assembly/splits/all_filtered.fasta", profiles="binning/gattaca/profiles_{}.in".format(PROFILER)
+    output:  "binning/gattaca/binning.out"
+    threads: THREADS
+    log:     "binning/gattaca.log"
+    message: "Running GATTACA clustering"
+    shell:   "set +u; source activate py27; set -u\n"
+             "python {SOFT}/gattaca/src/python/gattaca.py cluster --contigs {input.contigs}"
+             " --coverage {input.profiles} --algorithm dirichlet --clusters {output} >{log} 2>&1"
+
+# Binning with BinSanity
+rule binsanity:
+    input:   contigs="assembly/splits/all_filtered.fasta", profiles="binning/binsanity/profiles_{}.in".format(PROFILER)
+    output:  "binning/binsanity/binning.out"
+    threads: THREADS
+    log:     "binning/binsanity.log"
+    message: "Running BinSanity clustering"
+    shell:   "python2 /home/tdvorkina/binsanity/src/BinSanity/test-scripts/Binsanity-lc "
+             " -f ./ -l {input.contigs} -c {input.profiles} -o binning/binsanity/BINSANITY-RESULTS > {log}  && "
+             "{SCRIPTS}/clusters2csv.py binning/binsanity/BINSANITY-RESULTS/KMEAN-BINS {output} && mv Binsanity-log.txt binning/ "
+
+# Postprocessing
+rule bin_profiles:
+    input:   "profile/{}/all.tsv".format(PROFILER), "binning/{}/unified_binning.tsv".format(BINNER)
+    output:  "binning/{}/bins.prof".format(BINNER)
+    message: "Deriving bin profiles"
+    shell:   "{SCRIPTS}/bin_profiles.py {input} > {output}"
+
+ruleorder: canopy > bin_profiles
 
-rule binning_post:
-    input:   binning_inputs[BINNER]
-    output:  expand("annotation/{sample}.ann", sample=GROUPS)
+rule binning_format:
+    input:   "binning/{}/binning.out".format(BINNER)
+    output:  "binning/{}/unified_binning.tsv".format(BINNER)
+    message: "Making unified binning results"
+    shell:   "{SCRIPTS}/convert_output.py -t {BINNER} -o {output} {input} &&"
+             "cp {output} binning/binning.tsv" #Additional table for stats
+
+rule annotate:
+    input:   "binning/{}/unified_binning.tsv".format(BINNER)
+    output:  expand("binning/annotation/{sample}.ann", sample=GROUPS)
+    params:  "binning/annotation/"
     message: "Preparing raw annotations"
-    shell:   "{SCRIPTS}/parse_output.py -t {BINNER} -o annotation {input}"
-
-#Post-clustering pipeline
-rule read_binning:
-    input:   contigs="assembly/{sample}.fasta", ann="annotation/{sample}.ann",
-             left=left_reads, right=right_reads
-    output:  "propagation/{sample}_edges.ann"
-    params:  saves=os.path.join("assembly/{sample}/", SAVES),
-             splits="assembly/{sample}_splits.fasta",
-             out="propagation/{sample}_edges",
-             group=lambda wildcards: GROUPS[wildcards.sample]
-             #left=" ".join(input.left), right=" ".join(input.right)
-    log:     "binning/{sample}.log"
-    message: "Propagating annotation & binning reads for {wildcards.sample}"
-    shell:
-          "{BIN}/prop_binning -k {K} -s {params.saves} -c {input.contigs}"
-          " -n {params.group} -l {input.left} -r {input.right}"
-          " -a {input.ann} -f {params.splits} -o binning -d {params.out} >{log} 2>&1"
-
-#TODO: bin profiles for CONCOCT
+    run:
+        samples_annotation = dict()
+        #Load the whole annotation: {sample: [bins]}
+        with open(input[0]) as input_file:
+            for line in input_file:
+                annotation_str = line.split("\t", 1)
+                bin_id = annotation_str[1].strip()
+                sample_contig = annotation_str[0].split('-', 1)
+                if len(sample_contig) > 1:
+                    sample = sample_contig[0]
+                    contig = sample_contig[1]
+                else: #Backward compatibility with old alternative pipeline runs
+                    sample = "group1"
+                    contig = sample_contig[0]
+                annotation = samples_annotation.setdefault(sample, dict())
+                if contig not in annotation:
+                    annotation[contig] = [bin_id]
+                else:
+                    annotation[contig].append(bin_id)
+
+        #Serialize it in the propagator format
+        for sample, annotation in samples_annotation.items():
+            with open(os.path.join(params[0], sample + ".ann"), "w") as sample_out:
+                for contig in samples_annotation[sample]:
+                    print(contig, "\t", " ".join(annotation[contig]), sep="", file=sample_out)
+
+
+#---- Post-clustering pipeline -------------------------------------------------
+
+# Propagation stage
+#Path to saves of necessary assembly stage
+SAVES = "K{0}/saves/01_before_repeat_resolution/graph_pack".format(ASSEMBLY_K)
+
+rule prop_binning:
+    input:   contigs="assembly/spades/{group}.fasta", splits="assembly/splits/{group}.fasta",
+             ann="binning/annotation/{group}.ann",    left=left_reads, right=right_reads,
+             bins="binning/{}/filtered_bins.tsv".format(BINNER)
+    output:  ann="propagation/annotation/{group}.ann", edges="propagation/edges/{group}.fasta"
+    params:  saves=os.path.join("assembly/spades/{group}/", SAVES),
+             samples=lambda wildcards: " ".join(GROUPS[wildcards.group])
+    log:     "binning/{group}.log"
+    message: "Propagating annotation & binning reads for {wildcards.group}"
+    shell:   "{BIN}/prop_binning -k {ASSEMBLY_K} -s {params.saves} -c {input.contigs} -b {input.bins}"
+             " -n {params.samples} -l {input.left} -r {input.right} -t {MIN_CONTIG_LENGTH}"
+             " -a {input.ann} -f {input.splits} -o binning -p {output.ann} -e {output.edges} >{log} 2>&1"
+
+rule prop_all:
+    input:   expand("propagation/annotation/{group}.ann", group=GROUPS)
+    output:  touch("propagation.done")
+    message: "Finished propagation of all annotations."
+
+rule choose_bins:
+    input:   "binning/{}/unified_binning.tsv".format(BINNER)
+    output:  "binning/{}/filtered_bins.tsv".format(BINNER)
+    message: "Filter small bins"
+    shell:   "{SCRIPTS}/choose_bins.py {input} >{output} 2>&1"
+
 rule choose_samples:
-    input:   binned=expand("propagation/{sample}_edges.ann", sample=GROUPS),
-             prof=rules.canopy.output.prof
-    output:  dynamic("binning/{cag}/left.fastq"),
-             dynamic("binning/{cag}/right.fastq")
+    input:   "binning/{}/bins.prof".format(BINNER), "binning/{}/filtered_bins.tsv".format(BINNER)
+    output:  "binning/bins_total.prof"
     log:     "binning/choose_samples.log"
-    message: "Choosing samples for all CAGs"
-    shell:   "{SCRIPTS}/choose_samples.py {input.prof} binning/ >{log} 2>&1"
-
-rule reassembly_config:
-    input:   "binning/{cag}/left.fastq"
-    output:  "reassembly/{cag}.yaml"
-    message: "Generated config file for reassembly of {wildcards.cag}"
-    run:
-        with open(output[0], "w") as outfile:
-            conf = {"k": SMALL_K, "sample_cnt": SAMPLE_COUNT,
-                    "kmer_mult": str(rules.multiplicities.params.out),
-                    "bin": wildcards.cag, "bin_prof": str(rules.canopy.output.prof),
-                    "edges_sqn": "profile/{}_edges.fasta".format(wildcards.cag),
-                    "edges_mpl": "profile/{}_edges.mpl".format(wildcards.cag),
-                    "edge_fragments_mpl": "profile/{}_edges_frag.mpl".format(wildcards.cag),
-                    "frag_size": 10000, "min_len": 100}
-            dump_dict(conf, outfile)
-
-rule reassemble:
-    input:   left="binning/{cag}/left.fastq", right="binning/{cag}/right.fastq",
-             config="reassembly/{cag}.yaml"
-    output:  "reassembly/{cag}.fasta"
-    params:  "reassembly/reassembly_{cag}"
-    log:     "reassembly/reassembly_{cag}.log"
-    threads: THREADS
-    message: "Reassembling reads for {wildcards.cag}"
-    shell:   "{SPADES_REASSEMBLY}/spades.py --meta -t {threads}"
-             " --pe1-1 {input.left} --pe1-2 {input.right} --pe1-ff"
-             " -o {params} --series-analysis {input.config} >{log} 2>&1 && "
-             "cp {params}/scaffolds.fasta {output}"
+    message: "Choosing bins for reassembly and samples for them"
+    shell:   "rm -f binning/*.info && rm -rf binning/excluded && "
+             "{SCRIPTS}/choose_samples.py {input} {output} binning >{log} 2>&1"
diff --git a/src/projects/mts/Stats.snake b/src/projects/mts/Stats.snake
index 5019433..8834a12 100644
--- a/src/projects/mts/Stats.snake
+++ b/src/projects/mts/Stats.snake
@@ -6,7 +6,7 @@ import os.path
 import pandas
 from pandas import DataFrame
 
-from scripts.common import gather_refs, dump_dict
+from scripts.common import gather_refs
 
 #Additional config parameters
 try:
@@ -18,215 +18,189 @@ except KeyError:
     METAQUAST = "metaquast"
 
 #Autodetect bins
-CAGS, = glob_wildcards("binning/{cag,CAG\d+}/left.fastq")
-CAGS.sort()
+if config["reassembly"]["enabled"]:
+    BINS, = glob_wildcards("binning/{{bin,{}}}.info".format(NAME_TEMPLATE))
+else:
+    BINS, = glob_wildcards("binning/bins/{{bin,{}}}.fasta".format(NAME_TEMPLATE))
+if not BINS:
+    raise WorkflowError("No bins detected!")
+BINS.sort()
 
-CAG_EDGES = [c + "_edges" for c in CAGS]
+#Additional config parameters
+try:
+    QUAST_DIR = config["stats"]["quast"]
+    QUAST = os.path.join(QUAST_DIR, "quast.py")
+    METAQUAST = os.path.join(QUAST_DIR, "metaquast.py")
+except KeyError:
+    QUAST = "quast"
+    METAQUAST = "metaquast"
 
 #Detect references
-REFS = dict(gather_refs(config.get("REFS", [])))
-ALL_REFS = ",".join(path for path in REFS.values())
-
-FRAGMENT_NAMES_BY_TYPE = {"reassembly": CAG_EDGES,
-                          "initial_assembly": list(GROUPS.keys())}
+REFS = dict(gather_refs(config["stats"].get("refs", [])))
+REFS_STR = ",".join(path for path in sorted(REFS.values()))
 
 def ref_path(wildcards):
     return REFS[wildcards.ref]
 
 onstart:
-    try:
-        os.mkdir("tmp")
-    except:
-        pass
     print("Detected", SAMPLE_COUNT, "samples in", IN)
-    if CAGS:
-        print("Detected good (abundant) CAGs:", " ".join(CAGS))
+    if BINS:
+        print("Detected good (abundant) bins:", " ".join(BINS))
     if REFS:
         print("Detected references:", " ".join(REFS))
 
+enabled_stats = []
+stats_targets = {"checkm": "checkm_{}.tsv",
+                 "pca":    "pca_{}.png",
+                 "tsne":   "tsne_{}.png",
+                 "f1":     "f1_{}.txt"}
+if config["stats"].get("refs"):
+    stats_targets["gf"] = "gf_{}.tsv"
+    stats_targets["summary"] = "{}_summary.tsv"
+for stage in ["binning", "propagation", "reassembly"]:
+    stage_stats = config["stats"].get(stage, [])
+    if stage_stats == "all":
+        stats = stats_targets.keys()
+    else:
+        stats = stage_stats + config["stats"].get("all", [])
+    enabled_stats.extend(os.path.join("stats/summary", stats_targets[st].format(stage)) for st in stats)
+
+misc_targets = {"ref_profiles": "ref_profiles.tsv"}
+enabled_stats.extend(os.path.join("stats/summary", misc_targets[st].format(stage)) for st in config["stats"].get("misc", []))
+
+rule all_stats:
+    input:   enabled_stats
+    message: "Gathered stats: {input}"
+
 #===============================================================================
 #---- Statistics section -------------------------------------------------------
 #===============================================================================
 
-#---- Single alignments for samples per reference -------------------------------
-#TODO: use alignments from meta version instead
-rule quast_all_samples:
-    input:   ref_fn=ref_path, contigs=expand("assembly/{sample}.fasta", sample=GROUPS)
-    output:  summary_tsv="stats/summary/q_{ref}.tsv", report="stats/initial_assembly/{ref}/report.txt"
-    params:  "stats/initial_assembly/{ref}"
-    log:     "stats/initial_assembly/{ref}/quast.log"
-    threads: THREADS
-    message: "Aligning all samples on {wildcards.ref}"
-    shell:   "{QUAST} -t {threads} -R {input.ref_fn} {input.contigs} -o {params} >/dev/null 2>&1 && "
-             "cp {params}/report.tsv {output.summary_tsv}"
-
-rule quast_all_reassemblies:
-    input:   ref=ref_path, fragments=expand("profile/{cag_edges}.fasta", cag_edges=CAG_EDGES)
-    output:  "stats/reassembly/{ref}/report.txt"
-    params:  "stats/reassembly/{ref}"
-    log:     "stats/reassembly/{ref}/quast.log"
-    threads: THREADS
-    message: "Aligning all samples on {wildcards.ref}"
-    shell:   "{QUAST} -t {threads} -R {input.ref} {input.fragments} -o {params} >/dev/null 2>&1 && "
-             "cp {params}/report.tsv {output}"
-
-#---- Contigs of interest ------------------------------------------------------
+#---- F1 stats ----------------------------------------------------------------
 rule filter_ref_alignments:
-    input:   "{path}/report.txt"
-    output:  "{path}/{fragments}.info"
-    params:  "{path}/contigs_reports/nucmer_output/{fragments}.coords.filtered"
-    shell:   "if [ -f {params} ] ; then {SCRIPTS}/filter_nucmer.py {params} {output} {MIN_CONTIG_LENGTH} 70 ; else touch {output} ; fi"
-
-#---- GF of combined sample ----------------------------------------------------
-#rule combine_filtered:
-#    input:   contigs=expand("assembly/{sample}.fasta", sample=GROUPS),
-#             filters=expand("stats/{{ref}}/{sample}.cont", sample=GROUPS)
-#    output:  "stats/{ref}.fasta"
-#    message: "Gathering all interesting contigs for {wildcards.ref} into a single assembly"
-#    shell:   "{SCRIPTS}/filter_contigs.py {SAMPLE_COUNT} {output} {input.contigs} {input.filters}"
-
-rule quast_combined:
-    input:   ref=ref_path, contigs="stats/{ref}.fasta"
-    output:  "stats/q_{ref}_all/report.tsv"
-    params:  "stats/q_{ref}_all"
-    log:     "stats/q_{ref}_all.log"
-    threads: THREADS
-    message: "Aligning combined sample on {wildcards.ref}"
-    shell:   "{QUAST} -t {threads} -R {input.ref} {input.contigs} -o {params} >{log} 2>&1"
+    input:   "stats/summary/gf_{stage}.tsv"
+    output:  "stats/q_{stage}/{ref}.tsv"
+    params:  " ".join(BINS)
+    message: "Filtering alignments from {wildcards.stage} onto {wildcards.ref}"
+    shell:   "rm -f {output}\n"
+             "for bin in {params}\ndo\n"
+             "    file=stats/q_{wildcards.stage}/runs_per_reference/{wildcards.ref}/contigs_reports/nucmer_output/$bin.coords.filtered\n"
+             "    if [ -f $file ] ; then\n"
+             "        {SCRIPTS}/filter_nucmer.py $file {MIN_CONTIG_LENGTH} 70 | awk -v b=$bin '{{print b \"-\" $0}}' >> {output}\n"
+             "    fi\n"
+             "done\n"
+             "if [ -n {output} ] ; then touch {output} ; fi\n"
 
-# Run this
-rule quast_combined_all:
-    input:   expand("stats/q_{ref}_all/report.tsv", ref=REFS)
-    message: "Calculated QUAST metrics on all combined samples"
-
-#---- Bins of interest ---------------------------------------------------------
-rule int_bins:
-    input:   "annotation/{sample}.ann", "stats/{ref}/{sample}.info"
-    output:  "stats/{ref}/{sample}.bin"
-    message: "Filtering interesting bins for {wildcards.sample} aligned to {wildcards.ref}"
-    shell:   "{SCRIPTS}/filter_bins.py {input} > {output}"
-
-rule int_bins_all_samples:
-    input:   expand("stats/{{ref}}/{sample}.bin", sample=GROUPS)
-    output:  "stats/{ref}/total.bin"
-    message: "Gathering interesting bins for {wildcards.ref} from all samples"
-    run:
-        bins = set()
-        for in_fn in input:
-            with open(in_fn) as infile:
-                for line in infile:
-                    bins.add(line)
-        with open(output[0], "w") as outfile:
-            for bin in bins:
-                print(bin, file=outfile)
-
-# Run this
-rule int_bins_all:
-    input:   expand("stats/{ref}/total.bin", ref=REFS)
-    message: "Gathered all interesting bins"
-
-#---- GF per bin per reference -------------------------------------------------
-#Helper formatters for determining input files from different stages
-PROP = {"prelim": ("assembly/{}_splits.fasta",   "annotation/{}.ann"),
-        "prop":   ("propagation/{}_edges.fasta", "propagation/{}_edges.ann")}
-
-#TODO: split into different directories per sample
-rule split_bins:
-    input:   lambda w: PROP[w.prop][0].format(w.sample),
-             lambda w: PROP[w.prop][1].format(w.sample)
-    output:  touch("binning/{prop}/{sample}.log")
-    log:     "binning/{prop}/split_{sample}.log"
-    params:  "binning/{prop}"
-    message: "Splitting assembly of {wildcards.sample} between {wildcards.prop} bins"
-    shell:   "{SCRIPTS}/split_bins.py {input} {params} >{log}"
-
-rule cat_binned_contigs:
-    input:   expand("binning/{{prop}}/{sample}.log", sample=SAMPLES)
-    output:  "binning/{prop}/{cag,CAG\d+}.fasta"
-    params:  "`ls binning/{prop}/*-{cag}.fasta`"
-    message: "Combine binned contigs ({wildcards.prop}) for {wildcards.cag}"
-    shell:   "cat {params} > {output}"
-
-#Two helpers for determining dependencies of QUAST targets.
-#For split contigs and reassemblies, we need only corresponding FASTA.
-#For combined contigs, we need to glue their split pieces first.
-def stats_input(wildcards):
-    if wildcards.stage == "reassembly":
-        return expand("reassembly/{cag}.fasta", cag=CAGS)
-    w_bin, w_prop = wildcards.stage.split("_", 2)
-    if w_bin == "split":
-        return expand("binning/{prop}/{sample}.log", prop=w_prop, sample=GROUPS)
-    elif w_bin == "bin":
-        return expand("binning/{prop}/{cag}.fasta", prop=w_prop, cag=CAGS)
-
-def stats_data(wildcards):
-    if wildcards.stage == "reassembly":
-        return "`ls reassembly/CAG*.fasta`"
-    w_bin, w_prop = wildcards.stage.split("_", 2)
-    masks = {"bin": "CAG*", "split": "*-CAG*"}
-    return "`ls binning/{}/{}.fasta`".format(w_prop, masks[w_bin])
-
-rule quast_stats:
-    input:   stats_input
+rule combine_refs_info:
+    input:   expand("stats/q_{{stage}}/{ref}.tsv", ref=REFS)
+    output:  "stats/q_{stage}/total.tsv"
+    params:  " ".join(REFS)
+    message: "Combining good contigs of {wildcards.stage} for all bins"
+    shell:   "rm -f {output}\n"
+             "for ref in {params}\ndo\n"
+             "    file=stats/q_{wildcards.stage}/$ref.tsv\n"
+             "    if [ -f $file ] ; then\n"
+             "        awk -v r=$ref '{{print $0 \"\\t\" r}}' $file >> {output}\n"
+             "    fi\n"
+             "done"
+
+ruleorder: combine_refs_info > filter_ref_alignments
+
+rule calc_f1:
+    input:   binning="{stage}/binning.tsv", contigs="{stage}/all.fasta", etalons="stats/q_{stage}/total.tsv"
+    output:  "stats/summary/f1_{stage}.txt"
+    log:     "stats/f1_{stage}.log"
+    message: "Calculating F1 metrics for {wildcards.stage}"
+    shell:   "{SCRIPTS}/validate.pl --ffile={input.contigs} --cfile={input.binning} --sfile={input.etalons} --ofile={log} > {output}"
+
+#---- Reference profiles -------------------------------------------------------
+rule combine_refs:
+    input:   list(REFS.values())
+    output:  "stats/refs.fasta"
+    message: "Combining all references in a single file"
+    shell:   "cat {input} > {output}"
+
+rule ref_profiles:
+    input:   "stats/refs.fasta"
+    output:  "stats/summary/ref_profiles.tsv"
+    log:     "stats/ref_profiles.log"
+    message: "Calculating etalon profiles of references"
+    shell:   "{BIN}/contig_abundance_counter -k {PROFILE_K} -w tmp -c {input}"
+             " -n {SAMPLE_COUNT} -m profile/mts/kmers -o {output} >{log} 2>&1"
+
+#---- Genome fraction ----------------------------------------------------------
+rule genome_fraction:
+    input:   expand("{{stage}}/bins/{bin}.fasta", bin=BINS) #stats_input
     output:  "stats/summary/gf_{stage}.tsv"
-    params:  data=stats_data, out="stats/q_{stage}"
-    log:     "stats/q_{stage}.log"
+    params:  out="stats/q_{stage}"
+    log:     "stats/q_{stage}/.log"
     threads: THREADS
-    message: "Aligning {wildcards.stage} assemblies on all references"
-    shell:   "{METAQUAST} -t {threads} -R {ALL_REFS} {params.data} -o {params.out} >{log} 2>&1 && "
+    message: "Aligning all of {wildcards.stage} on all references"
+    shell:   "{METAQUAST} -t {threads} -R {REFS_STR} {input} -o {params.out} >/dev/null 2>&1 && "
              "cp '{params.out}/summary/TSV/Genome_fraction_(%).tsv' {output}"
 
-# Run this AFTER 'all'
-rule stats_all:
-    input:   expand("stats/summary/gf_{bin}_{prop}.tsv", bin=["bin"], prop=["prelim", "prop"]), 
-             "stats/initial_assembly/total.cont"
-    message: "Gathered some numbers, deal with them."
-
-#---- Reassembly statistics ----------------------------------------------------
-
-# Run this AFTER 'reassembly_all'
-rule stats_reassembly:
-    input:   "stats/summary/gf_reassembly.tsv",
-             "stats/reassembly/total.cont"
-    output:  "stats/summary/reassembly.tsv"
-    params:  "stats/q_reassembly"
-    message: "Gathered bins stats"
-    shell:   "{SCRIPTS}/gather_stats.py {params} > {output}"
+#---- GF per bin per reference -------------------------------------------------
+CONTIGS_INPUT_DIR = {"binning": "assembly/splits", "propagation": "propagation/edges"}
+
+rule combine_splits:
+    input:   expand("propagation/edges/{group}.fasta", assembler=ASSEMBLER, group=GROUPS)
+    output:  "propagation/edges/all.fasta"
+    message: "Combine all propagated edges"
+    shell:   "{SCRIPTS}/combine_contigs.py {input} > {output}"
+
+#Redistribute sample pieces to corresponding bins
+rule filter_bin:
+    input:   contigs=lambda w: "{}/all.fasta".format(CONTIGS_INPUT_DIR[w.stage]),
+             ann="{stage}/annotation/all.ann"
+    output:  "{stage,(binning|propagation)}/bins/{bin,\w*\d+}.fasta"
+    message: "Filtering contigs from {wildcards.bin} for all of {wildcards.stage}"
+    shell:   "{SCRIPTS}/filter_bin.py {input.contigs} {input.ann} {wildcards.bin} >{output}"
 
 #---- Propagator statistics ----------------------------------------------------
-rule prop_stats:
-    input:   prelim="annotation/{sample}.ann", prop="annotation/{sample}_edges.ann",
-             contigs="assembly/{sample}.fasta", edges="assembly/{sample}_edges.fasta",
-             ref=REFS.values() #, bins="{sample}/{ref}.bin"
-    output:  "stats/prop_{cag}/{sample}.tsv"
-    log:     "stats/prop_{cag}/{sample}.log"
-    message: "Calculating propagation statistics for {wildcards.sample}"
-    shell:   "{BIN}/stats -k {K} -s {wildcards.sample}/assembly/{SAVES} -r {input.ref}"
-             " -c {input.contigs} -a {input.prelim} -e {input.edges} -p {input.prop}"
-             " -b {wildcards.cag} -o {output} >{log}"
+# rule prop_stats:
+#     input:   prelim="binning/annotation/{sample}.ann", prop="propagation/annotation/{sample}.ann",
+#              contigs="assembly/{sample}.fasta", edges="assembly/edges/{sample}.fasta",
+#              ref=REFS.values() #, bins="{sample}/{ref}.bin"
+#     output:  "stats/prop_{bin}/{sample}.tsv"
+#     log:     "stats/prop_{bin}/{sample}.log"
+#     message: "Calculating propagation statistics for {wildcards.sample}"
+#     shell:   "{BIN}/stats -k {K} -s {wildcards.sample}/assembly/{SAVES} -r {input.ref}"
+#              " -c {input.contigs} -a {input.prelim} -e {input.edges} -p {input.prop}"
+#              " -b {wildcards.bin} -o {output} >{log}"
 
 # Run this
-rule prop_stats_all:
-    input:   expand("stats/prop_{cag}/{sample}.tsv", sample=GROUPS, cag=CAGS)
-    message: "Calculated propagation statistics"
+# rule prop_stats_all:
+#     input:   expand("stats/prop_{cag}/{sample}.tsv", sample=GROUPS, cag=BINS)
+#     message: "Calculated propagation statistics"
+
+#===============================================================================
+#---- Reference-free stats and metrics -----------------------------------------
+#===============================================================================
 
 #---- CheckM stats -------------------------------------------------------------
 rule checkm:
-    input:   expand("reassembly/{cag}.fasta", cag=CAGS)
-    output:  qa="stats/checkm/qa.tsv", tree_qa="stats/checkm/tree_qa.tsv"
-    params:  dir="stats/checkm"
+    output:  qa="stats/checkm_{stage}/qa.tsv", tree_qa="stats/checkm_{stage}/tree_qa.tsv"
+    params:  dir="{stage}/bins", out="stats/checkm_{stage}",
     threads: THREADS
-    shell:   "set +u; source activate concoct_env; set -u \n"
-             "checkm tree -x fasta reassembly {params.dir} \n"
-             "checkm tree_qa -o 2 --tab_table -f {output.tree_qa} {params.dir}\n"
-             "checkm lineage_set {params.dir} {params.dir}/lineage.ms\n"
-             "checkm analyze -x fasta {params.dir}/lineage.ms reassembly {params.dir}\n"
-             "checkm qa -o 2 --tab_table -f {output.qa} {params.dir}/lineage.ms {params.dir}"
+    log:     "stats/checkm_{stage}.log"
+    message: "Running CheckM for results of {wildcards.stage}"
+    run:
+        # for file in os.listdir(params.dir):
+        #     ext = os.path.splitext(file)
+        #     if ext in FASTA_EXTS:
+        #         break
+        ext = ".fasta"
+        shell("set +u; source activate py27; set -u \n"
+             "checkm tree -t {THREADS} --pplacer_threads {THREADS} -x {ext} {params.dir} {params.out} >{log} 2>&1\n"
+             "checkm tree_qa -o 2 --tab_table -f {output.tree_qa} {params.out} >>{log} 2>&1\n"
+             "checkm lineage_set {params.out} {params.out}/lineage.ms >>{log} 2>&1\n"
+             "checkm analyze -t {THREADS} -x fasta {params.out}/lineage.ms {params.dir} {params.out} >>{log} 2>&1\n"
+             "checkm qa -t {THREADS} -o 2 --tab_table -f {output.qa} {params.out}/lineage.ms {params.out} >>{log} 2>&1")
 
 rule parse_checkm:
     input:   qa=rules.checkm.output.qa, tree_qa=rules.checkm.output.tree_qa
-    output:  "stats/summary/checkm.tsv"
-    #shell:   "{SCRIPTS}/parse_checkm.py {input.qa} {input.tree_qa} > {output}"
+    output:  "stats/summary/checkm_{stage}.tsv"
+    message: "Parse CheckM results for {wildcards.stage}"
     run:
         table = pandas.read_table(input.qa, dtype="str")
         tree_table = pandas.read_table(input.tree_qa, dtype="str", na_filter=False)
@@ -238,33 +212,50 @@ rule parse_checkm:
             res_table[column] = res_table[column].apply(extract_taxon)
         res_table.to_csv(output[0], index=False, sep="\t")
 
-#---- PCA ----------------------------------------------------------------------
-rule pca:
-    input:   "profile/canopy.in", "profile/canopy.out", "{sample}.cont"
-    output:  "stats/{sample}.png"
-    message: "Doing some visualization"
-    shell:
-        "Rscript {SCRIPTS}/pca.R {input} {output}"
+rule combine_bins:
+    input:   expand("{{stage}}/bins/{bin}.fasta", bin=BINS)
+    output:  "{stage,(binning|propagation|reassembly)}/all.fasta"
+    message: "Combine all contigs from {wildcards.stage}"
+    shell:   "{SCRIPTS}/combine_contigs.py -r {input} > {output}"
 
-def fragments_info_by_assembly_type(wildcards):
-    frags=FRAGMENT_NAMES_BY_TYPE[wildcards.assembly_type]
-    return expand("stats/{assembly_type}/{ref}/{fragments}.info", assembly_type=wildcards.assembly_type, ref=wildcards.ref, fragments=frags)
+rule reassembly_mock_binning:
+    output:  "reassembly/binning.tsv"
+    message: "Preparing reassembly mock binning"
+    run:
+        shell("rm -f {output}")
+        for bin in BINS:
+            shell("grep '>' reassembly/bins/{bin}.fasta | cut -c 2- | awk '{{print \"{bin}-\" $0 \"\\t{bin}\"}}' >> {output}")
+
+#---- Summary table ------------------------------------------------------------
+rule gather_stats:
+    input:   "stats/summary/gf_{stage}.tsv"
+    output:  "stats/summary/{stage}_summary.tsv"
+    params:  "--problematic", "--heatmap", "stats/q_{stage}", "stats/summary/{stage}"
+    message: "Gathering {wildcards.stage} stats"
+    shell:   "{SCRIPTS}/gather_stats.py {params}"
 
-rule combine_fragments_info:
-    input:  fragments_info_by_assembly_type 
-    output: "stats/{assembly_type}/{ref}/ref.cont"
-    shell: "rm -rf {output}; for f in {input}; do name=$(basename $f .info); cat $f | sed 's/^/'$name'-/g' >> {output} ; done"
+#---- PCA ----------------------------------------------------------------------
+# FRAGMENT_NAMES_BY_STAGE = {"reassembly": CAG_EDGES,
+#                            "assembly": list(GROUPS.keys())}
 
-rule combine_refs_info:
-    input:  expand("stats/{{assembly_type}}/{ref}/ref.cont", ref=list(REFS.keys()))
-    output:  "stats/{assembly_type}/total.cont"
-    run:
-        shell("rm -rf {output}")
-        for ref in REFS.keys():
-            shell("awk '{{print $0 \"\t{ref}\"}}' stats/{wildcards.assembly_type}/{ref}/ref.cont >> {output}")
+def fragments_info_by_stage(wildcards):
+    fragments=FRAGMENT_NAMES_BY_STAGE[wildcards.stage]
+    return expand("stats/q_{stage}/runs_per_reference/{ref}/{fs}.info", stage=wildcards.stage, ref=wildcards.ref, fs=fragments)
 
-# Run this
-rule pca_total:
-    input:   "binning/canopy/profiles.in", "binning/canopy/binning.out", "stats/total.cont"
-    output:  "stats/summary/pca.png"
+rule pca:
+    input:   "profile/{}/all.tsv".format(PROFILER),
+             "binning/{}/unified_binning.tsv".format(BINNER),
+             #"stats/q_{stage}/total.info"
+    output:  "stats/summary/pca_{stage}.png"
+    message: "Drawing PCA visualisation for {wildcards.stage}"
     shell:   "Rscript {SCRIPTS}/pca.R {input} {output}"
+
+#---- TSNE ----------------------------------------------------------------------
+
+rule tsne:
+    input:   "profile/{}/all.tsv".format(PROFILER),
+             "binning/{}/unified_binning.tsv".format(BINNER),
+    output:  "stats/summary/tsne_{stage}.png"
+    message: "Drawing BH-TSNE visualisation for {wildcards.stage}"
+    shell:   "python2 {SCRIPTS}/run_tsne.py {input} {output}"
+             " --percent 1.0 --iteration 2000 --perplexity 50"
diff --git a/src/projects/mts/annotation.hpp b/src/projects/mts/annotation.hpp
index fa9ccf8..17be8dc 100644
--- a/src/projects/mts/annotation.hpp
+++ b/src/projects/mts/annotation.hpp
@@ -20,11 +20,8 @@ class AnnotationStream {
 
     ContigAnnotation Parse(const std::string& s) const {
         ContigAnnotation annotation;
-        stringstream ss(s);
+        std::istringstream ss(s);
         ss >> annotation.first;
-        string delim;
-        ss >> delim;
-        VERIFY(delim == ":");
         while (true) {
             bin_id bin;
             ss >> bin;
@@ -67,7 +64,7 @@ public:
 
     AnnotationOutStream& operator <<(const ContigAnnotation& annotation) {
         inner_stream_ << annotation.first;
-        string delim = " : ";
+        string delim = "\t";
         for (bin_id bin : annotation.second) {
             inner_stream_ << delim << bin;
             delim = " ";
@@ -121,14 +118,14 @@ public:
         if (!edge_annotation_.count(e)) {
             return {};
         }
-        const auto& annotation = get(edge_annotation_, e);
+        const auto& annotation = utils::get(edge_annotation_, e);
         return vector<bin_id>(annotation.begin(), annotation.end());
     }
 
     set<bin_id> RelevantBins(const vector<EdgeId>& path) const {
         set<bin_id> answer;
         for (EdgeId e : path) {
-            insert_all(answer, Annotation(e));
+            utils::insert_all(answer, Annotation(e));
         }
         return answer;
     }
@@ -171,7 +168,7 @@ class AnnotationFiller {
             for (const bin_id& bin : bins) {
                 if (interesting_bins_.count(bin)) {
                     answer.push_back(bin);
-                } 
+                }
             }
             return answer;
         }
@@ -186,7 +183,7 @@ class AnnotationFiller {
             splits_annotation_stream >> contig_annotation;
             auto bins = FilterInteresting(contig_annotation.second);
             if (!bins.empty()) {
-                insert_all(annotation_map[contig_annotation.first], bins);
+                utils::insert_all(annotation_map[contig_annotation.first], bins);
             }
             ++cnt;
         }
@@ -226,11 +223,15 @@ class AnnotationFiller {
         return answer;
     };
 
+    static bool IsSpurious(size_t colored_len, size_t full_len) {
+        return math::ls(double(colored_len) / double(full_len), 0.3);
+    }
+
     void FilterSpuriousInfo(map<EdgeId, map<bin_id, size_t>>& coloring) const {
         for (auto& edge_info : coloring) {
             size_t edge_len = gp_.g.length(edge_info.first);
             for (auto color_it = edge_info.second.begin(); color_it != edge_info.second.end(); ) {
-                if (math::ls(double(color_it->second) / double(edge_len), 0.3)) {
+                if (IsSpurious(color_it->second, edge_len)) {
                     edge_info.second.erase(color_it++);
                 } else {
                     ++color_it;
@@ -265,8 +266,15 @@ class AnnotationFiller {
             }
         }
         set<bin_id> answer;
+        using ColorInfo = std::pair<bin_id, size_t>;
+        auto it = std::max_element(path_colors.begin(), path_colors.end(),
+            [](const ColorInfo& p1, const ColorInfo& p2) {
+                 return p1.second < p2.second;
+             });
+         if (it != path_colors.end())
+            answer.insert(it->first);
         for (auto color_info : path_colors) {
-            if (math::gr(double(color_info.second) / double(total_len), 0.3)) {
+            if (!IsSpurious(color_info.second, total_len)) {
                 answer.insert(color_info.first);
             }
         }
diff --git a/src/projects/mts/config.yaml b/src/projects/mts/config.yaml
index 0150528..65347f2 100644
--- a/src/projects/mts/config.yaml
+++ b/src/projects/mts/config.yaml
@@ -1,10 +1,24 @@
-IN: "/Sid/snurk/mts/sim/data"
-SPADES: "~/Projects/mts/assembler/"
-QUAST: "python2 ~/opt/quast-3.2/metaquast.py"
-BIN: "~/Projects/mts/assembler/build/release/bin"
-SCRIPTS: "~/Projects/mts/assembler/src/projects/mts/scripts"
-SOFT: "/home/snurk/soft/"
-REF: "/Sid/snurk/mts/nielsen/ref.fasta"
-K: 55
-small_k: 21
-MIN_CONTIG_LENGTH: 2000
+data: "path/to/samples/directory"
+soft: "path/to/canopy/etc"
+#bin: "/path/to/mts/binaries" #Derived from the current dir by default
+#scripts: "/path/to/scripts/directory" #Derived from the current dir by default
+#assembly:
+    #assembler: spades
+    #dir: "/path/to/assembler/directory" #Derived from the current dir by default
+    #k: 55
+    #groups: []
+#profile:
+    #k: 21
+    #split: 10000
+#binning:
+    #binner: canopy
+    #min_nonzeroes: 3
+    #min_length: 2000
+#propagation:
+    #enabled: true
+#reassembly:
+    #enabled: true
+#stats:
+    #stage: ["gf", "summary", "pca", "tsne", "checkm"] #stage can be binning, propagation, reassembly, or all
+    #quast: "python2 /path/to/QUAST" #Default is global
+    #refs: "path/to/refs/directory/or/list/of/them"
diff --git a/src/projects/mts/contig_abundance.cpp b/src/projects/mts/contig_abundance.cpp
index ef00ce7..61bd8ad 100644
--- a/src/projects/mts/contig_abundance.cpp
+++ b/src/projects/mts/contig_abundance.cpp
@@ -1,5 +1,5 @@
 #include "contig_abundance.hpp"
-#include "utils/indices/kmer_splitters.hpp"
+#include "utils/kmer_mph/kmer_splitters.hpp"
 
 namespace debruijn_graph {
 
@@ -13,7 +13,7 @@ size_t SampleCount() {
     return sample_cnt_;
 }
 
-MplVector SingleClusterAnalyzer::SampleMpls(const KmerProfiles& kmer_mpls, size_t sample) const {
+MplVector SampleMpls(const KmerProfiles& kmer_mpls, size_t sample) {
     MplVector answer;
     answer.reserve(kmer_mpls.size());
     for (const auto& kmer_mpl : kmer_mpls) {
@@ -22,14 +22,14 @@ MplVector SingleClusterAnalyzer::SampleMpls(const KmerProfiles& kmer_mpls, size_
     return answer;
 }
 
-Mpl SingleClusterAnalyzer::SampleMedian(const KmerProfiles& kmer_mpls, size_t sample) const {
+Mpl SampleMedian(const KmerProfiles& kmer_mpls, size_t sample) {
     std::vector<Mpl> sample_mpls = SampleMpls(kmer_mpls, sample);
 
     std::nth_element(sample_mpls.begin(), sample_mpls.begin() + sample_mpls.size()/2, sample_mpls.end());
     return sample_mpls[sample_mpls.size()/2];
 }
 
-MplVector SingleClusterAnalyzer::MedianVector(const KmerProfiles& kmer_mpls) const {
+MplVector MedianVector(const KmerProfiles& kmer_mpls) {
     VERIFY(kmer_mpls.size() != 0);
     MplVector answer(SampleCount(), 0);
     for (size_t i = 0; i < SampleCount(); ++i) {
@@ -66,42 +66,44 @@ KmerProfiles SingleClusterAnalyzer::CloseKmerMpls(const KmerProfiles& kmer_mpls,
     return answer;
 }
 
-boost::optional<AbundanceVector> SingleClusterAnalyzer::operator()(const KmerProfiles& kmer_mpls) const {
+boost::optional<AbundanceVector> TrivialClusterAnalyzer::operator()(const KmerProfiles& kmer_mpls) const {
     auto med = MedianVector(kmer_mpls);
     return AbundanceVector(med.begin(), med.end());
-    //return boost::optional<AbundanceVector>(answer);
-    //MplVector center = MedianVector(kmer_mpls);
-    //auto locality = CloseKmerMpls(kmer_mpls, KmerProfile(center));
-
-    //for (size_t it_cnt = 0; it_cnt < MAX_IT; ++it_cnt) {
-    //    DEBUG("Iteration " << it_cnt);
-    //    DEBUG("Center is " << PrintVector(center));
-
-    //    DEBUG("Locality size is " << locality.size()
-    //              << " making " << (double(locality.size()) / double(kmer_mpls.size()))
-    //              << " of total # points");
-
-    //    double center_share = double(locality.size()) / double(kmer_mpls.size());
-    //    if (math::ls(center_share, central_clust_share_)) {
-    //        DEBUG("Detected central area contains too few k-mers: share " << center_share
-    //                  << " ; center size " << locality.size()
-    //                  << " ; total size " << kmer_mpls.size());
-    //        return boost::none;
-    //    }
-
-    //    MplVector update = MedianVector(locality);
-    //    DEBUG("Center update is " << PrintVector(update));
-
-    //    if (center == update) {
-    //        DEBUG("Old and new centers matched on iteration " << it_cnt);
-    //        break;
-    //    }
-
-    //    center = update;
-    //    locality = CloseKmerMpls(kmer_mpls, center);
-    //}
-
-    //return boost::optional<AbundanceVector>(MeanVector(locality, sample_cnt_));
+}
+
+boost::optional<AbundanceVector> SingleClusterAnalyzer::operator()(const KmerProfiles& kmer_mpls) const {
+    MplVector center = MedianVector(kmer_mpls);
+    auto locality = CloseKmerMpls(kmer_mpls, KmerProfile(center));
+
+    for (size_t it_cnt = 0; it_cnt < MAX_IT; ++it_cnt) {
+        DEBUG("Iteration " << it_cnt);
+        DEBUG("Center is " << PrintVector(center));
+
+        DEBUG("Locality size is " << locality.size()
+                  << " making " << (double(locality.size()) / double(kmer_mpls.size()))
+                  << " of total # points");
+
+        double center_share = double(locality.size()) / double(kmer_mpls.size());
+        if (math::ls(center_share, central_clust_share_)) {
+            DEBUG("Detected central area contains too few k-mers: share " << center_share
+                      << " ; center size " << locality.size()
+                      << " ; total size " << kmer_mpls.size());
+            return boost::none;
+        }
+
+        MplVector update = MedianVector(locality);
+        DEBUG("Center update is " << PrintVector(update));
+
+        if (center == update) {
+            DEBUG("Old and new centers matched on iteration " << it_cnt);
+            break;
+        }
+
+        center = update;
+        locality = CloseKmerMpls(kmer_mpls, center);
+    }
+
+    return boost::optional<AbundanceVector>(MeanVector(locality));
 }
 
 vector<std::string> ContigAbundanceCounter::SplitOnNs(const std::string& seq) const {
@@ -170,7 +172,7 @@ boost::optional<AbundanceVector> ContigAbundanceCounter::operator()(
         return boost::none;
     }
 
-    return cluster_analyzer_(kmer_mpls);
+    return (*cluster_analyzer_)(kmer_mpls);
 }
 
 }
diff --git a/src/projects/mts/contig_abundance.hpp b/src/projects/mts/contig_abundance.hpp
index fb5c9d7..eeb7ae1 100644
--- a/src/projects/mts/contig_abundance.hpp
+++ b/src/projects/mts/contig_abundance.hpp
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "pipeline/graph_pack.hpp"
-#include "utils/indices/perfect_hash_map_builder.hpp"
+#include "utils/ph_map/perfect_hash_map_builder.hpp"
 
 namespace debruijn_graph {
 
@@ -77,15 +77,32 @@ std::string PrintVector(const AbVector& mpl_vector) {
     return ss.str();
 }
 
-class SingleClusterAnalyzer {
+MplVector SampleMpls(const KmerProfiles& kmer_mpls, size_t sample);
+Mpl SampleMedian(const KmerProfiles& kmer_mpls, size_t sample);
+MplVector MedianVector(const KmerProfiles& kmer_mpls);
+
+class ClusterAnalyzer {
+public:
+    virtual boost::optional<AbundanceVector> operator()(const KmerProfiles& kmer_mpls) const = 0;
+    virtual ~ClusterAnalyzer() {};
+};
+
+class TrivialClusterAnalyzer : public ClusterAnalyzer {
+public:
+    TrivialClusterAnalyzer() {}
+
+    boost::optional<AbundanceVector> operator()(const KmerProfiles& kmer_mpls) const override;
+
+private:
+    DECL_LOGGER("TrivialClusterAnalyzer");
+};
+
+class SingleClusterAnalyzer : public ClusterAnalyzer {
     static const uint MAX_IT = 10;
 
     double coord_vise_proximity_;
     double central_clust_share_;
 
-    MplVector SampleMpls(const KmerProfiles& kmer_mpls, size_t sample) const;
-    Mpl SampleMedian(const KmerProfiles& kmer_mpls, size_t sample) const;
-    MplVector MedianVector(const KmerProfiles& kmer_mpls) const;
     bool AreClose(const KmerProfile& c, const KmerProfile& v) const;
     KmerProfiles CloseKmerMpls(const KmerProfiles& kmer_mpls, const KmerProfile& center) const;
 
@@ -96,34 +113,32 @@ public:
         central_clust_share_(central_clust_share) {
     }
 
-    boost::optional<AbundanceVector> operator()(const KmerProfiles& kmer_mpls) const;
+    boost::optional<AbundanceVector> operator()(const KmerProfiles& kmer_mpls) const override;
 
 private:
     DECL_LOGGER("SingleClusterAnalyzer");
 };
 
 class ContigAbundanceCounter {
-    typedef typename InvertableStoring::trivial_inverter<Offset> InverterT;
+    typedef typename utils::InvertableStoring::trivial_inverter<Offset> InverterT;
 
-    typedef KeyStoringMap<conj_graph_pack::seq_t,
+    typedef utils::KeyStoringMap<conj_graph_pack::seq_t,
                           Offset,
-                          kmer_index_traits<conj_graph_pack::seq_t>,
-                          InvertableStoring> IndexT;
+                          utils::kmer_index_traits<conj_graph_pack::seq_t>,
+                          utils::InvertableStoring> IndexT;
 
     unsigned k_;
-    SingleClusterAnalyzer cluster_analyzer_;
+    shared_ptr<ClusterAnalyzer> cluster_analyzer_;
     double min_earmark_share_;
     IndexT kmer_mpl_;
     InverterT inverter_;
     std::vector<Mpl> mpl_data_;
 
-    void FillMplMap(const std::string& kmers_mpl_file);
-
     vector<std::string> SplitOnNs(const std::string& seq) const;
 
 public:
     ContigAbundanceCounter(unsigned k,
-                           const SingleClusterAnalyzer& cluster_analyzer,
+                           shared_ptr<ClusterAnalyzer> cluster_analyzer,
                            const std::string& work_dir,
                            double min_earmark_share = 0.7) :
         k_(k),
diff --git a/src/projects/mts/contig_abundance_counter.cpp b/src/projects/mts/contig_abundance_counter.cpp
index f2a2ba8..4daf7c0 100644
--- a/src/projects/mts/contig_abundance_counter.cpp
+++ b/src/projects/mts/contig_abundance_counter.cpp
@@ -15,39 +15,28 @@ using namespace debruijn_graph;
 class Runner {
 public:
     static void Run(ContigAbundanceCounter& abundance_counter, size_t min_length_bound,
-                    io::FileReadStream& contigs_stream, io::osequencestream& splits_os,
-                    std::ofstream& id_out, std::ofstream& mpl_out) {
-        static const size_t split_length = 10000;
-        io::SingleRead full_contig;
+                    io::FileReadStream& contigs_stream, std::ofstream& out) {
+        io::SingleRead contig;
         while (!contigs_stream.eof()) {
-            contigs_stream >> full_contig;
-            DEBUG("Analyzing contig " << GetId(full_contig));
+            contigs_stream >> contig;
+            contig_id id = GetId(contig);
+            DEBUG("Analyzing contig " << id);
 
-            for (size_t i = 0; i < full_contig.size(); i += split_length) {
-                if (full_contig.size() - i < min_length_bound) {
-                    DEBUG("Fragment shorter than min_length_bound " << min_length_bound);
-                    break;
-                }
-
-                io::SingleRead contig = full_contig.Substr(i, std::min(i + split_length, full_contig.size()));
-                splits_os << contig;
-
-                contig_id id = GetId(contig);
-                DEBUG("Processing fragment # " << (i / split_length) << " with id " << id);
-
-                auto abundance_vec = abundance_counter(contig.GetSequenceString(), contig.name());
+            if (contig.size() < min_length_bound) {
+                DEBUG("Fragment is shorter than min_length_bound " << min_length_bound);
+                break;
+            }
 
-                if (abundance_vec) {
-                    stringstream ss;
-                    copy(abundance_vec->begin(), abundance_vec->end(),
-                         ostream_iterator<Mpl>(ss, " "));
-                    DEBUG("Successfully estimated abundance of " << id << " : " << ss.str());
+            auto abundance_vec = abundance_counter(contig.GetSequenceString(), contig.name());
 
-                    id_out << id << std::endl;
-                    mpl_out << ss.str() << std::endl;
-                } else {
-                    DEBUG("Failed to estimate abundance of " << id);
-                }
+            if (abundance_vec) {
+                DEBUG("Successfully estimated abundance of " << id);
+                out << id;
+                for (auto mpl : *abundance_vec)
+                     out << "\t" << mpl;
+                out << std::endl;
+            } else {
+                DEBUG("Failed to estimate abundance of " << id);
             }
         }
     }
@@ -60,7 +49,7 @@ int main(int argc, char** argv) {
 
     unsigned k;
     size_t sample_cnt, min_length_bound;
-    std::string work_dir, contigs_path, splits_path;
+    std::string work_dir, contigs_path;
     std::string kmer_mult_fn, contigs_abundance_fn;
 
     try {
@@ -69,14 +58,13 @@ int main(int argc, char** argv) {
         ops >> Option('k', k)
             >> Option('w', work_dir)
             >> Option('c', contigs_path)
-            >> Option('f', splits_path)
             >> Option('n', sample_cnt)
             >> Option('m', kmer_mult_fn)
             >> Option('o', contigs_abundance_fn)
             >> Option('l', min_length_bound, size_t(0));
     } catch(GetOptEx &ex) {
         std::cout << "Usage: contig_abundance_counter -k <K> -w <work_dir> -c <contigs path> "
-                "-n <sample cnt> -m <kmer multiplicities path> -f <splits_path> "
+                "-n <sample cnt> -m <kmer multiplicities path> "
                 "-o <contigs abundance path> [-l <contig length bound> (default: 0)]"  << std::endl;
         exit(1);
     }
@@ -85,17 +73,14 @@ int main(int argc, char** argv) {
     create_console_logger();
 
     SetSampleCount(sample_cnt);
-    ContigAbundanceCounter abundance_counter(k, SingleClusterAnalyzer(), work_dir);
+    ContigAbundanceCounter abundance_counter(k, make_shared<TrivialClusterAnalyzer>(), work_dir);
     abundance_counter.Init(kmer_mult_fn);
 
     io::FileReadStream contigs_stream(contigs_path);
-    io::osequencestream splits_os(splits_path);
 
-    std::ofstream id_out(contigs_abundance_fn + ".id");
-    std::ofstream mpl_out(contigs_abundance_fn + ".mpl");
+    std::ofstream out(contigs_abundance_fn);
 
     Runner::Run(abundance_counter, min_length_bound,
-                contigs_stream, splits_os,
-                id_out, mpl_out);
+                contigs_stream, out);
     return 0;
 }
diff --git a/src/projects/mts/gzstream/gzstream.C b/src/projects/mts/gzstream/gzstream.C
new file mode 100644
index 0000000..688b625
--- /dev/null
+++ b/src/projects/mts/gzstream/gzstream.C
@@ -0,0 +1,165 @@
+// ============================================================================
+// gzstream, C++ iostream classes wrapping the zlib compression library.
+// Copyright (C) 2001  Deepak Bandyopadhyay, Lutz Kettner
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+// ============================================================================
+//
+// File          : gzstream.C
+// Revision      : $Revision: 1.7 $
+// Revision_date : $Date: 2003/01/08 14:41:27 $
+// Author(s)     : Deepak Bandyopadhyay, Lutz Kettner
+//
+// Standard streambuf implementation following Nicolai Josuttis, "The
+// Standard C++ Library".
+// ============================================================================
+
+#include "gzstream.h"
+#include <iostream>
+#include <string.h>  // for memcpy
+
+#ifdef GZSTREAM_NAMESPACE
+namespace GZSTREAM_NAMESPACE {
+#endif
+
+// ----------------------------------------------------------------------------
+// Internal classes to implement gzstream. See header file for user classes.
+// ----------------------------------------------------------------------------
+
+// --------------------------------------
+// class gzstreambuf:
+// --------------------------------------
+
+gzstreambuf* gzstreambuf::open( const char* name, int open_mode) {
+    if ( is_open())
+        return (gzstreambuf*)0;
+    mode = open_mode;
+    // no append nor read/write mode
+    if ((mode & std::ios::ate) || (mode & std::ios::app)
+        || ((mode & std::ios::in) && (mode & std::ios::out)))
+        return (gzstreambuf*)0;
+    char  fmode[10];
+    char* fmodeptr = fmode;
+    if ( mode & std::ios::in)
+        *fmodeptr++ = 'r';
+    else if ( mode & std::ios::out)
+        *fmodeptr++ = 'w';
+    *fmodeptr++ = 'b';
+    *fmodeptr = '\0';
+    file = gzopen( name, fmode);
+    if (file == 0)
+        return (gzstreambuf*)0;
+    opened = 1;
+    return this;
+}
+
+gzstreambuf * gzstreambuf::close() {
+    if ( is_open()) {
+        sync();
+        opened = 0;
+        if ( gzclose( file) == Z_OK)
+            return this;
+    }
+    return (gzstreambuf*)0;
+}
+
+int gzstreambuf::underflow() { // used for input buffer only
+    if ( gptr() && ( gptr() < egptr()))
+        return * reinterpret_cast<unsigned char *>( gptr());
+
+    if ( ! (mode & std::ios::in) || ! opened)
+        return EOF;
+    // Josuttis' implementation of inbuf
+    int n_putback = gptr() - eback();
+    if ( n_putback > 4)
+        n_putback = 4;
+    memcpy( buffer + (4 - n_putback), gptr() - n_putback, n_putback);
+
+    int num = gzread( file, buffer+4, bufferSize-4);
+    if (num <= 0) // ERROR or EOF
+        return EOF;
+
+    // reset buffer pointers
+    setg( buffer + (4 - n_putback),   // beginning of putback area
+          buffer + 4,                 // read position
+          buffer + 4 + num);          // end of buffer
+
+    // return next character
+    return * reinterpret_cast<unsigned char *>( gptr());
+}
+
+int gzstreambuf::flush_buffer() {
+    // Separate the writing of the buffer from overflow() and
+    // sync() operation.
+    int w = pptr() - pbase();
+    if ( gzwrite( file, pbase(), w) != w)
+        return EOF;
+    pbump( -w);
+    return w;
+}
+
+int gzstreambuf::overflow( int c) { // used for output buffer only
+    if ( ! ( mode & std::ios::out) || ! opened)
+        return EOF;
+    if (c != EOF) {
+        *pptr() = c;
+        pbump(1);
+    }
+    if ( flush_buffer() == EOF)
+        return EOF;
+    return c;
+}
+
+int gzstreambuf::sync() {
+    // Changed to use flush_buffer() instead of overflow( EOF)
+    // which caused improper behavior with std::endl and flush(),
+    // bug reported by Vincent Ricard.
+    if ( pptr() && pptr() > pbase()) {
+        if ( flush_buffer() == EOF)
+            return -1;
+    }
+    return 0;
+}
+
+// --------------------------------------
+// class gzstreambase:
+// --------------------------------------
+
+gzstreambase::gzstreambase( const char* name, int mode) {
+    init( &buf);
+    open( name, mode);
+}
+
+gzstreambase::~gzstreambase() {
+    buf.close();
+}
+
+void gzstreambase::open( const char* name, int open_mode) {
+    if ( ! buf.open( name, open_mode))
+        clear( rdstate() | std::ios::badbit);
+}
+
+void gzstreambase::close() {
+    if ( buf.is_open())
+        if ( ! buf.close())
+            clear( rdstate() | std::ios::badbit);
+}
+
+#ifdef GZSTREAM_NAMESPACE
+} // namespace GZSTREAM_NAMESPACE
+#endif
+
+// ============================================================================
+// EOF //
diff --git a/src/projects/mts/gzstream/gzstream.h b/src/projects/mts/gzstream/gzstream.h
new file mode 100644
index 0000000..861653f
--- /dev/null
+++ b/src/projects/mts/gzstream/gzstream.h
@@ -0,0 +1,121 @@
+// ============================================================================
+// gzstream, C++ iostream classes wrapping the zlib compression library.
+// Copyright (C) 2001  Deepak Bandyopadhyay, Lutz Kettner
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+// ============================================================================
+//
+// File          : gzstream.h
+// Revision      : $Revision: 1.5 $
+// Revision_date : $Date: 2002/04/26 23:30:15 $
+// Author(s)     : Deepak Bandyopadhyay, Lutz Kettner
+// 
+// Standard streambuf implementation following Nicolai Josuttis, "The 
+// Standard C++ Library".
+// ============================================================================
+
+#ifndef GZSTREAM_H
+#define GZSTREAM_H 1
+
+// standard C++ with new header file names and std:: namespace
+#include <iostream>
+#include <fstream>
+#include <zlib.h>
+
+#ifdef GZSTREAM_NAMESPACE
+namespace GZSTREAM_NAMESPACE {
+#endif
+
+// ----------------------------------------------------------------------------
+// Internal classes to implement gzstream. See below for user classes.
+// ----------------------------------------------------------------------------
+
+class gzstreambuf : public std::streambuf {
+private:
+    static const int bufferSize = 47+256;    // size of data buff
+    // totals 512 bytes under g++ for igzstream at the end.
+
+    gzFile           file;               // file handle for compressed file
+    char             buffer[bufferSize]; // data buffer
+    char             opened;             // open/close state of stream
+    int              mode;               // I/O mode
+
+    int flush_buffer();
+public:
+    gzstreambuf() : opened(0) {
+        setp( buffer, buffer + (bufferSize-1));
+        setg( buffer + 4,     // beginning of putback area
+              buffer + 4,     // read position
+              buffer + 4);    // end position      
+        // ASSERT: both input & output capabilities will not be used together
+    }
+    int is_open() { return opened; }
+    gzstreambuf* open( const char* name, int open_mode);
+    gzstreambuf* close();
+    ~gzstreambuf() { close(); }
+    
+    virtual int     overflow( int c = EOF);
+    virtual int     underflow();
+    virtual int     sync();
+};
+
+class gzstreambase : virtual public std::ios {
+protected:
+    gzstreambuf buf;
+public:
+    gzstreambase() { init(&buf); }
+    gzstreambase( const char* name, int open_mode);
+    ~gzstreambase();
+    void open( const char* name, int open_mode);
+    void close();
+    gzstreambuf* rdbuf() { return &buf; }
+};
+
+// ----------------------------------------------------------------------------
+// User classes. Use igzstream and ogzstream analogously to ifstream and
+// ofstream respectively. They read and write files based on the gz* 
+// function interface of the zlib. Files are compatible with gzip compression.
+// ----------------------------------------------------------------------------
+
+class igzstream : public gzstreambase, public std::istream {
+public:
+    igzstream() : std::istream( &buf) {} 
+    igzstream( const char* name, int open_mode = std::ios::in)
+        : gzstreambase( name, open_mode), std::istream( &buf) {}  
+    gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
+    void open( const char* name, int open_mode = std::ios::in) {
+        gzstreambase::open( name, open_mode);
+    }
+};
+
+class ogzstream : public gzstreambase, public std::ostream {
+public:
+    ogzstream() : std::ostream( &buf) {}
+    ogzstream( const char* name, int mode = std::ios::out)
+        : gzstreambase( name, mode), std::ostream( &buf) {}  
+    gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
+    void open( const char* name, int open_mode = std::ios::out) {
+        gzstreambase::open( name, open_mode);
+    }
+};
+
+#ifdef GZSTREAM_NAMESPACE
+} // namespace GZSTREAM_NAMESPACE
+#endif
+
+#endif // GZSTREAM_H
+// ============================================================================
+// EOF //
+
diff --git a/src/projects/mts/kmer_multiplicity_counter.cpp b/src/projects/mts/kmer_multiplicity_counter.cpp
index 37d4a62..4adf99e 100644
--- a/src/projects/mts/kmer_multiplicity_counter.cpp
+++ b/src/projects/mts/kmer_multiplicity_counter.cpp
@@ -12,10 +12,10 @@
 #include "kmc_api/kmc_file.h"
 //#include "omp.h"
 #include "io/kmers/mmapped_reader.hpp"
-#include "utils/path_helper.hpp"
-#include "utils/simple_tools.hpp"
-#include "utils/indices/perfect_hash_map_builder.hpp"
-#include "utils/indices/kmer_splitters.hpp"
+#include "utils/filesystem/path_helper.hpp"
+#include "utils/stl_utils.hpp"
+#include "utils/ph_map/perfect_hash_map_builder.hpp"
+#include "utils/kmer_mph/kmer_splitters.hpp"
 #include "logger.hpp"
 
 using std::string;
@@ -49,7 +49,7 @@ class KmerMultiplicityCounter {
 
     string SortKmersCountFile(const string& filename) {
         MMappedRecordArrayReader<seq_element_type> ins(filename, RtSeq::GetDataSize(k_) + 1, false);
-        libcxx::sort(ins.begin(), ins.end(), array_less<seq_element_type>());
+        libcxx::sort(ins.begin(), ins.end(), adt::array_less<seq_element_type>());
         std::string sorted_filename = filename + KMER_SORTED_EXTENSION;
         std::ofstream out(sorted_filename);
         out.write((char*) ins.data(), ins.data_size());
@@ -137,7 +137,7 @@ class KmerMultiplicityCounter {
         //TODO: extract into a common header
         typedef size_t Offset;
         typedef uint16_t Mpl;
-        using namespace debruijn_graph;
+        using namespace utils;
 
         KeyStoringMap<RtSeq, Offset, kmer_index_traits<RtSeq>, InvertableStoring>
             kmer_mpl(k_, workdir);
@@ -247,7 +247,7 @@ int main(int argc, char *argv[]) {
 
     std::vector<string> input_files;
     for (size_t i = 1; i <= sample_cnt; ++i) {
-        input_files.push_back(work_dir + "/sample" + ToString(i));
+        input_files.push_back(work_dir + "/sample" + std::to_string(i));
     }
 
     KmerMultiplicityCounter kmcounter(k, output);
diff --git a/src/projects/mts/logger.hpp b/src/projects/mts/logger.hpp
index a8d2b02..44060bb 100644
--- a/src/projects/mts/logger.hpp
+++ b/src/projects/mts/logger.hpp
@@ -5,7 +5,7 @@ void create_console_logger() {
 
     string log_props_file = "log.properties";
 
-    logger *lg = create_logger(path::FileExists(log_props_file) ? log_props_file : "");
+    logger *lg = create_logger(fs::FileExists(log_props_file) ? log_props_file : "");
     lg->add_writer(std::make_shared<console_writer>());
     attach_logger(lg);
 }
diff --git a/src/projects/mts/mts.py b/src/projects/mts/mts.py
index b80f145..f23cc4a 100755
--- a/src/projects/mts/mts.py
+++ b/src/projects/mts/mts.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 from __future__ import (print_function)
 
 import argparse
@@ -7,6 +7,9 @@ import sys
 import os
 import os.path
 import shutil
+import yaml
+
+from scripts.common import fill_default_values
 
 #copied from http://stackoverflow.com/questions/431684/how-do-i-cd-in-python/13197763#13197763
 class cd:
@@ -26,9 +29,12 @@ parser = argparse.ArgumentParser(description="MTS - Metagenomic Time Series")
 parser.add_argument("--threads", "-t", type=int, default=8, help="Number of threads")
 parser.add_argument("dir", type=str, help="Output directory")
 parser.add_argument("--config", "-c", type=str, default="", help="config.yaml to be copied to the directory (unnecessary if config.yaml is already there)")
-parser.add_argument("--stats", "-s", action="store_true", help="Calculate stats (when the REFS parameter in config.yaml is provided)")
-parser.add_argument("--reuse-assemblies", action="store_true", help="Use existing assemblies (put them in the corresponding folders)")
+parser.add_argument("--reuse-assemblies", type=str, help="Directory with existing assemblies to reuse")
+parser.add_argument("--reuse-profiles", type=str, help="Directory with existing profiles to reuse")
+parser.add_argument("--reuse-from", type=str, help="Directory with another assembly to reuse everything that is possible (overrides other --reuses)")
+parser.add_argument("--no-stats", "-S", action="store_true", help="Skip the stats section (overrides the config value)")
 parser.add_argument("--verbose", "-v", action="store_true", help="Increase verbosity level")
+parser.add_argument("--alt", action="store_true", help=argparse.SUPPRESS)
 
 args = parser.parse_args()
 
@@ -40,34 +46,55 @@ base_params = ["snakemake", "--directory", os.path.realpath(args.dir), "--cores"
 if args.verbose:
     base_params.extend(["-p", "--verbose"])
 
-if args.config:
-    if os.path.exists(os.path.join(args.dir, "config.yaml")):
-        print("Config path specified, but config.yaml already exists in output folder " + args.dir)
-        sys.exit(239)
-
 if not os.path.exists(args.dir):
     os.makedirs(args.dir)
 
-print("Output folder set to " + args.dir)
+print("Output folder set to", args.dir)
 
+config_path = os.path.join(args.dir, "config.yaml")
 if args.config:
-    print("Copying config from " + args.config)
-    shutil.copy(args.config, args.dir)
+    if os.path.exists(config_path):
+        if subprocess.call(["diff", config_path, args.config]):
+            print("\033[31mConfig path specified, but different config.yaml already exists in output folder", args.dir, "\033[0m")
+            sys.exit(239)
+    else:
+        print("Copying config from", args.config)
+        shutil.copy(args.config, config_path)
 
 with cd(exec_dir):
     def call_snake(extra_params=[]):
         subprocess.check_call(base_params + extra_params, stdout=sys.stdout, stderr=sys.stderr)
-    
-    print("Step #1 - Assembly")
-    if args.reuse_assemblies:
-        call_snake(["assemble_all", "--touch"])
 
+    def reuse_dir(dir_from, dir_name):
+        if not dir_from:
+            return
+        local_dir = os.path.join(args.dir, dir_name)
+        if not os.path.isdir(dir_from):
+            print("\033[33mWarning: {} source directory doesn't exist\033[0m".format(dir_from))
+            return
+        if os.path.exists(local_dir):
+            print("\033[33mWarning: {} destination directory already exists\033[0m".format(dir_name))
+            return
+        os.symlink(dir_from, local_dir)
+
+    with open(config_path) as config_in:
+        config = yaml.load(config_in)
+    fill_default_values(config)
+
+    if args.reuse_from:
+        args.reuse_assemblies = os.path.join(args.reuse_from, "assembly")
+        args.reuse_profiles = os.path.join(args.reuse_from, "profile")
+
+    reuse_dir(args.reuse_assemblies, "assembly")
+    reuse_dir(args.reuse_profiles, "profile")
+
+    print("Step #1 - Assembly")
     call_snake()
-    
-    if args.stats:
-        print("Step #2a - Assembly statistics")
-        call_snake(["--snakefile", "Stats.snake", "stats_all"])
-    
-        print("Step #2b - Reassembly statistics")
-        call_snake(["--snakefile", "Stats.snake", "stats_reassembly"])
 
+    if config.get("reassembly", dict()).get("enabled", True):
+        print("Step #1b - Reassembly")
+        call_snake(["--snakefile", "Reassembly.snake"])
+
+    if not args.no_stats and len(config.get("stats", dict())) > 0:
+        print("Step #2 - Stats")
+        call_snake(["--snakefile", "Stats.snake"])
diff --git a/src/projects/mts/multirun.py b/src/projects/mts/multirun.py
new file mode 100755
index 0000000..ed39b0d
--- /dev/null
+++ b/src/projects/mts/multirun.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+from __future__ import (print_function)
+
+import argparse
+import os
+import os.path
+import subprocess
+import sys
+import yaml
+
+parser = argparse.ArgumentParser(description="MTS Multi Runner")
+
+all_assemblers = ["main", "spades", "megahit"]
+all_binners = ["canopy", "concoct", "metabat"]
+unsupported = set(["main_metabat", "spades_canopy", "megahit_canopy"])
+
+parser.add_argument("--threads", "-t", type=int, default=8, help="Number of threads for each run")
+parser.add_argument("dir", type=str, help="Output directory")
+parser.add_argument("--config", "-c", type=str, help="Base config")
+parser.add_argument("--pipelines", "-p", type=str, nargs="+", default=[], help="Pipeline configurations to run")
+parser.add_argument("--assemblers", "-a", type=str, nargs="+", default=all_assemblers, help="Assemblers to use")
+parser.add_argument("--binners", "-b", type=str, nargs="+", default=all_binners, help="Binners to use")
+parser.add_argument("--exclude", "-e", type=str, nargs="+", default=[], help="Excluded (skipped) configurations")
+parser.add_argument("--no-stats", "-S", action="store_true", help="Skip the stats section (overrides the config value)")
+parser.add_argument("--verbose", "-v", action="store_true", help="Increase verbosity level")
+parser.add_argument("--ignore-errors", action="store_true")
+
+args = parser.parse_args()
+
+with open(args.config) as config_in:
+    config_template = yaml.load(config_in)
+
+def pipelines():
+    for assembler in args.assemblers:
+        for binner in args.binners:
+            yield assembler + "_" + binner
+    for pipeline in args.pipelines:
+        yield pipeline
+
+prev_runs = dict()
+
+excluded = unsupported.union(args.exclude)
+for pipeline in pipelines():
+    if pipeline in excluded:
+        if pipeline in unsupported:
+            print("\033[33mWarning:", pipeline, "is not currently supported; skipping\033[0m\n")
+        continue
+    print("Running", pipeline)
+    cur_dir = os.path.join(args.dir, pipeline)
+    if not os.path.exists(cur_dir):
+        os.makedirs(cur_dir)
+    call_params = ["./mts.py", "-t", str(args.threads), cur_dir]
+    if args.no_stats:
+        call_params.extend(["--no-stats"])
+    config = config_template.copy()
+    params = pipeline.split("_")
+    assembly_name = params[0]
+    if assembly_name == "main":
+        config["profile"] = {"profiler": "mts"}
+    else:
+        config["assembly"] = {"assembler": params[0], "groups": ["*"]}
+        config["profile"] = {"profiler": "jgi"}
+        config["propagation"] = {"enabled": False}
+        config["reassembly"] = {"enabled": False}
+
+    config["binning"] = {"binner": params[1]}
+    with open(os.path.join(cur_dir, "config.yaml"), "w") as config_out:
+        yaml.dump(config, config_out)
+    # Try to reuse assemblies from previous runs with the same assembler
+    prev_run = prev_runs.get(assembly_name)
+    if prev_run:
+        print("Reusing same data from", prev_run)
+        call_params.extend(["--reuse-from", prev_run])
+    #TODO: rewrite using Snakemake API
+    errcode = subprocess.call(call_params)
+    if errcode:
+        print(" ".join(call_params), "returned with error:", errcode)
+        if not args.ignore_errors:
+            sys.exit(errcode)
+    elif not prev_run: #Reuse only successful run
+        prev_runs[assembly_name] = cur_dir
+    print()
+
+#TODO: compare stats
diff --git a/src/projects/mts/prop_binning.cpp b/src/projects/mts/prop_binning.cpp
index 0df9038..d36ec75 100644
--- a/src/projects/mts/prop_binning.cpp
+++ b/src/projects/mts/prop_binning.cpp
@@ -17,25 +17,28 @@
 using namespace debruijn_graph;
 
 std::string add_suffix(const std::string& path, const std::string& suffix) {
-    auto ext = path::extension(path);
+    auto ext = fs::extension(path);
     return path.substr(0, path.length() - ext.length()) + suffix + ext;
 }
 
-void DumpEdgesAndAnnotation(const Graph& g,
-                            const EdgeAnnotation& edge_annotation,
-                            const string& out_edges,
-                            const string& out_annotation) {
-    INFO("Dumping edges to " << out_edges << "; their annotation to " << out_annotation);
-    io::osequencestream oss(out_edges);
+//TODO: refactor to process the graph only once
+void DumpEdges(const Graph& g, const string& out_edges) {
+    INFO("Dumping edges to " << out_edges);
+    io::OutputSequenceStream oss(out_edges);
+    for (auto it = g.ConstEdgeBegin(true); !it.IsEnd(); ++it) {
+        EdgeId e = *it;
+        oss << io::SingleRead("NODE_" + std::to_string(g.int_id(e)), g.EdgeNucls(e).str());
+    }
+}
+
+void DumpAnnotation(const Graph& g, const EdgeAnnotation& edge_annotation, const string& out_annotation) {
+    INFO("Dumping annotation to " << out_annotation);
     AnnotationOutStream annotation_out(out_annotation);
     for (auto it = g.ConstEdgeBegin(true); !it.IsEnd(); ++it) {
         EdgeId e = *it;
-        io::SingleRead edge_read("NODE_" + ToString(g.int_id(e)),
-                                 g.EdgeNucls(e).str());
-        oss << edge_read;
         auto relevant_bins = edge_annotation.Annotation(e);
         if (!relevant_bins.empty()) {
-            annotation_out << ContigAnnotation(GetId(edge_read),
+            annotation_out << ContigAnnotation("NODE_" + std::to_string(g.int_id(e)),
                                                vector<bin_id>(relevant_bins.begin(), relevant_bins.end()));
         }
     }
@@ -48,10 +51,10 @@ int main(int argc, char** argv) {
     create_console_logger();
 
     size_t k;
-    string saves_path, contigs_path, splits_path, annotation_path;
+    string saves_path, contigs_path, splits_path, annotation_path, bins_file;
     vector<string> sample_names, left_reads, right_reads;
-    string out_root, propagation_dump;
-    vector<bin_id> bins_of_interest;
+    string out_root, edges_dump, propagation_dump;
+    size_t length_threshold;
     bool no_binning;
     try {
         GetOpt_pp ops(argc, argv);
@@ -65,19 +68,29 @@ int main(int argc, char** argv) {
             >> Option('l', left_reads)
             >> Option('r', right_reads)
             >> Option('o', out_root)
-            >> Option('d', propagation_dump, "")
-            >> Option('b', bins_of_interest, {})
-            >> OptionPresent('p', no_binning);
+            >> Option('p', propagation_dump, "")
+            >> Option('e', edges_dump, "")
+            >> Option('b', bins_file)
+            >> Option('t', length_threshold, (size_t)2000)
+            >> OptionPresent('D', "no-binning", no_binning)
+        ;
     } catch(GetOptEx &ex) {
         cout << "Usage: prop_binning -k <K> -s <saves path> -c <contigs path> -f <splits path> "
-                "-a <binning annotation> -n <sample names> -l <left reads> -r <right reads> -o <output root> "
-                "[-d <propagation info dump>] [-p to disable binning] [-b <bins of interest>*]"  << endl;
+                "-a <binning annotation> -n <sample names> -l <left reads> -r <right reads> "
+                "-o <reads output root> -b <bins to propagate> [-D to disable binning] "
+                "[-p <propagation info dump>] [-e <propagated edges dump>]"  << endl;
         exit(1);
     }
 
-    for (const auto& bin_id : bins_of_interest) {
-        VERIFY_MSG(bin_id.find_last_of(',') == std::string::npos, "Specify bins of interest via space, not comma");
+    vector<bin_id> bins_of_interest;
+    ifstream bins_stream(bins_file);
+    bin_id bin;
+    while (!bins_stream.eof()) {
+        bins_stream >> bin;
+        bins_of_interest.push_back(bin);
+        bins_stream.ignore(numeric_limits<std::streamsize>::max(), '\n'); //Skip the rest of bin info
     }
+    INFO("Loaded " << bins_of_interest.size() << " interesting bins");
 
     conj_graph_pack gp(k, "tmp", 1);
     gp.kmer_mapper.Attach();
@@ -96,33 +109,28 @@ int main(int argc, char** argv) {
     EdgeAnnotation edge_annotation = filler(contigs_stream, split_stream, annotation_in);
 
     INFO("Propagation launched");
-    AnnotationPropagator propagator(gp);
+    AnnotationPropagator propagator(gp, length_threshold);
     propagator.Run(contigs_stream, edge_annotation);
     INFO("Propagation finished");
 
+    if (!edges_dump.empty()) {
+        INFO("Dumping propagated edges to " << edges_dump);
+        DumpEdges(gp.g, edges_dump);
+    }
+
     if (!propagation_dump.empty()) {
-        INFO("Dumping propagation info to " << propagation_dump);
-        DumpEdgesAndAnnotation(gp.g, edge_annotation,
-                               propagation_dump + ".fasta",
-                               propagation_dump + ".ann");
+        INFO("Dumping propagated annotation to " << propagation_dump);
+        DumpAnnotation(gp.g, edge_annotation, propagation_dump);
     }
 
+    //Binning stage
     if (no_binning) {
         INFO("Binning was disabled with -p flag");
         return 0;
     }
-    //Binning stage
-//    contigs_stream.reset();
-//    INFO("Using propagated annotation from " << propagated_path);
-//    AnnotationStream binning_stream(propagated_path);
-    for (size_t i = 0; i < sample_names.size(); ++i) {
-        ContigBinner binner(gp, edge_annotation, out_root, sample_names[i]);
-        INFO("Initializing binner for " << sample_names[i]);
-        auto paired_stream = io::PairedEasyStream(left_reads[i], right_reads[i], false, 0);
-        INFO("Running binner on " << left_reads[i] << " and " << right_reads[i]);
-        binner.Run(*paired_stream);
-        binner.close();
-    }
+
+    for (size_t i = 0; i < sample_names.size(); ++i)
+        BinReads(gp, out_root, sample_names[i], left_reads[i], right_reads[i], edge_annotation, bins_of_interest);
 
     return 0;
 }
diff --git a/src/projects/mts/propagate.cpp b/src/projects/mts/propagate.cpp
index be650e8..14e1a82 100644
--- a/src/projects/mts/propagate.cpp
+++ b/src/projects/mts/propagate.cpp
@@ -5,7 +5,7 @@
 //* See file LICENSE for details.
 //***************************************************************************
 
-#include "utils/simple_tools.hpp"
+#include "utils/stl_utils.hpp"
 
 //#include "pipeline/graphio.hpp"
 #include "pipeline/graph_pack.hpp"
@@ -15,15 +15,20 @@
 #include "visualization.hpp"
 
 namespace debruijn_graph {
-static const size_t EDGE_LENGTH_THRESHOLD = 2000;
+static const size_t EDGE_LENGTH_THRESHOLD = 2000,
+                    EDGE_UPPER_THRESHOLD = 3000;
 
 //FIXME 2kb edge length threshold might affect tip propagator in undesired way
 class EdgeAnnotationPropagator {
     const conj_graph_pack& gp_;
     const string name_;
-    size_t edge_length_threshold_;
+    size_t edge_length_threshold_, edge_upper_threshold_;
 
 protected:
+    size_t edge_length_threshold() const {
+        return edge_length_threshold_;
+    }
+
     const conj_graph_pack& gp() const {
         return gp_;
     }
@@ -37,10 +42,12 @@ protected:
 public:
     EdgeAnnotationPropagator(const conj_graph_pack& gp,
                              const string& name,
-                             size_t edge_length_threshold = EDGE_LENGTH_THRESHOLD) :
+                             size_t edge_length_threshold = EDGE_LENGTH_THRESHOLD,
+                             size_t edge_upper_threshold = EDGE_UPPER_THRESHOLD) :
                     gp_(gp),
                     name_(name),
-                    edge_length_threshold_(edge_length_threshold) {}
+                    edge_length_threshold_(edge_length_threshold),
+                    edge_upper_threshold_(edge_upper_threshold) {}
 
     const std::string& name() const {
         return name_;
@@ -51,9 +58,20 @@ public:
         DEBUG("Propagating with propagator: " << name_);
         for (bin_id bin : edge_annotation.interesting_bins()) {
             DEBUG("Processing bin " << bin << " with propagator: " << name_);
-            auto init_edges = edge_annotation.EdgesOfBin(bin, edge_length_threshold_);
-            DEBUG("Initial edge cnt " << init_edges.size() << " (edge length threshold " << edge_length_threshold_ << ")");
+            auto init_edges = edge_annotation.EdgesOfBin(bin, edge_length_threshold());
+            DEBUG("Initial edge cnt " << init_edges.size() << " (edge length threshold " << edge_length_threshold() << ")");
             auto raw_propagated = PropagateEdges(init_edges);
+            auto old_size = raw_propagated.size();
+            //Filter
+            size_t n = 0;
+            for (auto i = raw_propagated.begin(); i != raw_propagated.end(); ++n) {
+                DEBUG("Edge cnt: " << raw_propagated.size() << "; iter " << n);
+                if (gp_.g.length(*i) > edge_upper_threshold_)
+                    raw_propagated.erase(i++);
+                else
+                    ++i;
+            }
+            DEBUG("Excluded " << (old_size - raw_propagated.size()) << " >" << edge_upper_threshold_ << "bp edges");
             set<EdgeId> propagated;
             std::set_difference(raw_propagated.begin(), raw_propagated.end(),
                                 init_edges.begin(), init_edges.end(),
@@ -72,40 +90,6 @@ private:
 class ConnectingPathPropagator : public EdgeAnnotationPropagator {
     size_t path_length_threshold_;
     size_t path_edge_cnt_;
-    const EdgeAnnotation& debug_annotation_;
-
-    bin_id DetermineBin(const set<EdgeId>& edges) const {
-        map<bin_id, size_t> cnt_map;
-        for (EdgeId e : edges) {
-            for (auto b : debug_annotation_.Annotation(e)) {
-                cnt_map[b]++;
-            }
-        }
-        bin_id candidate = "";
-        for (auto cnt_el : cnt_map) {
-            if (cnt_el.second > edges.size() / 2) {
-                if (candidate.empty())
-                    candidate = cnt_el.first;
-                else
-                    return "";
-            }
-        }
-        return candidate;
-    }
-
-    bool BadPath(const vector<EdgeId>& path, bin_id base_bin) const {
-        size_t cnt = 0;
-        for (EdgeId e : path) {
-            if (g().length(e) < 2000) 
-                continue;
-            auto ann = debug_annotation_.Annotation(e);
-            if (!ann.empty() &&
-                std::find(ann.begin(), ann.end(), base_bin) == ann.end()) {
-                cnt++;
-            }
-        }
-        return cnt > 0;
-    }
 
     set<VertexId> CollectEdgeStarts(const set<EdgeId>& edges) const {
         set<VertexId> answer;
@@ -116,29 +100,16 @@ class ConnectingPathPropagator : public EdgeAnnotationPropagator {
     }
 
     set<EdgeId> PropagateEdges(const set<EdgeId>& edges) const override {
-        //static size_t pic_cnt = 0;
-        bin_id bin = DetermineBin(edges);
-        if (!bin.empty()) {
-            DEBUG("Bin determined as " << bin);
-        } else {
-            DEBUG("Failed to determine bin");
-        }
+        DEBUG(__FUNCTION__);
         set<EdgeId> answer;
         set<VertexId> starts = CollectEdgeStarts(edges);
         for (EdgeId e : edges) {
             PathProcessor<Graph> path_searcher(g(), g().EdgeEnd(e), path_length_threshold_);
             for (VertexId v : starts) {
                 auto callback = AdapterCallback<Graph>([&](const vector<EdgeId>& path) {
-                    //if (pic_cnt < 10) {
-                    //if (BadPath(path, bin)) {
-                    //    auto to_draw = path;
-                    //    to_draw.insert(to_draw.begin(), e);
-                    //    PrintAnnotatedAlongPath(gp(), to_draw, debug_annotation_, "/home/snurk/tmp/pics/pic_" + ToString(++pic_cnt) + "_");
-                    //}
-                    //}
-                    insert_all(answer, path);
+                    utils::insert_all(answer, path);
                 }, true);
-                TRACE("Launching path search between edge " << g().str(e) << " and vertex "
+                DEBUG("Launching path search between edge " << g().str(e) << " and vertex "
                         << g().str(v) << " with length bound " << path_length_threshold_);
                 path_searcher.Process(v, 0, path_length_threshold_, callback, path_edge_cnt_);
             }
@@ -148,13 +119,12 @@ class ConnectingPathPropagator : public EdgeAnnotationPropagator {
 
 public:
     ConnectingPathPropagator(const conj_graph_pack& gp,
-                             size_t path_length_threshold, 
-                             size_t path_edge_cnt,
-                             const EdgeAnnotation& ann) :
-        EdgeAnnotationPropagator(gp, "ConnectingPath"),
+                             size_t length_threshold,
+                             size_t path_length_threshold,
+                             size_t path_edge_cnt) :
+        EdgeAnnotationPropagator(gp, "ConnectingPath", length_threshold),
         path_length_threshold_(path_length_threshold),
-        path_edge_cnt_(path_edge_cnt),
-        debug_annotation_(ann) {}
+        path_edge_cnt_(path_edge_cnt) {}
 
 private:
     DECL_LOGGER("ConnectingPathPropagator");
@@ -173,13 +143,15 @@ class PairedInfoPropagator : public EdgeAnnotationPropagator {
                         if (math::ge(point.weight, weight_threshold_)) {
                             DEBUG("Adding (" << g().str(e1) << "," << g().str(i.first) << "); " << point);
                             answer.insert(i.first);
-                        }	    
+                        }
         }
         return answer;
     }
 public:
-    PairedInfoPropagator(const conj_graph_pack& gp, omnigraph::de::DEWeight threshold):
-        EdgeAnnotationPropagator(gp, "PairedInfo"), weight_threshold_(threshold) {}
+    PairedInfoPropagator(const conj_graph_pack& gp,
+                         size_t length_threshold,
+                         omnigraph::de::DEWeight threshold):
+        EdgeAnnotationPropagator(gp, "PairedInfo", length_threshold), weight_threshold_(threshold) {}
 private:
     DECL_LOGGER("PairedInfoPropagator");
 };
@@ -202,9 +174,9 @@ protected:
             auto edges_of_contig = mapper_->MapRead(contig).simple_path();
             for (EdgeId e : edges_of_contig) {
                 if (edges.count(e)) {
-                    DEBUG("Edge " << gp().g.str(e) << " belongs to the contig #" << 
+                    DEBUG("Edge " << gp().g.str(e) << " belongs to the contig #" <<
                             contig.name() << " of " << edges_of_contig.size() << " edges");
-                    insert_all(answer, edges_of_contig);
+                    utils::insert_all(answer, edges_of_contig);
                     break;
                 }
             }
@@ -222,8 +194,8 @@ private:
 class TipPropagator : public EdgeAnnotationPropagator {
 
 public:
-    TipPropagator(const conj_graph_pack& gp) :
-        EdgeAnnotationPropagator(gp, "TipPropagator"), tipper_(gp.g) {}
+    TipPropagator(const conj_graph_pack& gp, size_t length_threshold) :
+        EdgeAnnotationPropagator(gp, "TipPropagator", length_threshold), tipper_(gp.g) {}
 
 protected:
     set<EdgeId> PropagateEdges(const set<EdgeId>& edges) const override {
@@ -290,13 +262,13 @@ private:
     DECL_LOGGER("AnnotationChecker");
 };
 
-void AnnotationPropagator::Run(io::SingleStream& /*contigs*/, 
+void AnnotationPropagator::Run(io::SingleStream& /*contigs*/,
                      EdgeAnnotation& edge_annotation
                      /*const string& annotation_out_fn*/) {
     std::vector<std::shared_ptr<EdgeAnnotationPropagator>> propagator_pipeline {
-        std::make_shared<ConnectingPathPropagator>(gp_, 8000, 10, edge_annotation),
-        std::make_shared<TipPropagator>(gp_), 
-        std::make_shared<PairedInfoPropagator>(gp_, 10.)};//,
+        make_propagator<ConnectingPathPropagator>(8000, 10),
+        make_propagator<TipPropagator>(),
+        make_propagator<PairedInfoPropagator>(10.)};//,
 //        std::make_shared<ContigPropagator>(gp_, contigs)};//,
 //        std::make_shared<ConnectingPathPropagator>(gp_, 8000, 10, edge_annotation),
 //        std::make_shared<ContigPropagator>(gp_, contigs),
diff --git a/src/projects/mts/propagate.hpp b/src/projects/mts/propagate.hpp
index 1c3ce0f..0f74c0a 100644
--- a/src/projects/mts/propagate.hpp
+++ b/src/projects/mts/propagate.hpp
@@ -14,10 +14,16 @@ namespace debruijn_graph {
 
 class AnnotationPropagator {
     const conj_graph_pack& gp_;
+    size_t length_threshold_;
 
 public:
-    AnnotationPropagator(const conj_graph_pack& gp) :
-                     gp_(gp) {
+    AnnotationPropagator(const conj_graph_pack& gp, size_t length_threshold) :
+                     gp_(gp), length_threshold_(length_threshold) {
+    }
+
+    template<typename Result, typename... Args>
+    std::shared_ptr<Result> make_propagator(Args... args) {
+        return std::make_shared<Result>(gp_, length_threshold_, args...);
     }
 
     void Run(io::SingleStream& contigs, EdgeAnnotation& edge_annotation);
diff --git a/src/projects/mts/read_binning.cpp b/src/projects/mts/read_binning.cpp
index ac2dea2..148f1f3 100644
--- a/src/projects/mts/read_binning.cpp
+++ b/src/projects/mts/read_binning.cpp
@@ -5,7 +5,7 @@
 //* See file LICENSE for details.
 //***************************************************************************
 
-#include "utils/simple_tools.hpp"
+#include "utils/stl_utils.hpp"
 #include "utils/logger/log_writers.hpp"
 
 #include "pipeline/graphio.hpp"
@@ -19,10 +19,12 @@ set<bin_id> ContigBinner::RelevantBins(const io::SingleRead& r) const {
 }
 
 void ContigBinner::Init(bin_id bin) {
-    string out_dir = out_root_ + "/" + ToString(bin) + "/";
-    path::make_dirs(out_dir);
-    out_streams_.insert(make_pair(bin, make_shared<io::OPairedReadStream>(out_dir + sample_name_ + "_1.fastq",
-                                                                          out_dir + sample_name_ + "_2.fastq")));
+    string out_dir = out_root_ + "/" + bin + "/";
+    fs::make_dirs(out_dir);
+    out_streams_.insert(make_pair(bin, make_shared<ContigBinner::Stream>(
+        out_dir + sample_name_ + "_1.fastq.gz",
+        out_dir + sample_name_ + "_2.fastq.gz")
+    ));
 }
 
 void ContigBinner::Run(io::PairedStream& paired_reads) {
@@ -30,9 +32,13 @@ void ContigBinner::Run(io::PairedStream& paired_reads) {
     while (!paired_reads.eof()) {
         paired_reads >> paired_read;
         set<bin_id> bins;
-        insert_all(bins, RelevantBins(paired_read.first()));
-        insert_all(bins, RelevantBins(paired_read.second()));
-        for (auto bin : bins) {
+        utils::insert_all(bins, RelevantBins(paired_read.first()));
+        utils::insert_all(bins, RelevantBins(paired_read.second()));
+        for (const auto& bin : bins) {
+            if (bins_of_interest_.size() && !bins_of_interest_.count(bin)) {
+                INFO(bin << " was excluded from read binning");
+                continue;
+            }
             if (out_streams_.find(bin) == out_streams_.end()) {
                 Init(bin);
             }
@@ -41,50 +47,18 @@ void ContigBinner::Run(io::PairedStream& paired_reads) {
     }
 }
 
-};
-
-//todo make it take dataset info
-/*
-int main(int argc, char** argv) {
-    using namespace debruijn_graph;
-
-    if (argc < 9) {
-        cout << "Usage: read_binning <K> <saves path> <contigs path> <contigs binning info> "
-                "<left reads> <right reads> <output root> <sample name> (<bins of interest>)*"  << endl;
-        exit(1);
-    }
-
-    //TmpFolderFixture fixture("tmp");
-    create_console_logger();
-    size_t k = lexical_cast<size_t>(argv[1]);
-    string saves_path = argv[2];
-    string contigs_path = argv[3];
-    string contigs_binning_path = argv[4];
-    string left_reads = argv[5];
-    string right_reads = argv[6];
-    string out_root = argv[7];
-    string sample_name = argv[8];
-
-    std::vector<bin_id> bins_of_interest;
-    for (int i = 9; i < argc; ++i) {
-        bins_of_interest.push_back(argv[i]);
-    }
-
-    conj_graph_pack gp(k, "tmp", 0);
-    gp.kmer_mapper.Attach();
-    INFO("Load graph from " << saves_path);
-    graphio::ScanGraphPack(saves_path, gp);
-
-    ContigBinner binner(gp, bins_of_interest);
-
-    auto contigs_stream_ptr = make_shared<io::FileReadStream>(contigs_path);
-    AnnotationStream binning_stream(contigs_binning_path);
-
-    binner.Init(out_root, sample_name, *contigs_stream_ptr, binning_stream);
-
+int BinReads(const conj_graph_pack& gp, const std::string& out_root,
+             const std::string& sample,
+             const std::string& left_reads, const std::string& right_reads,
+             const EdgeAnnotation& edge_annotation,
+             const vector<string>& bins_of_interest) {
+    ContigBinner binner(gp, edge_annotation, out_root, sample, bins_of_interest);
+    INFO("Initializing binner for " << sample);
     auto paired_stream = io::PairedEasyStream(left_reads, right_reads, false, 0);
+    INFO("Running binner on " << left_reads << " and " << right_reads);
     binner.Run(*paired_stream);
     binner.close();
     return 0;
 }
-*/
+
+};
diff --git a/src/projects/mts/read_binning.hpp b/src/projects/mts/read_binning.hpp
index 87aeadd..014dba6 100644
--- a/src/projects/mts/read_binning.hpp
+++ b/src/projects/mts/read_binning.hpp
@@ -8,15 +8,17 @@
 
 #include "annotation.hpp"
 #include "io/reads/io_helper.hpp"
+#include "gzstream/gzstream.h"
 
 namespace io {
 
+template<typename Stream>
 class OSingleReadStream {
-    std::ofstream os_;
+    Stream os_;
 
 public:
-    OSingleReadStream(const std::string& fn) :
-        os_(fn) {
+    OSingleReadStream(const std::string& fn) {
+        os_.open(fn.c_str());
     }
 
     OSingleReadStream& operator<<(const SingleRead& read) {
@@ -32,9 +34,10 @@ public:
     }
 };
 
+template<typename Stream>
 class OPairedReadStream {
-    OSingleReadStream l_os_;
-    OSingleReadStream r_os_;
+    OSingleReadStream<Stream> l_os_;
+    OSingleReadStream<Stream> r_os_;
 
 public:
     OPairedReadStream(const std::string& l_fn, const std::string& r_fn) :
@@ -63,23 +66,27 @@ class ContigBinner {
     std::string out_root_;
     std::string sample_name_;
     shared_ptr<SequenceMapper<Graph>> mapper_;
+    std::set<std::string> bins_of_interest_;
 
-    map<bin_id, std::shared_ptr<io::OPairedReadStream>> out_streams_;
+    typedef io::OPairedReadStream<ogzstream> Stream;
+    map<bin_id, std::shared_ptr<Stream>> out_streams_;
 
     set<bin_id> RelevantBins(const io::SingleRead& r) const;
 
     void Init(bin_id bin);
 
 public:
-    ContigBinner(const conj_graph_pack& gp, 
+    ContigBinner(const conj_graph_pack& gp,
                  const EdgeAnnotation& edge_annotation,
                  const std::string& out_root,
-                 const std::string& sample_name) :
+                 const std::string& sample_name,
+                 const std::vector<std::string>& bins_of_interest = {}) :
                      gp_(gp),
                      edge_annotation_(edge_annotation),
                      out_root_(out_root),
                      sample_name_(sample_name),
-                     mapper_(MapperInstance(gp)) {
+                     mapper_(MapperInstance(gp)),
+                     bins_of_interest_(bins_of_interest.begin(), bins_of_interest.end()) {
     }
 
     void Run(io::PairedStream& paired_reads);
@@ -89,4 +96,10 @@ public:
     }
 };
 
+int BinReads(const conj_graph_pack& gp, const std::string& out_root,
+             const std::string& sample,
+             const std::string& left_reads, const std::string& right_reads,
+             const EdgeAnnotation& edge_annotation,
+             const vector<string>& bins_of_interest);
+
 }
diff --git a/src/projects/mts/scripts/Dataset_analysis.ipynb b/src/projects/mts/scripts/Dataset_analysis.ipynb
new file mode 100644
index 0000000..53265f1
--- /dev/null
+++ b/src/projects/mts/scripts/Dataset_analysis.ipynb
@@ -0,0 +1,2246 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "# Load contig profiles and binning results.\n",
+    "# TODO: Make profiles and binning results the same for all algos.\n",
+    "profile = pd.read_csv(\"/Users/tanunia/PycharmProjects/biolab_scripts/canopy_profiles.in\", sep=\" \", header=None)\n",
+    "clusters = pd.read_csv(\"/Users/tanunia/PycharmProjects/biolab_scripts/canopy_binning.tsv\", sep=\"\\t\", header=None)\n",
+    "\n",
+    "# Add binning column to profile\n",
+    "clusters = clusters.rename(columns={1:'contig', 0:'color'})\n",
+    "cols = clusters.columns\n",
+    "clusters = clusters[cols[::-1]]\n",
+    "clusters[\"color\"] = clusters[\"color\"].apply(lambda x: int(x[3:]))\n",
+    "profile = profile.rename(columns={0:'contig'})\n",
+    "profile = pd.merge(clusters, profile, on='contig')\n",
+    "#profile"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style>\n",
+       "    .dataframe thead tr:only-child th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>color</th>\n",
+       "      <th>1</th>\n",
+       "      <th>2</th>\n",
+       "      <th>3</th>\n",
+       "      <th>4</th>\n",
+       "      <th>5</th>\n",
+       "      <th>6</th>\n",
+       "      <th>7</th>\n",
+       "      <th>8</th>\n",
+       "      <th>9</th>\n",
+       "      <th>10</th>\n",
+       "      <th>11</th>\n",
+       "      <th>12</th>\n",
+       "      <th>13</th>\n",
+       "      <th>14</th>\n",
+       "      <th>15</th>\n",
+       "      <th>16</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>874229.000000</td>\n",
+       "      <td>874229.000000</td>\n",
+       "      <td>874229.000000</td>\n",
+       "      <td>874229.000000</td>\n",
+       "      <td>874229.000000</td>\n",
+       "      <td>874229.000000</td>\n",
+       "      <td>874229.000000</td>\n",
+       "      <td>874229.000000</td>\n",
+       "      <td>874229.000000</td>\n",
+       "      <td>874229.000000</td>\n",
+       "      <td>874229.000000</td>\n",
+       "      <td>874229.000000</td>\n",
+       "      <td>874229.000000</td>\n",
+       "      <td>874229.000000</td>\n",
+       "      <td>874229.000000</td>\n",
+       "      <td>874229.000000</td>\n",
+       "      <td>874229.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>114.345023</td>\n",
+       "      <td>7.021815</td>\n",
+       "      <td>9.682457</td>\n",
+       "      <td>16.023759</td>\n",
+       "      <td>12.826631</td>\n",
+       "      <td>11.029218</td>\n",
+       "      <td>9.773265</td>\n",
+       "      <td>15.424325</td>\n",
+       "      <td>12.309418</td>\n",
+       "      <td>15.037634</td>\n",
+       "      <td>11.235127</td>\n",
+       "      <td>17.829746</td>\n",
+       "      <td>19.183485</td>\n",
+       "      <td>13.395388</td>\n",
+       "      <td>19.277283</td>\n",
+       "      <td>14.636003</td>\n",
+       "      <td>11.643889</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>175.732041</td>\n",
+       "      <td>22.349057</td>\n",
+       "      <td>37.198960</td>\n",
+       "      <td>35.921960</td>\n",
+       "      <td>36.780672</td>\n",
+       "      <td>36.521695</td>\n",
+       "      <td>28.569736</td>\n",
+       "      <td>45.451414</td>\n",
+       "      <td>36.727628</td>\n",
+       "      <td>38.228719</td>\n",
+       "      <td>26.672553</td>\n",
+       "      <td>32.773979</td>\n",
+       "      <td>50.958231</td>\n",
+       "      <td>34.555440</td>\n",
+       "      <td>51.378334</td>\n",
+       "      <td>27.745981</td>\n",
+       "      <td>36.868664</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>20.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>4.000000</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>4.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>61.000000</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>7.000000</td>\n",
+       "      <td>5.000000</td>\n",
+       "      <td>4.000000</td>\n",
+       "      <td>4.000000</td>\n",
+       "      <td>5.000000</td>\n",
+       "      <td>4.000000</td>\n",
+       "      <td>5.000000</td>\n",
+       "      <td>5.000000</td>\n",
+       "      <td>9.000000</td>\n",
+       "      <td>9.000000</td>\n",
+       "      <td>5.000000</td>\n",
+       "      <td>7.000000</td>\n",
+       "      <td>8.000000</td>\n",
+       "      <td>4.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>127.000000</td>\n",
+       "      <td>7.000000</td>\n",
+       "      <td>8.000000</td>\n",
+       "      <td>21.000000</td>\n",
+       "      <td>12.000000</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>9.000000</td>\n",
+       "      <td>15.000000</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>14.000000</td>\n",
+       "      <td>12.000000</td>\n",
+       "      <td>24.000000</td>\n",
+       "      <td>20.000000</td>\n",
+       "      <td>14.000000</td>\n",
+       "      <td>18.000000</td>\n",
+       "      <td>20.000000</td>\n",
+       "      <td>11.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>1899.000000</td>\n",
+       "      <td>2962.000000</td>\n",
+       "      <td>2890.000000</td>\n",
+       "      <td>4656.000000</td>\n",
+       "      <td>4791.000000</td>\n",
+       "      <td>5243.000000</td>\n",
+       "      <td>3941.000000</td>\n",
+       "      <td>6316.000000</td>\n",
+       "      <td>3737.000000</td>\n",
+       "      <td>4412.000000</td>\n",
+       "      <td>3137.000000</td>\n",
+       "      <td>3693.000000</td>\n",
+       "      <td>6464.000000</td>\n",
+       "      <td>3996.000000</td>\n",
+       "      <td>5213.000000</td>\n",
+       "      <td>3440.000000</td>\n",
+       "      <td>4765.000000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "               color              1              2              3  \\\n",
+       "count  874229.000000  874229.000000  874229.000000  874229.000000   \n",
+       "mean      114.345023       7.021815       9.682457      16.023759   \n",
+       "std       175.732041      22.349057      37.198960      35.921960   \n",
+       "min         1.000000       0.000000       0.000000       0.000000   \n",
+       "25%        20.000000       0.000000       0.000000       2.000000   \n",
+       "50%        61.000000       3.000000       2.000000       7.000000   \n",
+       "75%       127.000000       7.000000       8.000000      21.000000   \n",
+       "max      1899.000000    2962.000000    2890.000000    4656.000000   \n",
+       "\n",
+       "                   4              5              6              7  \\\n",
+       "count  874229.000000  874229.000000  874229.000000  874229.000000   \n",
+       "mean       12.826631      11.029218       9.773265      15.424325   \n",
+       "std        36.780672      36.521695      28.569736      45.451414   \n",
+       "min         0.000000       0.000000       0.000000       0.000000   \n",
+       "25%         0.000000       0.000000       0.000000       2.000000   \n",
+       "50%         5.000000       4.000000       4.000000       5.000000   \n",
+       "75%        12.000000      10.000000       9.000000      15.000000   \n",
+       "max      4791.000000    5243.000000    3941.000000    6316.000000   \n",
+       "\n",
+       "                   8              9             10             11  \\\n",
+       "count  874229.000000  874229.000000  874229.000000  874229.000000   \n",
+       "mean       12.309418      15.037634      11.235127      17.829746   \n",
+       "std        36.727628      38.228719      26.672553      32.773979   \n",
+       "min         0.000000       0.000000       0.000000       0.000000   \n",
+       "25%         0.000000       2.000000       2.000000       4.000000   \n",
+       "50%         4.000000       5.000000       5.000000       9.000000   \n",
+       "75%        10.000000      14.000000      12.000000      24.000000   \n",
+       "max      3737.000000    4412.000000    3137.000000    3693.000000   \n",
+       "\n",
+       "                  12             13             14             15  \\\n",
+       "count  874229.000000  874229.000000  874229.000000  874229.000000   \n",
+       "mean       19.183485      13.395388      19.277283      14.636003   \n",
+       "std        50.958231      34.555440      51.378334      27.745981   \n",
+       "min         0.000000       0.000000       0.000000       0.000000   \n",
+       "25%         3.000000       2.000000       3.000000       4.000000   \n",
+       "50%         9.000000       5.000000       7.000000       8.000000   \n",
+       "75%        20.000000      14.000000      18.000000      20.000000   \n",
+       "max      6464.000000    3996.000000    5213.000000    3440.000000   \n",
+       "\n",
+       "                  16  \n",
+       "count  874229.000000  \n",
+       "mean       11.643889  \n",
+       "std        36.868664  \n",
+       "min         0.000000  \n",
+       "25%         0.000000  \n",
+       "50%         4.000000  \n",
+       "75%        11.000000  \n",
+       "max      4765.000000  "
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Information about profile\n",
+    "profile.describe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style>\n",
+       "    .dataframe thead tr:only-child th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>color</th>\n",
+       "      <th>1</th>\n",
+       "      <th>2</th>\n",
+       "      <th>3</th>\n",
+       "      <th>4</th>\n",
+       "      <th>5</th>\n",
+       "      <th>6</th>\n",
+       "      <th>7</th>\n",
+       "      <th>8</th>\n",
+       "      <th>9</th>\n",
+       "      <th>10</th>\n",
+       "      <th>11</th>\n",
+       "      <th>12</th>\n",
+       "      <th>13</th>\n",
+       "      <th>14</th>\n",
+       "      <th>15</th>\n",
+       "      <th>16</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>350711.000000</td>\n",
+       "      <td>350711.000000</td>\n",
+       "      <td>350711.000000</td>\n",
+       "      <td>350711.000000</td>\n",
+       "      <td>350711.000000</td>\n",
+       "      <td>350711.000000</td>\n",
+       "      <td>350711.000000</td>\n",
+       "      <td>350711.000000</td>\n",
+       "      <td>350711.000000</td>\n",
+       "      <td>350711.000000</td>\n",
+       "      <td>350711.000000</td>\n",
+       "      <td>350711.000000</td>\n",
+       "      <td>350711.000000</td>\n",
+       "      <td>350711.000000</td>\n",
+       "      <td>350711.000000</td>\n",
+       "      <td>350711.000000</td>\n",
+       "      <td>350711.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>21.828813</td>\n",
+       "      <td>6.711198</td>\n",
+       "      <td>12.784426</td>\n",
+       "      <td>22.158333</td>\n",
+       "      <td>16.605630</td>\n",
+       "      <td>12.973206</td>\n",
+       "      <td>11.563843</td>\n",
+       "      <td>18.747259</td>\n",
+       "      <td>14.536362</td>\n",
+       "      <td>19.205451</td>\n",
+       "      <td>13.463379</td>\n",
+       "      <td>21.720180</td>\n",
+       "      <td>23.329459</td>\n",
+       "      <td>19.492075</td>\n",
+       "      <td>28.524683</td>\n",
+       "      <td>19.643273</td>\n",
+       "      <td>17.859403</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>21.429488</td>\n",
+       "      <td>17.495651</td>\n",
+       "      <td>47.772374</td>\n",
+       "      <td>29.768226</td>\n",
+       "      <td>30.134884</td>\n",
+       "      <td>21.407159</td>\n",
+       "      <td>19.749674</td>\n",
+       "      <td>28.541668</td>\n",
+       "      <td>32.811062</td>\n",
+       "      <td>27.869045</td>\n",
+       "      <td>18.852696</td>\n",
+       "      <td>21.706461</td>\n",
+       "      <td>36.840619</td>\n",
+       "      <td>32.843480</td>\n",
+       "      <td>54.120753</td>\n",
+       "      <td>19.491147</td>\n",
+       "      <td>33.627913</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>6.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>4.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>4.000000</td>\n",
+       "      <td>4.000000</td>\n",
+       "      <td>7.000000</td>\n",
+       "      <td>6.000000</td>\n",
+       "      <td>5.000000</td>\n",
+       "      <td>6.000000</td>\n",
+       "      <td>7.000000</td>\n",
+       "      <td>3.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>15.000000</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>4.000000</td>\n",
+       "      <td>12.000000</td>\n",
+       "      <td>7.000000</td>\n",
+       "      <td>6.000000</td>\n",
+       "      <td>6.000000</td>\n",
+       "      <td>8.000000</td>\n",
+       "      <td>6.000000</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>9.000000</td>\n",
+       "      <td>17.000000</td>\n",
+       "      <td>13.000000</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>14.000000</td>\n",
+       "      <td>13.000000</td>\n",
+       "      <td>7.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>30.000000</td>\n",
+       "      <td>7.000000</td>\n",
+       "      <td>12.000000</td>\n",
+       "      <td>30.000000</td>\n",
+       "      <td>15.000000</td>\n",
+       "      <td>12.000000</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>23.000000</td>\n",
+       "      <td>13.000000</td>\n",
+       "      <td>23.000000</td>\n",
+       "      <td>18.000000</td>\n",
+       "      <td>31.000000</td>\n",
+       "      <td>29.000000</td>\n",
+       "      <td>19.000000</td>\n",
+       "      <td>28.000000</td>\n",
+       "      <td>28.000000</td>\n",
+       "      <td>21.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>122.000000</td>\n",
+       "      <td>922.000000</td>\n",
+       "      <td>2890.000000</td>\n",
+       "      <td>965.000000</td>\n",
+       "      <td>1499.000000</td>\n",
+       "      <td>1290.000000</td>\n",
+       "      <td>1281.000000</td>\n",
+       "      <td>1273.000000</td>\n",
+       "      <td>1922.000000</td>\n",
+       "      <td>1199.000000</td>\n",
+       "      <td>1492.000000</td>\n",
+       "      <td>925.000000</td>\n",
+       "      <td>2252.000000</td>\n",
+       "      <td>1641.000000</td>\n",
+       "      <td>3422.000000</td>\n",
+       "      <td>897.000000</td>\n",
+       "      <td>2124.000000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "               color              1              2              3  \\\n",
+       "count  350711.000000  350711.000000  350711.000000  350711.000000   \n",
+       "mean       21.828813       6.711198      12.784426      22.158333   \n",
+       "std        21.429488      17.495651      47.772374      29.768226   \n",
+       "min         1.000000       0.000000       0.000000       0.000000   \n",
+       "25%         6.000000       0.000000       0.000000       4.000000   \n",
+       "50%        15.000000       3.000000       4.000000      12.000000   \n",
+       "75%        30.000000       7.000000      12.000000      30.000000   \n",
+       "max       122.000000     922.000000    2890.000000     965.000000   \n",
+       "\n",
+       "                   4              5              6              7  \\\n",
+       "count  350711.000000  350711.000000  350711.000000  350711.000000   \n",
+       "mean       16.605630      12.973206      11.563843      18.747259   \n",
+       "std        30.134884      21.407159      19.749674      28.541668   \n",
+       "min         0.000000       0.000000       0.000000       0.000000   \n",
+       "25%         2.000000       2.000000       2.000000       3.000000   \n",
+       "50%         7.000000       6.000000       6.000000       8.000000   \n",
+       "75%        15.000000      12.000000      10.000000      23.000000   \n",
+       "max      1499.000000    1290.000000    1281.000000    1273.000000   \n",
+       "\n",
+       "                   8              9             10             11  \\\n",
+       "count  350711.000000  350711.000000  350711.000000  350711.000000   \n",
+       "mean       14.536362      19.205451      13.463379      21.720180   \n",
+       "std        32.811062      27.869045      18.852696      21.706461   \n",
+       "min         0.000000       0.000000       0.000000       0.000000   \n",
+       "25%         3.000000       4.000000       4.000000       7.000000   \n",
+       "50%         6.000000      10.000000       9.000000      17.000000   \n",
+       "75%        13.000000      23.000000      18.000000      31.000000   \n",
+       "max      1922.000000    1199.000000    1492.000000     925.000000   \n",
+       "\n",
+       "                  12             13             14             15  \\\n",
+       "count  350711.000000  350711.000000  350711.000000  350711.000000   \n",
+       "mean       23.329459      19.492075      28.524683      19.643273   \n",
+       "std        36.840619      32.843480      54.120753      19.491147   \n",
+       "min         0.000000       0.000000       0.000000       0.000000   \n",
+       "25%         6.000000       5.000000       6.000000       7.000000   \n",
+       "50%        13.000000      10.000000      14.000000      13.000000   \n",
+       "75%        29.000000      19.000000      28.000000      28.000000   \n",
+       "max      2252.000000    1641.000000    3422.000000     897.000000   \n",
+       "\n",
+       "                  16  \n",
+       "count  350711.000000  \n",
+       "mean       17.859403  \n",
+       "std        33.627913  \n",
+       "min         0.000000  \n",
+       "25%         3.000000  \n",
+       "50%         7.000000  \n",
+       "75%        21.000000  \n",
+       "max      2124.000000  "
+      ]
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Leave only clusters with significant contig length in profile\n",
+    "\n",
+    "#CANOPY: bin_info$third_largest > 3 000 000\n",
+    "cag_str = \"CAG0001 CAG0002 CAG0004 CAG0003 CAG0005 CAG0008 CAG0007 CAG0006 CAG0010 CAG0015 CAG0014 CAG0009 CAG0012 CAG0074 CAG0018 CAG0040 CAG0016 CAG0029 CAG0013 CAG0017 CAG0021 CAG0020 CAG0085 CAG0019 CAG0028 CAG0047 CAG0057 CAG0032 CAG0039 CAG0027 CAG0024 CAG0122 CAG0062 CAG0048 CAG0030 CAG0022 CAG0025 CAG0056 CAG0071 CAG0077 CAG0049 CAG0034 CAG0023 CAG0051 CAG0036 CAG0059\"\n",
+    "filter1 = [int(x[3:]) for x in cag_str.split(\" \")]\n",
+    "#CONCOCT: bin_info$third_largest > 20 000 000 \n",
+    "#filter1 = [89, 243, 312, 278, 109, 250, 60, 59, 195, 277, 190, 394, 311, 301, 333, 51, 143, 327, 338, 147, 256, 163, 18, 141, 134, 317, 81, 371, 288, 216, 388, 135, 71, 341, 367, 92, 232, 119, 252, 293, 361, 350, 168]\n",
+    "profile = profile[profile[\"color\"].isin(filter1)]\n",
+    "\n",
+    "# New profile info\n",
+    "profile.describe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# Get fraction of profile - profile_small. Normalize profile_small data (like in CONCOCT) and convert it to numpy array\n",
+    "\n",
+    "import numpy as np\n",
+    "\n",
+    "profile_small = profile.sample(frac=0.1)\n",
+    "data = profile_small.as_matrix(columns = profile.columns[2:])\n",
+    "v = (1.0/2000)\n",
+    "data = data + v\n",
+    "along_Y = np.apply_along_axis(sum, 0, data)\n",
+    "data = data/along_Y[None, :]\n",
+    "along_X = np.apply_along_axis(sum, 1, data)\n",
+    "data = data/along_X[:, None]\n",
+    "data = np.log(data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# Run bht-sne\n",
+    "path_bhtsne = '/Users/tanunia/PycharmProjects/biolab_t-sne/'\n",
+    "\n",
+    "# Save profile_small to tsv file\n",
+    "np.savetxt(\"data.in\", data, delimiter=\"\\t\")\n",
+    "\n",
+    "import sys, os\n",
+    "os.system(path_bhtsne + 'bhtsne.py -p 50 -m 1000 -i data.in -o data.out')\n",
+    "\n",
+    "# Load coordinates from data.out\n",
+    "ar = np.loadtxt(\"data.out\", delimiter=\"\\t\")\n",
+    "len(ar[:, 0])\n",
+    "\n",
+    "# Save bhtsne result to profile_small\n",
+    "profile_small[\"x\"] = ar[:, 0]\n",
+    "profile_small[\"y\"] = ar[:, 1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD8CAYAAAB0IB+mAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsnXd8VMX6h58529N7SIAAoYMKCKgoKnYBr4qoYAMrXtu1\n/VSu5XrVa+/92kUFFBA7erEAigoKCKFDICEJ6b1tP/P7YzfJJrtJNsmSgJ6Hz37IzpkzM2ez+Z45\n77zvO0JKiYaGhobGnx+lpwegoaGhodE9aIKvoaGh8RdBE3wNDQ2Nvwia4GtoaGj8RdAEX0NDQ+Mv\ngib4GhoaGn8RNMHX0NDQ+IugCb6GhobGX4SgBV8I8bYQolgIscWnLE4I8a0QYrf3/1ifY/8UQmQK\nIXYKIc4I9cA1NDQ0NDqGCDbSVghxAlALvCelPMxb9gRQLqV8TAgxF4iVUt4lhBgBLASOAlKB74Ah\nUkp3 [...]
+      "text/plain": [
+       "<matplotlib.figure.Figure at 0x127b81710>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Draw bhtsne result for a fraction of profile - profile_small\n",
+    "\n",
+    "from matplotlib import pyplot as plt\n",
+    "import matplotlib.cm as cm\n",
+    "\n",
+    "color = profile_small[\"color\"].tolist()\n",
+    "mx_color = max(color)\n",
+    "plt.scatter(profile_small[\"x\"], profile_small[\"y\"], c=[cm.spectral(float(i) /mx_color) for i in color])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# Run PCA on profile_small\n",
+    "import numpy as np\n",
+    "from sklearn.decomposition import PCA\n",
+    "\n",
+    "pca = PCA(n_components=2)\n",
+    "pcaed = pca.fit(data).transform(data)\n",
+    "profile_small[\"x_pca\"] = pcaed[:, 0]\n",
+    "profile_small[\"y_pca\"] = pcaed[:, 1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD8CAYAAAB0IB+mAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsnXecFEX6h5/qnrizEXaB3YUlJwmCgAqICTBiDocieofh\nzGeOd3hGMOKZM4o/D0QFMXCAICZAyTktYVmWzXknz3TX74+ZzbMBWCT1w4fPznSoemem+1vVb731\nlpBSYmBgYGBw7KMcbgMMDAwMDP4cDME3MDAwOE4wBN/AwMDgOMEQfAMDA4PjBEPwDQwMDI4TDME3\nMDAwOE44aMEXQnQQQiwWQmwWQmwSQvwjvL2VEOIHIUR6+G/CwZtrYGBgYHCgiIONwxdCJAPJUsrV\nQogYYBVwKfBXoFhKOVkI8QiQIKV8+GANNjAwMDA4MA66hy+lzJFSrg6/rgC2AKnAJcAn4cM+IdQI\nGBgY [...]
+      "text/plain": [
+       "<matplotlib.figure.Figure at 0x128162650>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Draw PCA for profile_small\n",
+    "plt.scatter(profile_small[\"x_pca\"], profile_small[\"y_pca\"], c=[cm.spectral(float(i) /mx_color) for i in color])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'plt' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-1-550c05617b60>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprofile\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"color\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malign\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'left'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbins\u001b[0m \u001b[0;34m=\u001b[0m \u0 [...]
+      "\u001b[0;31mNameError\u001b[0m: name 'plt' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "# Draw histogram with number of contigs in each bin\n",
+    "plt.hist(profile[\"color\"], align='left', bins = 400)\n",
+    "plt.title(\"Number of contigs in each bin\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style>\n",
+       "    .dataframe thead tr:only-child th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>1</th>\n",
+       "      <th>2</th>\n",
+       "      <th>3</th>\n",
+       "      <th>4</th>\n",
+       "      <th>5</th>\n",
+       "      <th>6</th>\n",
+       "      <th>7</th>\n",
+       "      <th>8</th>\n",
+       "      <th>9</th>\n",
+       "      <th>10</th>\n",
+       "      <th>11</th>\n",
+       "      <th>12</th>\n",
+       "      <th>13</th>\n",
+       "      <th>14</th>\n",
+       "      <th>15</th>\n",
+       "      <th>16</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>color</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>8.876186</td>\n",
+       "      <td>18.261282</td>\n",
+       "      <td>51.187960</td>\n",
+       "      <td>6.134054</td>\n",
+       "      <td>12.633700</td>\n",
+       "      <td>5.477037</td>\n",
+       "      <td>6.014585</td>\n",
+       "      <td>5.304814</td>\n",
+       "      <td>6.874191</td>\n",
+       "      <td>12.439046</td>\n",
+       "      <td>29.793732</td>\n",
+       "      <td>22.665219</td>\n",
+       "      <td>8.702589</td>\n",
+       "      <td>11.272143</td>\n",
+       "      <td>18.419851</td>\n",
+       "      <td>3.956024</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2.989724</td>\n",
+       "      <td>1.488277</td>\n",
+       "      <td>7.840371</td>\n",
+       "      <td>21.469497</td>\n",
+       "      <td>5.473986</td>\n",
+       "      <td>6.484202</td>\n",
+       "      <td>27.589086</td>\n",
+       "      <td>8.524538</td>\n",
+       "      <td>36.255537</td>\n",
+       "      <td>13.439261</td>\n",
+       "      <td>16.893876</td>\n",
+       "      <td>16.096439</td>\n",
+       "      <td>17.427745</td>\n",
+       "      <td>16.574618</td>\n",
+       "      <td>21.106419</td>\n",
+       "      <td>25.184315</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3.720807</td>\n",
+       "      <td>12.429749</td>\n",
+       "      <td>28.174160</td>\n",
+       "      <td>5.869552</td>\n",
+       "      <td>8.563257</td>\n",
+       "      <td>5.155427</td>\n",
+       "      <td>7.075434</td>\n",
+       "      <td>5.348820</td>\n",
+       "      <td>7.066567</td>\n",
+       "      <td>11.246722</td>\n",
+       "      <td>28.066567</td>\n",
+       "      <td>20.510428</td>\n",
+       "      <td>12.535407</td>\n",
+       "      <td>14.223367</td>\n",
+       "      <td>19.688710</td>\n",
+       "      <td>5.910453</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5.762003</td>\n",
+       "      <td>1.769082</td>\n",
+       "      <td>14.549234</td>\n",
+       "      <td>30.423671</td>\n",
+       "      <td>8.715794</td>\n",
+       "      <td>9.015703</td>\n",
+       "      <td>42.401146</td>\n",
+       "      <td>11.751899</td>\n",
+       "      <td>45.332861</td>\n",
+       "      <td>20.204209</td>\n",
+       "      <td>22.506758</td>\n",
+       "      <td>23.953855</td>\n",
+       "      <td>23.170228</td>\n",
+       "      <td>23.510362</td>\n",
+       "      <td>27.957910</td>\n",
+       "      <td>33.567834</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>2.803744</td>\n",
+       "      <td>2.260464</td>\n",
+       "      <td>5.384254</td>\n",
+       "      <td>9.709994</td>\n",
+       "      <td>6.948961</td>\n",
+       "      <td>2.923761</td>\n",
+       "      <td>7.159026</td>\n",
+       "      <td>2.579015</td>\n",
+       "      <td>17.632118</td>\n",
+       "      <td>6.025057</td>\n",
+       "      <td>10.539294</td>\n",
+       "      <td>11.215262</td>\n",
+       "      <td>12.417924</td>\n",
+       "      <td>20.254556</td>\n",
+       "      <td>11.322110</td>\n",
+       "      <td>21.699245</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>0.597182</td>\n",
+       "      <td>11.346204</td>\n",
+       "      <td>19.132968</td>\n",
+       "      <td>8.455170</td>\n",
+       "      <td>5.968345</td>\n",
+       "      <td>7.536916</td>\n",
+       "      <td>9.468041</td>\n",
+       "      <td>6.318810</td>\n",
+       "      <td>9.369510</td>\n",
+       "      <td>12.423428</td>\n",
+       "      <td>41.204192</td>\n",
+       "      <td>28.746239</td>\n",
+       "      <td>18.627272</td>\n",
+       "      <td>21.547352</td>\n",
+       "      <td>27.557788</td>\n",
+       "      <td>10.249935</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>2.218029</td>\n",
+       "      <td>10.157114</td>\n",
+       "      <td>23.773194</td>\n",
+       "      <td>5.336961</td>\n",
+       "      <td>3.908277</td>\n",
+       "      <td>5.085754</td>\n",
+       "      <td>7.689371</td>\n",
+       "      <td>4.638199</td>\n",
+       "      <td>6.644870</td>\n",
+       "      <td>9.314052</td>\n",
+       "      <td>27.603528</td>\n",
+       "      <td>21.129202</td>\n",
+       "      <td>12.519881</td>\n",
+       "      <td>14.149741</td>\n",
+       "      <td>16.151058</td>\n",
+       "      <td>8.002107</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>1.575741</td>\n",
+       "      <td>10.346771</td>\n",
+       "      <td>22.638090</td>\n",
+       "      <td>5.676162</td>\n",
+       "      <td>3.974290</td>\n",
+       "      <td>5.669085</td>\n",
+       "      <td>8.581475</td>\n",
+       "      <td>4.796650</td>\n",
+       "      <td>7.207829</td>\n",
+       "      <td>9.250739</td>\n",
+       "      <td>28.592135</td>\n",
+       "      <td>22.005285</td>\n",
+       "      <td>13.801397</td>\n",
+       "      <td>15.090119</td>\n",
+       "      <td>17.547433</td>\n",
+       "      <td>8.376870</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>5.283090</td>\n",
+       "      <td>17.775891</td>\n",
+       "      <td>46.993908</td>\n",
+       "      <td>6.290751</td>\n",
+       "      <td>6.759553</td>\n",
+       "      <td>4.634946</td>\n",
+       "      <td>4.491508</td>\n",
+       "      <td>3.912036</td>\n",
+       "      <td>6.604947</td>\n",
+       "      <td>5.856747</td>\n",
+       "      <td>15.012368</td>\n",
+       "      <td>18.485785</td>\n",
+       "      <td>6.398652</td>\n",
+       "      <td>8.899114</td>\n",
+       "      <td>15.555289</td>\n",
+       "      <td>4.008030</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>3.452875</td>\n",
+       "      <td>0.668882</td>\n",
+       "      <td>5.485553</td>\n",
+       "      <td>15.041078</td>\n",
+       "      <td>5.360934</td>\n",
+       "      <td>7.015324</td>\n",
+       "      <td>15.554694</td>\n",
+       "      <td>9.098680</td>\n",
+       "      <td>6.356780</td>\n",
+       "      <td>10.844457</td>\n",
+       "      <td>4.134404</td>\n",
+       "      <td>9.089634</td>\n",
+       "      <td>4.111142</td>\n",
+       "      <td>7.369242</td>\n",
+       "      <td>10.711437</td>\n",
+       "      <td>6.492015</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>8.246084</td>\n",
+       "      <td>15.033800</td>\n",
+       "      <td>32.710120</td>\n",
+       "      <td>5.181884</td>\n",
+       "      <td>12.197754</td>\n",
+       "      <td>3.953421</td>\n",
+       "      <td>3.386129</td>\n",
+       "      <td>8.057811</td>\n",
+       "      <td>4.936727</td>\n",
+       "      <td>5.820899</td>\n",
+       "      <td>18.617374</td>\n",
+       "      <td>10.184151</td>\n",
+       "      <td>5.140045</td>\n",
+       "      <td>15.910655</td>\n",
+       "      <td>12.137778</td>\n",
+       "      <td>5.062036</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>0.584997</td>\n",
+       "      <td>0.062744</td>\n",
+       "      <td>2.609888</td>\n",
+       "      <td>7.971209</td>\n",
+       "      <td>10.116311</td>\n",
+       "      <td>5.365451</td>\n",
+       "      <td>11.266231</td>\n",
+       "      <td>3.404336</td>\n",
+       "      <td>10.007112</td>\n",
+       "      <td>4.673205</td>\n",
+       "      <td>6.870383</td>\n",
+       "      <td>5.352833</td>\n",
+       "      <td>6.724363</td>\n",
+       "      <td>4.729984</td>\n",
+       "      <td>6.320257</td>\n",
+       "      <td>5.385639</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>10.285632</td>\n",
+       "      <td>3.370204</td>\n",
+       "      <td>39.660790</td>\n",
+       "      <td>31.097938</td>\n",
+       "      <td>63.584399</td>\n",
+       "      <td>27.368937</td>\n",
+       "      <td>47.739140</td>\n",
+       "      <td>17.804931</td>\n",
+       "      <td>22.058417</td>\n",
+       "      <td>14.028805</td>\n",
+       "      <td>31.784768</td>\n",
+       "      <td>24.160963</td>\n",
+       "      <td>15.563083</td>\n",
+       "      <td>13.804010</td>\n",
+       "      <td>13.530706</td>\n",
+       "      <td>54.312478</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>5.791718</td>\n",
+       "      <td>22.044100</td>\n",
+       "      <td>41.488028</td>\n",
+       "      <td>5.091253</td>\n",
+       "      <td>5.368434</td>\n",
+       "      <td>3.731248</td>\n",
+       "      <td>4.200220</td>\n",
+       "      <td>3.388468</td>\n",
+       "      <td>6.180186</td>\n",
+       "      <td>5.273027</td>\n",
+       "      <td>14.283289</td>\n",
+       "      <td>17.374908</td>\n",
+       "      <td>5.786343</td>\n",
+       "      <td>7.882604</td>\n",
+       "      <td>14.754825</td>\n",
+       "      <td>2.979721</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>9.841274</td>\n",
+       "      <td>4.174403</td>\n",
+       "      <td>39.461849</td>\n",
+       "      <td>21.626332</td>\n",
+       "      <td>50.364483</td>\n",
+       "      <td>46.409553</td>\n",
+       "      <td>44.900184</td>\n",
+       "      <td>35.059767</td>\n",
+       "      <td>52.550276</td>\n",
+       "      <td>23.415432</td>\n",
+       "      <td>33.419473</td>\n",
+       "      <td>87.867238</td>\n",
+       "      <td>54.242866</td>\n",
+       "      <td>77.074342</td>\n",
+       "      <td>25.267238</td>\n",
+       "      <td>38.063686</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>8.076575</td>\n",
+       "      <td>20.318730</td>\n",
+       "      <td>104.826513</td>\n",
+       "      <td>77.196962</td>\n",
+       "      <td>49.132814</td>\n",
+       "      <td>44.755586</td>\n",
+       "      <td>94.593648</td>\n",
+       "      <td>44.417524</td>\n",
+       "      <td>24.696334</td>\n",
+       "      <td>65.645870</td>\n",
+       "      <td>55.305800</td>\n",
+       "      <td>102.823123</td>\n",
+       "      <td>82.419784</td>\n",
+       "      <td>109.871579</td>\n",
+       "      <td>66.819106</td>\n",
+       "      <td>110.729977</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>3.117764</td>\n",
+       "      <td>2.163362</td>\n",
+       "      <td>24.956252</td>\n",
+       "      <td>67.610626</td>\n",
+       "      <td>87.484668</td>\n",
+       "      <td>49.998282</td>\n",
+       "      <td>67.801216</td>\n",
+       "      <td>55.489691</td>\n",
+       "      <td>59.346815</td>\n",
+       "      <td>26.625033</td>\n",
+       "      <td>27.430611</td>\n",
+       "      <td>32.191250</td>\n",
+       "      <td>10.565028</td>\n",
+       "      <td>22.097674</td>\n",
+       "      <td>32.502776</td>\n",
+       "      <td>19.317209</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>2.393682</td>\n",
+       "      <td>0.055353</td>\n",
+       "      <td>2.096665</td>\n",
+       "      <td>6.177805</td>\n",
+       "      <td>5.615364</td>\n",
+       "      <td>4.633590</td>\n",
+       "      <td>7.547185</td>\n",
+       "      <td>4.919401</td>\n",
+       "      <td>8.712569</td>\n",
+       "      <td>5.193061</td>\n",
+       "      <td>5.590117</td>\n",
+       "      <td>6.207371</td>\n",
+       "      <td>5.638585</td>\n",
+       "      <td>3.272310</td>\n",
+       "      <td>6.886324</td>\n",
+       "      <td>4.478871</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>3.525765</td>\n",
+       "      <td>3.095343</td>\n",
+       "      <td>10.604712</td>\n",
+       "      <td>28.575365</td>\n",
+       "      <td>37.072472</td>\n",
+       "      <td>9.322816</td>\n",
+       "      <td>33.526178</td>\n",
+       "      <td>16.519565</td>\n",
+       "      <td>36.144530</td>\n",
+       "      <td>21.231744</td>\n",
+       "      <td>26.399146</td>\n",
+       "      <td>23.185726</td>\n",
+       "      <td>12.706944</td>\n",
+       "      <td>32.464040</td>\n",
+       "      <td>22.168641</td>\n",
+       "      <td>25.041196</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>5.743260</td>\n",
+       "      <td>4.832874</td>\n",
+       "      <td>5.839154</td>\n",
+       "      <td>2.948683</td>\n",
+       "      <td>9.521599</td>\n",
+       "      <td>1.167279</td>\n",
+       "      <td>3.499847</td>\n",
+       "      <td>8.151808</td>\n",
+       "      <td>14.434436</td>\n",
+       "      <td>18.306985</td>\n",
+       "      <td>25.753370</td>\n",
+       "      <td>24.048100</td>\n",
+       "      <td>17.901501</td>\n",
+       "      <td>90.245558</td>\n",
+       "      <td>31.771293</td>\n",
+       "      <td>5.635110</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>0.001719</td>\n",
+       "      <td>0.000313</td>\n",
+       "      <td>0.001563</td>\n",
+       "      <td>3.793438</td>\n",
+       "      <td>4.291719</td>\n",
+       "      <td>4.475625</td>\n",
+       "      <td>2.629375</td>\n",
+       "      <td>5.270313</td>\n",
+       "      <td>2.415313</td>\n",
+       "      <td>2.826406</td>\n",
+       "      <td>4.040938</td>\n",
+       "      <td>8.905938</td>\n",
+       "      <td>0.847812</td>\n",
+       "      <td>4.703906</td>\n",
+       "      <td>4.397969</td>\n",
+       "      <td>4.439375</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>3.381265</td>\n",
+       "      <td>0.067379</td>\n",
+       "      <td>5.667543</td>\n",
+       "      <td>2.866886</td>\n",
+       "      <td>5.008546</td>\n",
+       "      <td>11.233689</td>\n",
+       "      <td>5.872638</td>\n",
+       "      <td>8.505012</td>\n",
+       "      <td>6.408381</td>\n",
+       "      <td>10.862284</td>\n",
+       "      <td>6.733607</td>\n",
+       "      <td>14.372227</td>\n",
+       "      <td>3.506984</td>\n",
+       "      <td>4.217420</td>\n",
+       "      <td>6.728348</td>\n",
+       "      <td>4.541988</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>3.950365</td>\n",
+       "      <td>2.161189</td>\n",
+       "      <td>1.699535</td>\n",
+       "      <td>42.691567</td>\n",
+       "      <td>6.211487</td>\n",
+       "      <td>10.543161</td>\n",
+       "      <td>35.040007</td>\n",
+       "      <td>1.148406</td>\n",
+       "      <td>8.081673</td>\n",
+       "      <td>9.018260</td>\n",
+       "      <td>10.304449</td>\n",
+       "      <td>4.331341</td>\n",
+       "      <td>101.074037</td>\n",
+       "      <td>144.088313</td>\n",
+       "      <td>24.463977</td>\n",
+       "      <td>12.969290</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>7.016312</td>\n",
+       "      <td>5.696904</td>\n",
+       "      <td>7.138981</td>\n",
+       "      <td>50.494840</td>\n",
+       "      <td>11.181092</td>\n",
+       "      <td>9.333056</td>\n",
+       "      <td>28.123502</td>\n",
+       "      <td>10.745506</td>\n",
+       "      <td>63.328063</td>\n",
+       "      <td>20.450566</td>\n",
+       "      <td>15.501831</td>\n",
+       "      <td>8.434920</td>\n",
+       "      <td>49.988182</td>\n",
+       "      <td>53.852530</td>\n",
+       "      <td>34.231858</td>\n",
+       "      <td>61.643975</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>3.941512</td>\n",
+       "      <td>10.158678</td>\n",
+       "      <td>26.509085</td>\n",
+       "      <td>4.395224</td>\n",
+       "      <td>11.370998</td>\n",
+       "      <td>2.870912</td>\n",
+       "      <td>3.107285</td>\n",
+       "      <td>3.234124</td>\n",
+       "      <td>4.751860</td>\n",
+       "      <td>8.971102</td>\n",
+       "      <td>18.993424</td>\n",
+       "      <td>10.510988</td>\n",
+       "      <td>8.261983</td>\n",
+       "      <td>10.290881</td>\n",
+       "      <td>16.632808</td>\n",
+       "      <td>1.611005</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>2.080410</td>\n",
+       "      <td>6.656999</td>\n",
+       "      <td>18.259986</td>\n",
+       "      <td>2.532824</td>\n",
+       "      <td>6.433831</td>\n",
+       "      <td>1.617749</td>\n",
+       "      <td>1.951893</td>\n",
+       "      <td>1.697985</td>\n",
+       "      <td>2.944772</td>\n",
+       "      <td>4.763807</td>\n",
+       "      <td>12.032998</td>\n",
+       "      <td>4.697291</td>\n",
+       "      <td>4.873394</td>\n",
+       "      <td>7.738277</td>\n",
+       "      <td>10.461619</td>\n",
+       "      <td>1.374957</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>12.270294</td>\n",
+       "      <td>10.462736</td>\n",
+       "      <td>17.751749</td>\n",
+       "      <td>1.906053</td>\n",
+       "      <td>2.322078</td>\n",
+       "      <td>1.808782</td>\n",
+       "      <td>2.543212</td>\n",
+       "      <td>2.295836</td>\n",
+       "      <td>1.231805</td>\n",
+       "      <td>1.188768</td>\n",
+       "      <td>10.370364</td>\n",
+       "      <td>7.140133</td>\n",
+       "      <td>2.849020</td>\n",
+       "      <td>4.938418</td>\n",
+       "      <td>3.887684</td>\n",
+       "      <td>2.684220</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30</th>\n",
+       "      <td>2.137901</td>\n",
+       "      <td>2.511828</td>\n",
+       "      <td>11.139828</td>\n",
+       "      <td>2.696163</td>\n",
+       "      <td>4.295602</td>\n",
+       "      <td>4.462590</td>\n",
+       "      <td>16.943052</td>\n",
+       "      <td>8.039776</td>\n",
+       "      <td>17.726827</td>\n",
+       "      <td>20.556685</td>\n",
+       "      <td>29.304363</td>\n",
+       "      <td>27.276327</td>\n",
+       "      <td>34.198703</td>\n",
+       "      <td>40.283161</td>\n",
+       "      <td>49.789732</td>\n",
+       "      <td>20.057123</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>32</th>\n",
+       "      <td>1.916786</td>\n",
+       "      <td>6.804878</td>\n",
+       "      <td>8.044118</td>\n",
+       "      <td>4.256456</td>\n",
+       "      <td>0.710904</td>\n",
+       "      <td>1.270803</td>\n",
+       "      <td>3.299857</td>\n",
+       "      <td>1.842898</td>\n",
+       "      <td>2.596664</td>\n",
+       "      <td>4.058465</td>\n",
+       "      <td>13.060079</td>\n",
+       "      <td>3.050574</td>\n",
+       "      <td>4.385760</td>\n",
+       "      <td>4.974354</td>\n",
+       "      <td>8.554699</td>\n",
+       "      <td>1.846664</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>34</th>\n",
+       "      <td>59.526995</td>\n",
+       "      <td>18.241201</td>\n",
+       "      <td>50.925557</td>\n",
+       "      <td>43.806523</td>\n",
+       "      <td>51.430440</td>\n",
+       "      <td>69.373503</td>\n",
+       "      <td>43.467846</td>\n",
+       "      <td>126.274185</td>\n",
+       "      <td>34.428966</td>\n",
+       "      <td>67.276212</td>\n",
+       "      <td>63.054358</td>\n",
+       "      <td>163.466372</td>\n",
+       "      <td>23.613230</td>\n",
+       "      <td>44.510779</td>\n",
+       "      <td>65.316750</td>\n",
+       "      <td>15.552792</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>36</th>\n",
+       "      <td>4.169038</td>\n",
+       "      <td>0.842416</td>\n",
+       "      <td>9.648808</td>\n",
+       "      <td>14.825420</td>\n",
+       "      <td>17.146499</td>\n",
+       "      <td>20.670608</td>\n",
+       "      <td>23.284316</td>\n",
+       "      <td>32.354702</td>\n",
+       "      <td>22.204138</td>\n",
+       "      <td>7.848328</td>\n",
+       "      <td>16.880103</td>\n",
+       "      <td>16.831147</td>\n",
+       "      <td>6.981711</td>\n",
+       "      <td>14.136708</td>\n",
+       "      <td>11.754849</td>\n",
+       "      <td>6.418252</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>39</th>\n",
+       "      <td>0.466091</td>\n",
+       "      <td>0.278227</td>\n",
+       "      <td>1.852339</td>\n",
+       "      <td>13.839564</td>\n",
+       "      <td>10.562277</td>\n",
+       "      <td>6.755025</td>\n",
+       "      <td>28.641743</td>\n",
+       "      <td>17.244035</td>\n",
+       "      <td>71.874319</td>\n",
+       "      <td>46.774939</td>\n",
+       "      <td>38.575991</td>\n",
+       "      <td>22.364832</td>\n",
+       "      <td>121.582754</td>\n",
+       "      <td>258.355439</td>\n",
+       "      <td>40.232012</td>\n",
+       "      <td>164.324629</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>40</th>\n",
+       "      <td>3.466692</td>\n",
+       "      <td>0.080431</td>\n",
+       "      <td>3.399319</td>\n",
+       "      <td>0.991105</td>\n",
+       "      <td>1.827214</td>\n",
+       "      <td>49.035011</td>\n",
+       "      <td>2.293149</td>\n",
+       "      <td>6.952498</td>\n",
+       "      <td>4.045420</td>\n",
+       "      <td>3.310371</td>\n",
+       "      <td>12.964799</td>\n",
+       "      <td>5.512491</td>\n",
+       "      <td>0.866011</td>\n",
+       "      <td>0.801287</td>\n",
+       "      <td>7.362604</td>\n",
+       "      <td>0.928274</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>47</th>\n",
+       "      <td>0.380402</td>\n",
+       "      <td>1.216092</td>\n",
+       "      <td>2.614818</td>\n",
+       "      <td>2.163513</td>\n",
+       "      <td>1.371241</td>\n",
+       "      <td>13.117905</td>\n",
+       "      <td>16.182633</td>\n",
+       "      <td>12.895041</td>\n",
+       "      <td>11.481179</td>\n",
+       "      <td>7.604063</td>\n",
+       "      <td>42.336985</td>\n",
+       "      <td>31.820554</td>\n",
+       "      <td>20.550289</td>\n",
+       "      <td>25.567815</td>\n",
+       "      <td>26.991436</td>\n",
+       "      <td>21.536746</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>48</th>\n",
+       "      <td>121.099206</td>\n",
+       "      <td>382.128336</td>\n",
+       "      <td>104.400896</td>\n",
+       "      <td>188.980240</td>\n",
+       "      <td>5.042779</td>\n",
+       "      <td>71.577918</td>\n",
+       "      <td>135.162762</td>\n",
+       "      <td>200.053779</td>\n",
+       "      <td>148.892850</td>\n",
+       "      <td>8.436341</td>\n",
+       "      <td>76.961703</td>\n",
+       "      <td>133.352007</td>\n",
+       "      <td>132.365248</td>\n",
+       "      <td>147.416174</td>\n",
+       "      <td>35.548992</td>\n",
+       "      <td>0.536769</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>49</th>\n",
+       "      <td>2.542787</td>\n",
+       "      <td>6.437042</td>\n",
+       "      <td>4.536064</td>\n",
+       "      <td>1.443969</td>\n",
+       "      <td>2.351263</td>\n",
+       "      <td>1.246944</td>\n",
+       "      <td>2.383456</td>\n",
+       "      <td>1.362877</td>\n",
+       "      <td>3.133659</td>\n",
+       "      <td>3.697229</td>\n",
+       "      <td>11.426650</td>\n",
+       "      <td>2.861043</td>\n",
+       "      <td>4.262632</td>\n",
+       "      <td>5.899552</td>\n",
+       "      <td>8.562958</td>\n",
+       "      <td>3.708028</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>51</th>\n",
+       "      <td>0.121143</td>\n",
+       "      <td>19.624062</td>\n",
+       "      <td>14.381985</td>\n",
+       "      <td>12.325063</td>\n",
+       "      <td>0.032736</td>\n",
+       "      <td>4.366138</td>\n",
+       "      <td>20.628649</td>\n",
+       "      <td>5.116555</td>\n",
+       "      <td>36.307131</td>\n",
+       "      <td>8.160342</td>\n",
+       "      <td>9.880108</td>\n",
+       "      <td>7.239366</td>\n",
+       "      <td>13.688699</td>\n",
+       "      <td>13.484779</td>\n",
+       "      <td>19.002711</td>\n",
+       "      <td>21.630734</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>56</th>\n",
+       "      <td>8.096633</td>\n",
+       "      <td>0.388282</td>\n",
+       "      <td>12.994097</td>\n",
+       "      <td>3.941627</td>\n",
+       "      <td>3.318540</td>\n",
+       "      <td>16.433100</td>\n",
+       "      <td>0.650853</td>\n",
+       "      <td>56.710319</td>\n",
+       "      <td>2.859641</td>\n",
+       "      <td>44.544600</td>\n",
+       "      <td>5.201793</td>\n",
+       "      <td>21.594447</td>\n",
+       "      <td>0.349147</td>\n",
+       "      <td>0.690643</td>\n",
+       "      <td>12.655662</td>\n",
+       "      <td>0.188675</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>57</th>\n",
+       "      <td>0.585809</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.007761</td>\n",
+       "      <td>0.002217</td>\n",
+       "      <td>4.278271</td>\n",
+       "      <td>7.796674</td>\n",
+       "      <td>0.002217</td>\n",
+       "      <td>4.324612</td>\n",
+       "      <td>0.003326</td>\n",
+       "      <td>3.960976</td>\n",
+       "      <td>1.947228</td>\n",
+       "      <td>4.638581</td>\n",
+       "      <td>0.001109</td>\n",
+       "      <td>4.477827</td>\n",
+       "      <td>0.010421</td>\n",
+       "      <td>0.217073</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>59</th>\n",
+       "      <td>9.002233</td>\n",
+       "      <td>28.178835</td>\n",
+       "      <td>50.646350</td>\n",
+       "      <td>0.884572</td>\n",
+       "      <td>3.313686</td>\n",
+       "      <td>0.803751</td>\n",
+       "      <td>6.336906</td>\n",
+       "      <td>0.882340</td>\n",
+       "      <td>6.095557</td>\n",
+       "      <td>3.698370</td>\n",
+       "      <td>17.841259</td>\n",
+       "      <td>4.257424</td>\n",
+       "      <td>14.322170</td>\n",
+       "      <td>32.066086</td>\n",
+       "      <td>3.711766</td>\n",
+       "      <td>5.126367</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>62</th>\n",
+       "      <td>3.852505</td>\n",
+       "      <td>9.924165</td>\n",
+       "      <td>3.733998</td>\n",
+       "      <td>2.778293</td>\n",
+       "      <td>2.651670</td>\n",
+       "      <td>1.100649</td>\n",
+       "      <td>3.850186</td>\n",
+       "      <td>3.188312</td>\n",
+       "      <td>7.121289</td>\n",
+       "      <td>7.048469</td>\n",
+       "      <td>12.543367</td>\n",
+       "      <td>11.084416</td>\n",
+       "      <td>18.497913</td>\n",
+       "      <td>78.724026</td>\n",
+       "      <td>17.065863</td>\n",
+       "      <td>4.556586</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>71</th>\n",
+       "      <td>0.026564</td>\n",
+       "      <td>9.219712</td>\n",
+       "      <td>2.927756</td>\n",
+       "      <td>2.005214</td>\n",
+       "      <td>0.779543</td>\n",
+       "      <td>0.533515</td>\n",
+       "      <td>4.837388</td>\n",
+       "      <td>6.061072</td>\n",
+       "      <td>32.991063</td>\n",
+       "      <td>6.736097</td>\n",
+       "      <td>31.253972</td>\n",
+       "      <td>14.201341</td>\n",
+       "      <td>22.141758</td>\n",
+       "      <td>1.370655</td>\n",
+       "      <td>14.240566</td>\n",
+       "      <td>26.971698</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>74</th>\n",
+       "      <td>0.005896</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>3.681364</td>\n",
+       "      <td>0.001794</td>\n",
+       "      <td>1.869777</td>\n",
+       "      <td>0.952320</td>\n",
+       "      <td>0.013074</td>\n",
+       "      <td>14.330428</td>\n",
+       "      <td>1.042041</td>\n",
+       "      <td>1.482440</td>\n",
+       "      <td>1.107665</td>\n",
+       "      <td>14.001025</td>\n",
+       "      <td>17.977954</td>\n",
+       "      <td>7.421687</td>\n",
+       "      <td>2.789797</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>77</th>\n",
+       "      <td>2.173670</td>\n",
+       "      <td>6.897074</td>\n",
+       "      <td>18.482979</td>\n",
+       "      <td>2.410106</td>\n",
+       "      <td>5.388032</td>\n",
+       "      <td>1.591489</td>\n",
+       "      <td>1.782181</td>\n",
+       "      <td>1.816489</td>\n",
+       "      <td>2.523138</td>\n",
+       "      <td>3.285638</td>\n",
+       "      <td>10.931649</td>\n",
+       "      <td>3.776596</td>\n",
+       "      <td>3.994415</td>\n",
+       "      <td>8.679255</td>\n",
+       "      <td>9.338032</td>\n",
+       "      <td>1.817819</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>85</th>\n",
+       "      <td>1.108329</td>\n",
+       "      <td>0.008160</td>\n",
+       "      <td>0.013787</td>\n",
+       "      <td>0.001688</td>\n",
+       "      <td>0.002814</td>\n",
+       "      <td>0.030107</td>\n",
+       "      <td>0.105234</td>\n",
+       "      <td>0.090884</td>\n",
+       "      <td>2.783343</td>\n",
+       "      <td>0.154192</td>\n",
+       "      <td>0.193022</td>\n",
+       "      <td>0.143500</td>\n",
+       "      <td>7.643500</td>\n",
+       "      <td>6.826393</td>\n",
+       "      <td>22.131683</td>\n",
+       "      <td>41.960889</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>122</th>\n",
+       "      <td>0.014581</td>\n",
+       "      <td>0.002430</td>\n",
+       "      <td>2.062778</td>\n",
+       "      <td>0.000810</td>\n",
+       "      <td>0.443499</td>\n",
+       "      <td>0.200486</td>\n",
+       "      <td>1.331308</td>\n",
+       "      <td>0.012961</td>\n",
+       "      <td>1.962333</td>\n",
+       "      <td>0.383556</td>\n",
+       "      <td>0.021871</td>\n",
+       "      <td>0.232888</td>\n",
+       "      <td>10.975699</td>\n",
+       "      <td>7.136087</td>\n",
+       "      <td>6.498177</td>\n",
+       "      <td>5.916565</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "               1           2           3           4          5          6   \\\n",
+       "color                                                                         \n",
+       "1        8.876186   18.261282   51.187960    6.134054  12.633700   5.477037   \n",
+       "2        2.989724    1.488277    7.840371   21.469497   5.473986   6.484202   \n",
+       "3        3.720807   12.429749   28.174160    5.869552   8.563257   5.155427   \n",
+       "4        5.762003    1.769082   14.549234   30.423671   8.715794   9.015703   \n",
+       "5        2.803744    2.260464    5.384254    9.709994   6.948961   2.923761   \n",
+       "6        0.597182   11.346204   19.132968    8.455170   5.968345   7.536916   \n",
+       "7        2.218029   10.157114   23.773194    5.336961   3.908277   5.085754   \n",
+       "8        1.575741   10.346771   22.638090    5.676162   3.974290   5.669085   \n",
+       "9        5.283090   17.775891   46.993908    6.290751   6.759553   4.634946   \n",
+       "10       3.452875    0.668882    5.485553   15.041078   5.360934   7.015324   \n",
+       "12       8.246084   15.033800   32.710120    5.181884  12.197754   3.953421   \n",
+       "13       0.584997    0.062744    2.609888    7.971209  10.116311   5.365451   \n",
+       "14      10.285632    3.370204   39.660790   31.097938  63.584399  27.368937   \n",
+       "15       5.791718   22.044100   41.488028    5.091253   5.368434   3.731248   \n",
+       "16       9.841274    4.174403   39.461849   21.626332  50.364483  46.409553   \n",
+       "17       8.076575   20.318730  104.826513   77.196962  49.132814  44.755586   \n",
+       "18       3.117764    2.163362   24.956252   67.610626  87.484668  49.998282   \n",
+       "19       2.393682    0.055353    2.096665    6.177805   5.615364   4.633590   \n",
+       "20       3.525765    3.095343   10.604712   28.575365  37.072472   9.322816   \n",
+       "21       5.743260    4.832874    5.839154    2.948683   9.521599   1.167279   \n",
+       "22       0.001719    0.000313    0.001563    3.793438   4.291719   4.475625   \n",
+       "23       3.381265    0.067379    5.667543    2.866886   5.008546  11.233689   \n",
+       "24       3.950365    2.161189    1.699535   42.691567   6.211487  10.543161   \n",
+       "25       7.016312    5.696904    7.138981   50.494840  11.181092   9.333056   \n",
+       "27       3.941512   10.158678   26.509085    4.395224  11.370998   2.870912   \n",
+       "28       2.080410    6.656999   18.259986    2.532824   6.433831   1.617749   \n",
+       "29      12.270294   10.462736   17.751749    1.906053   2.322078   1.808782   \n",
+       "30       2.137901    2.511828   11.139828    2.696163   4.295602   4.462590   \n",
+       "32       1.916786    6.804878    8.044118    4.256456   0.710904   1.270803   \n",
+       "34      59.526995   18.241201   50.925557   43.806523  51.430440  69.373503   \n",
+       "36       4.169038    0.842416    9.648808   14.825420  17.146499  20.670608   \n",
+       "39       0.466091    0.278227    1.852339   13.839564  10.562277   6.755025   \n",
+       "40       3.466692    0.080431    3.399319    0.991105   1.827214  49.035011   \n",
+       "47       0.380402    1.216092    2.614818    2.163513   1.371241  13.117905   \n",
+       "48     121.099206  382.128336  104.400896  188.980240   5.042779  71.577918   \n",
+       "49       2.542787    6.437042    4.536064    1.443969   2.351263   1.246944   \n",
+       "51       0.121143   19.624062   14.381985   12.325063   0.032736   4.366138   \n",
+       "56       8.096633    0.388282   12.994097    3.941627   3.318540  16.433100   \n",
+       "57       0.585809    0.000000    0.007761    0.002217   4.278271   7.796674   \n",
+       "59       9.002233   28.178835   50.646350    0.884572   3.313686   0.803751   \n",
+       "62       3.852505    9.924165    3.733998    2.778293   2.651670   1.100649   \n",
+       "71       0.026564    9.219712    2.927756    2.005214   0.779543   0.533515   \n",
+       "74       0.005896    0.000000    0.000000    3.681364   0.001794   1.869777   \n",
+       "77       2.173670    6.897074   18.482979    2.410106   5.388032   1.591489   \n",
+       "85       1.108329    0.008160    0.013787    0.001688   0.002814   0.030107   \n",
+       "122      0.014581    0.002430    2.062778    0.000810   0.443499   0.200486   \n",
+       "\n",
+       "               7           8           9          10         11          12  \\\n",
+       "color                                                                         \n",
+       "1        6.014585    5.304814    6.874191  12.439046  29.793732   22.665219   \n",
+       "2       27.589086    8.524538   36.255537  13.439261  16.893876   16.096439   \n",
+       "3        7.075434    5.348820    7.066567  11.246722  28.066567   20.510428   \n",
+       "4       42.401146   11.751899   45.332861  20.204209  22.506758   23.953855   \n",
+       "5        7.159026    2.579015   17.632118   6.025057  10.539294   11.215262   \n",
+       "6        9.468041    6.318810    9.369510  12.423428  41.204192   28.746239   \n",
+       "7        7.689371    4.638199    6.644870   9.314052  27.603528   21.129202   \n",
+       "8        8.581475    4.796650    7.207829   9.250739  28.592135   22.005285   \n",
+       "9        4.491508    3.912036    6.604947   5.856747  15.012368   18.485785   \n",
+       "10      15.554694    9.098680    6.356780  10.844457   4.134404    9.089634   \n",
+       "12       3.386129    8.057811    4.936727   5.820899  18.617374   10.184151   \n",
+       "13      11.266231    3.404336   10.007112   4.673205   6.870383    5.352833   \n",
+       "14      47.739140   17.804931   22.058417  14.028805  31.784768   24.160963   \n",
+       "15       4.200220    3.388468    6.180186   5.273027  14.283289   17.374908   \n",
+       "16      44.900184   35.059767   52.550276  23.415432  33.419473   87.867238   \n",
+       "17      94.593648   44.417524   24.696334  65.645870  55.305800  102.823123   \n",
+       "18      67.801216   55.489691   59.346815  26.625033  27.430611   32.191250   \n",
+       "19       7.547185    4.919401    8.712569   5.193061   5.590117    6.207371   \n",
+       "20      33.526178   16.519565   36.144530  21.231744  26.399146   23.185726   \n",
+       "21       3.499847    8.151808   14.434436  18.306985  25.753370   24.048100   \n",
+       "22       2.629375    5.270313    2.415313   2.826406   4.040938    8.905938   \n",
+       "23       5.872638    8.505012    6.408381  10.862284   6.733607   14.372227   \n",
+       "24      35.040007    1.148406    8.081673   9.018260  10.304449    4.331341   \n",
+       "25      28.123502   10.745506   63.328063  20.450566  15.501831    8.434920   \n",
+       "27       3.107285    3.234124    4.751860   8.971102  18.993424   10.510988   \n",
+       "28       1.951893    1.697985    2.944772   4.763807  12.032998    4.697291   \n",
+       "29       2.543212    2.295836    1.231805   1.188768  10.370364    7.140133   \n",
+       "30      16.943052    8.039776   17.726827  20.556685  29.304363   27.276327   \n",
+       "32       3.299857    1.842898    2.596664   4.058465  13.060079    3.050574   \n",
+       "34      43.467846  126.274185   34.428966  67.276212  63.054358  163.466372   \n",
+       "36      23.284316   32.354702   22.204138   7.848328  16.880103   16.831147   \n",
+       "39      28.641743   17.244035   71.874319  46.774939  38.575991   22.364832   \n",
+       "40       2.293149    6.952498    4.045420   3.310371  12.964799    5.512491   \n",
+       "47      16.182633   12.895041   11.481179   7.604063  42.336985   31.820554   \n",
+       "48     135.162762  200.053779  148.892850   8.436341  76.961703  133.352007   \n",
+       "49       2.383456    1.362877    3.133659   3.697229  11.426650    2.861043   \n",
+       "51      20.628649    5.116555   36.307131   8.160342   9.880108    7.239366   \n",
+       "56       0.650853   56.710319    2.859641  44.544600   5.201793   21.594447   \n",
+       "57       0.002217    4.324612    0.003326   3.960976   1.947228    4.638581   \n",
+       "59       6.336906    0.882340    6.095557   3.698370  17.841259    4.257424   \n",
+       "62       3.850186    3.188312    7.121289   7.048469  12.543367   11.084416   \n",
+       "71       4.837388    6.061072   32.991063   6.736097  31.253972   14.201341   \n",
+       "74       0.952320    0.013074   14.330428   1.042041   1.482440    1.107665   \n",
+       "77       1.782181    1.816489    2.523138   3.285638  10.931649    3.776596   \n",
+       "85       0.105234    0.090884    2.783343   0.154192   0.193022    0.143500   \n",
+       "122      1.331308    0.012961    1.962333   0.383556   0.021871    0.232888   \n",
+       "\n",
+       "               13          14         15          16  \n",
+       "color                                                 \n",
+       "1        8.702589   11.272143  18.419851    3.956024  \n",
+       "2       17.427745   16.574618  21.106419   25.184315  \n",
+       "3       12.535407   14.223367  19.688710    5.910453  \n",
+       "4       23.170228   23.510362  27.957910   33.567834  \n",
+       "5       12.417924   20.254556  11.322110   21.699245  \n",
+       "6       18.627272   21.547352  27.557788   10.249935  \n",
+       "7       12.519881   14.149741  16.151058    8.002107  \n",
+       "8       13.801397   15.090119  17.547433    8.376870  \n",
+       "9        6.398652    8.899114  15.555289    4.008030  \n",
+       "10       4.111142    7.369242  10.711437    6.492015  \n",
+       "12       5.140045   15.910655  12.137778    5.062036  \n",
+       "13       6.724363    4.729984   6.320257    5.385639  \n",
+       "14      15.563083   13.804010  13.530706   54.312478  \n",
+       "15       5.786343    7.882604  14.754825    2.979721  \n",
+       "16      54.242866   77.074342  25.267238   38.063686  \n",
+       "17      82.419784  109.871579  66.819106  110.729977  \n",
+       "18      10.565028   22.097674  32.502776   19.317209  \n",
+       "19       5.638585    3.272310   6.886324    4.478871  \n",
+       "20      12.706944   32.464040  22.168641   25.041196  \n",
+       "21      17.901501   90.245558  31.771293    5.635110  \n",
+       "22       0.847812    4.703906   4.397969    4.439375  \n",
+       "23       3.506984    4.217420   6.728348    4.541988  \n",
+       "24     101.074037  144.088313  24.463977   12.969290  \n",
+       "25      49.988182   53.852530  34.231858   61.643975  \n",
+       "27       8.261983   10.290881  16.632808    1.611005  \n",
+       "28       4.873394    7.738277  10.461619    1.374957  \n",
+       "29       2.849020    4.938418   3.887684    2.684220  \n",
+       "30      34.198703   40.283161  49.789732   20.057123  \n",
+       "32       4.385760    4.974354   8.554699    1.846664  \n",
+       "34      23.613230   44.510779  65.316750   15.552792  \n",
+       "36       6.981711   14.136708  11.754849    6.418252  \n",
+       "39     121.582754  258.355439  40.232012  164.324629  \n",
+       "40       0.866011    0.801287   7.362604    0.928274  \n",
+       "47      20.550289   25.567815  26.991436   21.536746  \n",
+       "48     132.365248  147.416174  35.548992    0.536769  \n",
+       "49       4.262632    5.899552   8.562958    3.708028  \n",
+       "51      13.688699   13.484779  19.002711   21.630734  \n",
+       "56       0.349147    0.690643  12.655662    0.188675  \n",
+       "57       0.001109    4.477827   0.010421    0.217073  \n",
+       "59      14.322170   32.066086   3.711766    5.126367  \n",
+       "62      18.497913   78.724026  17.065863    4.556586  \n",
+       "71      22.141758    1.370655  14.240566   26.971698  \n",
+       "74      14.001025   17.977954   7.421687    2.789797  \n",
+       "77       3.994415    8.679255   9.338032    1.817819  \n",
+       "85       7.643500    6.826393  22.131683   41.960889  \n",
+       "122     10.975699    7.136087   6.498177    5.916565  "
+      ]
+     },
+     "execution_count": 44,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Look at mean frequencies in each sample for each bin\n",
+    "profile.groupby('color').mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "214731"
+      ]
+     },
+     "execution_count": 46,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Number of unique profiles in profile\n",
+    "len(-profile.groupby(profile.columns.tolist()[2:],as_index=False).size())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Make new dataframe - only unique profiles and run bhtsne on it\n",
+    "new_profile = profile.drop_duplicates(profile.columns.tolist()[2:])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style>\n",
+       "    .dataframe thead tr:only-child th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>color</th>\n",
+       "      <th>1</th>\n",
+       "      <th>2</th>\n",
+       "      <th>3</th>\n",
+       "      <th>4</th>\n",
+       "      <th>5</th>\n",
+       "      <th>6</th>\n",
+       "      <th>7</th>\n",
+       "      <th>8</th>\n",
+       "      <th>9</th>\n",
+       "      <th>10</th>\n",
+       "      <th>11</th>\n",
+       "      <th>12</th>\n",
+       "      <th>13</th>\n",
+       "      <th>14</th>\n",
+       "      <th>15</th>\n",
+       "      <th>16</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>214731.000000</td>\n",
+       "      <td>214731.000000</td>\n",
+       "      <td>214731.000000</td>\n",
+       "      <td>214731.000000</td>\n",
+       "      <td>214731.000000</td>\n",
+       "      <td>214731.000000</td>\n",
+       "      <td>214731.000000</td>\n",
+       "      <td>214731.000000</td>\n",
+       "      <td>214731.000000</td>\n",
+       "      <td>214731.000000</td>\n",
+       "      <td>214731.000000</td>\n",
+       "      <td>214731.000000</td>\n",
+       "      <td>214731.000000</td>\n",
+       "      <td>214731.000000</td>\n",
+       "      <td>214731.000000</td>\n",
+       "      <td>214731.000000</td>\n",
+       "      <td>214731.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>22.129800</td>\n",
+       "      <td>7.893974</td>\n",
+       "      <td>14.111134</td>\n",
+       "      <td>20.949183</td>\n",
+       "      <td>18.048246</td>\n",
+       "      <td>13.387517</td>\n",
+       "      <td>12.819826</td>\n",
+       "      <td>19.863946</td>\n",
+       "      <td>16.551271</td>\n",
+       "      <td>19.722951</td>\n",
+       "      <td>13.629411</td>\n",
+       "      <td>21.115116</td>\n",
+       "      <td>24.002212</td>\n",
+       "      <td>20.522281</td>\n",
+       "      <td>29.781308</td>\n",
+       "      <td>19.061570</td>\n",
+       "      <td>17.828744</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>20.749713</td>\n",
+       "      <td>20.238912</td>\n",
+       "      <td>56.429132</td>\n",
+       "      <td>31.039870</td>\n",
+       "      <td>33.749958</td>\n",
+       "      <td>22.096052</td>\n",
+       "      <td>21.876617</td>\n",
+       "      <td>30.797125</td>\n",
+       "      <td>37.646332</td>\n",
+       "      <td>29.446247</td>\n",
+       "      <td>20.329130</td>\n",
+       "      <td>22.523138</td>\n",
+       "      <td>40.986660</td>\n",
+       "      <td>34.669525</td>\n",
+       "      <td>53.139510</td>\n",
+       "      <td>20.306207</td>\n",
+       "      <td>31.855980</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>5.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>4.000000</td>\n",
+       "      <td>4.000000</td>\n",
+       "      <td>7.000000</td>\n",
+       "      <td>6.000000</td>\n",
+       "      <td>4.000000</td>\n",
+       "      <td>5.000000</td>\n",
+       "      <td>7.000000</td>\n",
+       "      <td>4.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>17.000000</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>9.000000</td>\n",
+       "      <td>7.000000</td>\n",
+       "      <td>6.000000</td>\n",
+       "      <td>6.000000</td>\n",
+       "      <td>8.000000</td>\n",
+       "      <td>6.000000</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>8.000000</td>\n",
+       "      <td>15.000000</td>\n",
+       "      <td>12.000000</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>13.000000</td>\n",
+       "      <td>12.000000</td>\n",
+       "      <td>7.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>32.000000</td>\n",
+       "      <td>7.000000</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>28.000000</td>\n",
+       "      <td>17.000000</td>\n",
+       "      <td>12.000000</td>\n",
+       "      <td>11.000000</td>\n",
+       "      <td>25.000000</td>\n",
+       "      <td>13.000000</td>\n",
+       "      <td>23.000000</td>\n",
+       "      <td>18.000000</td>\n",
+       "      <td>30.000000</td>\n",
+       "      <td>27.000000</td>\n",
+       "      <td>19.000000</td>\n",
+       "      <td>30.000000</td>\n",
+       "      <td>27.000000</td>\n",
+       "      <td>22.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>122.000000</td>\n",
+       "      <td>922.000000</td>\n",
+       "      <td>2890.000000</td>\n",
+       "      <td>965.000000</td>\n",
+       "      <td>1499.000000</td>\n",
+       "      <td>1290.000000</td>\n",
+       "      <td>1281.000000</td>\n",
+       "      <td>1273.000000</td>\n",
+       "      <td>1922.000000</td>\n",
+       "      <td>1199.000000</td>\n",
+       "      <td>1492.000000</td>\n",
+       "      <td>925.000000</td>\n",
+       "      <td>2252.000000</td>\n",
+       "      <td>1641.000000</td>\n",
+       "      <td>3422.000000</td>\n",
+       "      <td>897.000000</td>\n",
+       "      <td>2124.000000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "               color              1              2              3  \\\n",
+       "count  214731.000000  214731.000000  214731.000000  214731.000000   \n",
+       "mean       22.129800       7.893974      14.111134      20.949183   \n",
+       "std        20.749713      20.238912      56.429132      31.039870   \n",
+       "min         1.000000       0.000000       0.000000       0.000000   \n",
+       "25%         5.000000       0.000000       0.000000       3.000000   \n",
+       "50%        17.000000       3.000000       3.000000       9.000000   \n",
+       "75%        32.000000       7.000000      10.000000      28.000000   \n",
+       "max       122.000000     922.000000    2890.000000     965.000000   \n",
+       "\n",
+       "                   4              5              6              7  \\\n",
+       "count  214731.000000  214731.000000  214731.000000  214731.000000   \n",
+       "mean       18.048246      13.387517      12.819826      19.863946   \n",
+       "std        33.749958      22.096052      21.876617      30.797125   \n",
+       "min         0.000000       0.000000       0.000000       0.000000   \n",
+       "25%         3.000000       3.000000       2.000000       3.000000   \n",
+       "50%         7.000000       6.000000       6.000000       8.000000   \n",
+       "75%        17.000000      12.000000      11.000000      25.000000   \n",
+       "max      1499.000000    1290.000000    1281.000000    1273.000000   \n",
+       "\n",
+       "                   8              9             10             11  \\\n",
+       "count  214731.000000  214731.000000  214731.000000  214731.000000   \n",
+       "mean       16.551271      19.722951      13.629411      21.115116   \n",
+       "std        37.646332      29.446247      20.329130      22.523138   \n",
+       "min         0.000000       0.000000       0.000000       0.000000   \n",
+       "25%         3.000000       4.000000       4.000000       7.000000   \n",
+       "50%         6.000000      10.000000       8.000000      15.000000   \n",
+       "75%        13.000000      23.000000      18.000000      30.000000   \n",
+       "max      1922.000000    1199.000000    1492.000000     925.000000   \n",
+       "\n",
+       "                  12             13             14             15  \\\n",
+       "count  214731.000000  214731.000000  214731.000000  214731.000000   \n",
+       "mean       24.002212      20.522281      29.781308      19.061570   \n",
+       "std        40.986660      34.669525      53.139510      20.306207   \n",
+       "min         0.000000       0.000000       0.000000       0.000000   \n",
+       "25%         6.000000       4.000000       5.000000       7.000000   \n",
+       "50%        12.000000      10.000000      13.000000      12.000000   \n",
+       "75%        27.000000      19.000000      30.000000      27.000000   \n",
+       "max      2252.000000    1641.000000    3422.000000     897.000000   \n",
+       "\n",
+       "                  16  \n",
+       "count  214731.000000  \n",
+       "mean       17.828744  \n",
+       "std        31.855980  \n",
+       "min         0.000000  \n",
+       "25%         4.000000  \n",
+       "50%         7.000000  \n",
+       "75%        22.000000  \n",
+       "max      2124.000000  "
+      ]
+     },
+     "execution_count": 48,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "new_profile.describe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "new_profile = new_profile.sample(frac=0.1)\n",
+    "data = new_profile.as_matrix(columns = new_profile.columns[2:])\n",
+    "v = (1.0/2000)\n",
+    "data = data + v\n",
+    "along_Y = np.apply_along_axis(sum, 0, data)\n",
+    "data = data/along_Y[None, :]\n",
+    "along_X = np.apply_along_axis(sum, 1, data)\n",
+    "data = data/along_X[:, None]\n",
+    "data = np.log(data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "ename": "ValueError",
+     "evalue": "Length of values does not match length of index",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-49-e02eb57c1b22>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     14\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mar2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0mnew_profile\u001b[0m\u00 [...]
+      "\u001b[0;32m/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc\u001b[0m in \u001b[0;36m__setitem__\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m   2427\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2428\u001b[0m             \u001b[0;31m# set column\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2429\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b [...]
+      "\u001b[0;32m/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc\u001b[0m in \u001b[0;36m_set_item\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m   2493\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2494\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_ensure_valid_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2495\u001b[0;31m         \u001b[0mvalue\u001b[ [...]
+      "\u001b[0;32m/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc\u001b[0m in \u001b[0;36m_sanitize_column\u001b[0;34m(self, key, value, broadcast)\u001b[0m\n\u001b[1;32m   2664\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2665\u001b[0m             \u001b[0;31m# turn me into an ndarray\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2666\u001b[0;31m             \u001b[0mvalue\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_sanitize_index\u001 [...]
+      "\u001b[0;32m/anaconda/lib/python2.7/site-packages/pandas/core/series.pyc\u001b[0m in \u001b[0;36m_sanitize_index\u001b[0;34m(data, index, copy)\u001b[0m\n\u001b[1;32m   2877\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2878\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m: [...]
+      "\u001b[0;31mValueError\u001b[0m: Length of values does not match length of index"
+     ]
+    }
+   ],
+   "source": [
+    "np.savetxt(\"data2.in\", data, delimiter=\"\\t\")\n",
+    "\n",
+    "path_bhtsne = '/Users/tanunia/PycharmProjects/biolab_t-sne/'\n",
+    "import sys, os\n",
+    "os.system(path_bhtsne + 'bhtsne.py -p 50 -m 3000 -i data2.in -o data_canopy2.out')\n",
+    "\n",
+    "ar2 = np.loadtxt(\"data2.out\", delimiter=\"\\t\")\n",
+    "len(ar2[:, 0])\n",
+    "\n",
+    "new_profile[\"x\"] = ar2[:, 0]\n",
+    "new_profile[\"y\"] = ar2[:, 1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "from matplotlib import pyplot as plt\n",
+    "\n",
+    "plt.scatter(new_profile[\"x\"], new_profile[\"y\"], c=new_profile[\"color\"])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "new_profile[\"color\"].value_counts()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/projects/mts/scripts/bhtsne.py b/src/projects/mts/scripts/bhtsne.py
new file mode 100755
index 0000000..7afc856
--- /dev/null
+++ b/src/projects/mts/scripts/bhtsne.py
@@ -0,0 +1,242 @@
+#!/usr/bin/env python
+
+'''
+A simple Python wrapper for the bh_tsne binary that makes it easier to use it
+for TSV files in a pipeline without any shell script trickery.
+
+Note: The script does some minimal sanity checking of the input, but don't
+    expect it to cover all cases. After all, it is just a wrapper.
+
+Example:
+
+    > echo -e '1.0\t0.0\n0.0\t1.0' | ./bhtsne.py -d 2 -p 0.1
+    -2458.83181442  -6525.87718385
+    2458.83181442   6525.87718385
+
+The output will not be normalised; perhaps the one-liner below is of interest:
+
+    python -c 'import numpy;  from sys import stdin, stdout;
+        d = numpy.loadtxt(stdin); d -= d.min(axis=0); d /= d.max(axis=0);
+        numpy.savetxt(stdout, d, fmt="%.8f", delimiter="\t")'
+
+Authors:     Pontus Stenetorp    <pontus stenetorp se>
+             Philippe Remy       <github: philipperemy>
+Version:    2016-03-08
+'''
+
+# Copyright (c) 2013, Pontus Stenetorp <pontus stenetorp se>
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+from argparse import ArgumentParser, FileType
+from os.path import abspath, dirname, isfile, join as path_join
+from shutil import rmtree
+from struct import calcsize, pack, unpack
+from subprocess import Popen
+from sys import stderr, stdin, stdout
+from tempfile import mkdtemp
+from platform import system
+from os import devnull
+import numpy as np
+import os, sys
+import io
+
+### Constants
+IS_WINDOWS = True if system() == 'Windows' else False
+BH_TSNE_BIN_PATH = '/home/ygorshkov/opt/bh_tsne' #path_join(dirname(__file__), 'windows', 'bh_tsne.exe') if IS_WINDOWS else path_join(dirname(__file__), 'bh_tsne')
+assert isfile(BH_TSNE_BIN_PATH), ('Unable to find the bh_tsne binary in the '
+    'same directory as this script, have you forgotten to compile it?: {}'
+    ).format(BH_TSNE_BIN_PATH)
+# Default hyper-parameter values from van der Maaten (2014)
+# https://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf (Experimental Setup, page 13)
+DEFAULT_NO_DIMS = 2
+INITIAL_DIMENSIONS = 50
+DEFAULT_PERPLEXITY = 50
+DEFAULT_THETA = 0.5
+EMPTY_SEED = -1
+DEFAULT_USE_PCA = True
+DEFAULT_MAX_ITERATIONS = 1000
+
+###
+
+def _argparse():
+    argparse = ArgumentParser('bh_tsne Python wrapper')
+    argparse.add_argument('-d', '--no_dims', type=int,
+                          default=DEFAULT_NO_DIMS)
+    argparse.add_argument('-p', '--perplexity', type=float,
+            default=DEFAULT_PERPLEXITY)
+    # 0.0 for theta is equivalent to vanilla t-SNE
+    argparse.add_argument('-t', '--theta', type=float, default=DEFAULT_THETA)
+    argparse.add_argument('-r', '--randseed', type=int, default=EMPTY_SEED)
+    argparse.add_argument('-n', '--initial_dims', type=int, default=INITIAL_DIMENSIONS)
+    argparse.add_argument('-v', '--verbose', action='store_true')
+    argparse.add_argument('-i', '--input', type=FileType('r'), default=stdin)
+    argparse.add_argument('-o', '--output', type=FileType('w'),
+            default=stdout)
+    argparse.add_argument('--use_pca', action='store_true')
+    argparse.add_argument('--no_pca', dest='use_pca', action='store_false')
+    argparse.set_defaults(use_pca=DEFAULT_USE_PCA)
+    argparse.add_argument('-m', '--max_iter', type=int, default=DEFAULT_MAX_ITERATIONS)
+    return argparse
+
+
+def _read_unpack(fmt, fh):
+    return unpack(fmt, fh.read(calcsize(fmt)))
+
+
+def _is_filelike_object(f):
+    try:
+        return isinstance(f, (file, io.IOBase))
+    except NameError:
+        # 'file' is not a class in python3
+        return isinstance(f, io.IOBase)
+
+
+def init_bh_tsne(samples, workdir, no_dims=DEFAULT_NO_DIMS, initial_dims=INITIAL_DIMENSIONS, perplexity=DEFAULT_PERPLEXITY,
+            theta=DEFAULT_THETA, randseed=EMPTY_SEED, verbose=False, use_pca=DEFAULT_USE_PCA, max_iter=DEFAULT_MAX_ITERATIONS):
+
+    if use_pca:
+        samples = samples - np.mean(samples, axis=0)
+        cov_x = np.dot(np.transpose(samples), samples)
+        [eig_val, eig_vec] = np.linalg.eig(cov_x)
+
+        # sorting the eigen-values in the descending order
+        eig_vec = eig_vec[:, eig_val.argsort()[::-1]]
+
+        if initial_dims > len(eig_vec):
+            initial_dims = len(eig_vec)
+
+        # truncating the eigen-vectors matrix to keep the most important vectors
+        eig_vec = eig_vec[:, :initial_dims]
+        samples = np.dot(samples, eig_vec)
+
+    # Assume that the dimensionality of the first sample is representative for
+    #   the whole batch
+    sample_dim = len(samples[0])
+    sample_count = len(samples)
+
+    # Note: The binary format used by bh_tsne is roughly the same as for
+    #   vanilla tsne
+    with open(path_join(workdir, 'data.dat'), 'wb') as data_file:
+        # Write the bh_tsne header
+        data_file.write(pack('iiddii', sample_count, sample_dim, theta, perplexity, no_dims, max_iter))
+        # Then write the data
+        for sample in samples:
+            data_file.write(pack('{}d'.format(len(sample)), *sample))
+        # Write random seed if specified
+        if randseed != EMPTY_SEED:
+            data_file.write(pack('i', randseed))
+
+def load_data(input_file):
+    # Read the data, using numpy's good judgement
+    return np.loadtxt(input_file)
+
+def bh_tsne(workdir, verbose=False):
+
+    # Call bh_tsne and let it do its thing
+    with open(devnull, 'w') as dev_null:
+        bh_tsne_p = Popen((abspath(BH_TSNE_BIN_PATH), ), cwd=workdir,
+                # bh_tsne is very noisy on stdout, tell it to use stderr
+                #   if it is to print any output
+                stdout=stderr if verbose else dev_null)
+        bh_tsne_p.wait()
+        assert not bh_tsne_p.returncode, ('ERROR: Call to bh_tsne exited '
+                'with a non-zero return code exit status, please ' +
+                ('enable verbose mode and ' if not verbose else '') +
+                'refer to the bh_tsne output for further details')
+
+    # Read and pass on the results
+    with open(path_join(workdir, 'result.dat'), 'rb') as output_file:
+        # The first two integers are just the number of samples and the
+        #   dimensionality
+        result_samples, result_dims = _read_unpack('ii', output_file)
+        # Collect the results, but they may be out of order
+        results = [_read_unpack('{}d'.format(result_dims), output_file)
+            for _ in range(result_samples)]
+        # Now collect the landmark data so that we can return the data in
+        #   the order it arrived
+        results = [(_read_unpack('i', output_file), e) for e in results]
+        # Put the results in order and yield it
+        results.sort()
+        for _, result in results:
+            yield result
+        # The last piece of data is the cost for each sample, we ignore it
+        #read_unpack('{}d'.format(sample_count), output_file)
+
+def run_bh_tsne(data, no_dims=2, perplexity=50, theta=0.5, randseed=-1, verbose=False, initial_dims=50, use_pca=True, max_iter=1000):
+    '''
+    Run TSNE based on the Barnes-HT algorithm
+
+    Parameters:
+    ----------
+    data: file or numpy.array
+        The data used to run TSNE, one sample per row
+    no_dims: int
+    perplexity: int
+    randseed: int
+    theta: float
+    initial_dims: int
+    verbose: boolean
+    use_pca: boolean
+    max_iter: int
+    '''
+
+    # bh_tsne works with fixed input and output paths, give it a temporary
+    #   directory to work in so we don't clutter the filesystem
+    tmp_dir_path = mkdtemp()
+
+    # Load data in forked process to free memory for actual bh_tsne calculation
+    child_pid = os.fork()
+    if child_pid == 0:
+        if _is_filelike_object(data):
+            data = load_data(data)
+
+        init_bh_tsne(data, tmp_dir_path, no_dims=no_dims, perplexity=perplexity, theta=theta, randseed=randseed,verbose=verbose, initial_dims=initial_dims, use_pca=use_pca, max_iter=max_iter)
+        sys.exit(0)
+    else:
+        try:
+            os.waitpid(child_pid, 0)
+        except KeyboardInterrupt:
+            print("Please run this program directly from python and not from ipython or jupyter.")
+            print("This is an issue due to asynchronous error handling.")
+
+        res = []
+        for result in bh_tsne(tmp_dir_path, verbose):
+            sample_res = []
+            for r in result:
+                sample_res.append(r)
+            res.append(sample_res)
+        rmtree(tmp_dir_path)
+        return np.asarray(res, dtype='float64')
+
+
+def main(args):
+    parser = _argparse()
+
+    if len(args) <= 1:
+        print(parser.print_help())
+        return 
+
+    argp = parser.parse_args(args[1:])
+    
+    for result in run_bh_tsne(argp.input, no_dims=argp.no_dims, perplexity=argp.perplexity, theta=argp.theta, randseed=argp.randseed,
+            verbose=argp.verbose, initial_dims=argp.initial_dims, use_pca=argp.use_pca, max_iter=argp.max_iter):
+        fmt = ''
+        for i in range(1, len(result)):
+            fmt = fmt + '{}\t'
+        fmt = fmt + '{}\n'
+        argp.output.write(fmt.format(*result))
+
+if __name__ == '__main__':
+    from sys import argv
+    exit(main(argv))
diff --git a/src/projects/mts/scripts/bin_profiles.py b/src/projects/mts/scripts/bin_profiles.py
new file mode 100755
index 0000000..2974756
--- /dev/null
+++ b/src/projects/mts/scripts/bin_profiles.py
@@ -0,0 +1,12 @@
+#!/usr/bin/env python
+from __future__ import (print_function)
+
+import pandas
+from pandas import DataFrame
+import sys
+
+profiles_in = pandas.read_table(sys.argv[1], index_col=0, header=None)
+binning_out = pandas.read_table(sys.argv[2], index_col=0, names=["bin"], dtype=str)
+table = profiles_in.join(binning_out)
+profiles = table.groupby("bin").median()
+profiles.to_csv(sys.stdout, sep="\t", header=False)
diff --git a/src/projects/mts/scripts/calc_kmers_mpl.py b/src/projects/mts/scripts/calc_kmers_mpl.py
deleted file mode 100755
index 26382cf..0000000
--- a/src/projects/mts/scripts/calc_kmers_mpl.py
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-import argparse
-
-def parse_args():
-	parser = argparse.ArgumentParser(description="Kmers mpl filter")
-	parser.add_argument("-om", "--one-min", default=3, type=int, help="min kmer mpl in one sample")
-	parser.add_argument("-am", "--all-min", default=3, type=int, help="min kmer mpl in all samples")
-	parser.add_argument("-kl", "--kmer-len", default=31, type=int, help="kmer length")
-	parser.add_argument("samples_dir", help="directory with samples")
-	parser.add_argument("output", help="output files prefix")
-	args = parser.parse_args()
-	return args
-
-def calc_mpl(args):
-	if not os.path.exists(args.samples_dir):
-		os.makedirs(args.samples_dir)
-
-	files = [f for f in os.listdir(args.samples_dir) if os.path.isfile(os.path.join(args.samples_dir, f))]
-
-	cmd = "/home/toxa31/work/algorithmic-biology/assembler/src/kmer_count_filter/kmer_count_filter -kl {} -one-min {} -all-min {}".format(
-		args.kmer_len, args.one_min, args.all_min)
-
-	for f in files:
-		cmd = cmd + " " + args.samples_dir + "/" + f
-
-	cmd = cmd + " " + args.output
-
-	print(cmd)
-
-	os.system(cmd)
-
-def main():
-	args = parse_args()
-	calc_mpl(args)
-
-main()
\ No newline at end of file
diff --git a/src/projects/mts/scripts/canopy_launch.sh b/src/projects/mts/scripts/canopy_launch.sh
deleted file mode 100755
index 5f17acc..0000000
--- a/src/projects/mts/scripts/canopy_launch.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-
-if [ "$#" -lt 3 ]; then
-    echo "Usage: script.sh <canopy.in> <canopy.out> <canopy.prof> [thread_cnt = 4]"
-    exit
-fi
-
-thread_cnt=4
-if [ "$#" -ge 4 ]; then
-    thread_cnt=$4
-fi
-
-/home/snurk/soft/mgs-canopy-algorithm/src/cc.bin -n $thread_cnt -i $1 -o $2 -c $3 #--max_canopy_dist 0.1 --max_close_dist 0.4 --max_merge_dist 0.05 --min_step_dist 0.01 --max_num_canopy_walks 3 --stop_fraction 1 --canopy_size_stats_file stat --filter_min_obs 1 --filter_max_dominant_obs 1.0
-
-#/home/snurk/soft/canopy/cc.bin -n 32 -i $1 -o bin_canopy -c prof_canopy --max_canopy_dist 0.1 --max_close_dist 0.4 --max_merge_dist 0.05 --min_step_dist 0.01 --max_num_canopy_walks 3 --stop_fraction 1 --canopy_size_stats_file stat --filter_min_obs 1 --filter_max_dominant_obs 1.0
-
-#/home/ygorshkov/Projects/canopy/cc.bin -n 32 -i canopy_mod.in -o bin_canopy -c prof_canopy --max_canopy_dist 0.1 --max_close_dist 0.4 --max_merge_dist 0.1 --min_step_dist 0.005 --max_num_canopy_walks 5 --stop_fraction 1 --canopy_size_stats_file stat
diff --git a/src/projects/mts/scripts/choose_bins.py b/src/projects/mts/scripts/choose_bins.py
new file mode 100755
index 0000000..729645e
--- /dev/null
+++ b/src/projects/mts/scripts/choose_bins.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+from __future__ import (print_function)
+
+import re
+import sys
+
+from common import contig_length
+import numpy
+import pandas
+from pandas import DataFrame
+
+in_fn = sys.argv[1]
+d = pandas.read_table(sys.argv[1], names=["name", "bin"], dtype=str)
+d["sample"] = d.apply(lambda row: re.findall("\\w+\\d+", row["name"])[0], axis=1)
+d["length"] = d.apply(lambda row: contig_length(row["name"]), axis=1)
+del d["name"]
+info = d.groupby(["bin", "sample"], as_index=False).sum()
+info = info.groupby("bin", as_index=False)["length"].max()
+info = info[info["length"] > 500000]
+info.to_csv(sys.stdout, sep="\t", header=False, index=False)
diff --git a/src/projects/mts/scripts/choose_samples.py b/src/projects/mts/scripts/choose_samples.py
index cd58c54..46b6422 100755
--- a/src/projects/mts/scripts/choose_samples.py
+++ b/src/projects/mts/scripts/choose_samples.py
@@ -1,61 +1,90 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 from __future__ import (print_function)
 
-import glob
 from operator import itemgetter
-from os import path
-import subprocess
+#import subprocess
+import os.path
 import sys
+import yaml
 
 if len(sys.argv) < 3:
-    print("Usage: choose_samples.py <canopy.prof> <binning dir> [CAGS+]")
+    print("Usage: choose_samples.py <input table> <input bins> <output table> <output dir> ")
     exit(1)
 
 PROF = sys.argv[1]
-DIR = sys.argv[2]
-CAGS = None
-if len(sys.argv) == 4:
-    CAGS = set(sys.argv[3:])
-DESIRED_ABUNDANCE = 50
+FILTERED_BINS = sys.argv[2]
+PROF_OUT = sys.argv[3]
+DIR = sys.argv[4]
+BINS = set()
+with open(FILTERED_BINS) as input:
+    for line in input:
+        bin = line.split()[0]
+        BINS.add(bin)
+
+DESIRED_ABUNDANCE = 999999 #sys.maxsize
 MIN_ABUNDANCE = 4
-MIN_TOTAL_ABUNDANCE = 20
+MIN_TOTAL_ABUNDANCE = 15
+
+prof_dict = dict()
+
+make_excluded = True
+excluded_dir = os.path.join(DIR, "excluded")
 
 #Assuming that samples are enumerated consecutively from 1 to N
+#(it is forced by the pipeline)
 with open(PROF) as input:
     for line in input:
+        exclude = False
+        samples = []
         params = line.split()
-        CAG = params[0]
-        if CAGS and CAG not in CAGS:
-            continue
-        profile = map(float, params[1:])
+        print(params)
+        bin = params[0]
+        profile = list(map(float, params[1:]))
+        if bin not in BINS:
+            print(bin, "was excluded from reassembly")
+            exclude = True
+        else:
+            print("Profile of", bin, ":", profile)
 
-        print("Profile of", CAG, ":", profile)
+            #Sort samples by their abundancies
+            weighted_profile = list((i, ab)
+                for i, ab in enumerate(profile) if ab >= MIN_ABUNDANCE) #and path.exists("{}/{}/sample{}_1.fastq".format(DIR, CAG, i + 1)))
+            weighted_profile.sort(key = itemgetter(1))
 
-        weighted_profile = list((i, ab)
-            for i, ab in enumerate(profile) if ab >= MIN_ABUNDANCE and path.exists("{}/{}/sample{}_1.fastq".format(DIR, CAG, i + 1)))
-        weighted_profile.sort(key = itemgetter(1))
+            total = 0
+            #If we have overabundant samples, use the least.
+            try:
+                i = next(x for x, _ in weighted_profile if profile[x] >= DESIRED_ABUNDANCE)
+                total = profile[i]
+                samples = [i + 1]
+            except StopIteration:
+                #If there isn't any, collect from samples, starting from the largest
+                for i, _ in reversed(weighted_profile):
+                    total += profile[i]
+                    samples.append(i + 1)
+                    if total >= DESIRED_ABUNDANCE:
+                        break
 
-        sum = 0
-        samples = []
-        #If we have overabundant samples, use the least.
-        try:
-            i = next(x for x, _ in weighted_profile if profile[x] >= DESIRED_ABUNDANCE)
-            sum = profile[i]
-            samples = [i + 1]
-        except StopIteration:
-            #If there isn't any, collect from samples, starting from the largest
-            for i, _ in reversed(weighted_profile):
-                sum += profile[i]
-                samples.append(i + 1)
-                if sum >= DESIRED_ABUNDANCE:
-                    break
-
-        print("Chosen samples are", samples, "with total mean abundance", sum)
-        if sum < MIN_TOTAL_ABUNDANCE:
-            print(CAG, "is too scarce; skipping")
-            continue
-
-        for suf, name in [("1", "left"), ("2", "right")]:
-            reads = ["{}/{}/sample{}_{}.fastq".format(DIR, CAG, sample, suf) for sample in samples]
-            with open("{}/{}/{}.fastq".format(DIR, CAG, name), "w") as output:
-                subprocess.check_call(["cat"] + reads, stdout=output)
+            print("Chosen samples are", samples, "with total mean abundance", total)
+            prof_dict[bin] = total
+
+            if total < MIN_TOTAL_ABUNDANCE:
+                print(bin, "is too scarce; skipping")
+                exclude = True
+
+        config_dir = DIR
+        if exclude:
+            if make_excluded and not os.path.isdir(excluded_dir):
+                os.mkdir(excluded_dir)
+            make_excluded = False
+            config_dir = excluded_dir
+        config_path = os.path.join(config_dir, bin + ".info")
+        with open(config_path, "w") as out:
+            print("total", sum(profile), file=out)
+            for i, ab in enumerate(profile, start=1):
+                if i in samples:
+                    print("+", file=out, end="")
+                print("sample" + str(i), ab, file=out)
+
+with open(PROF_OUT, "w") as prof_out:
+    yaml.dump(prof_dict, prof_out)
diff --git a/src/projects/mts/scripts/clusters2csv.py b/src/projects/mts/scripts/clusters2csv.py
new file mode 100755
index 0000000..daca2ec
--- /dev/null
+++ b/src/projects/mts/scripts/clusters2csv.py
@@ -0,0 +1,22 @@
+#!/usr/bin/env python
+from __future__ import print_function
+import sys
+
+from Bio import SeqIO
+
+from os import listdir
+from os.path import isfile, join
+
+if len(sys.argv) < 3:
+    print("Usage: %s <cluster directory> <output> " % sys.argv[0])
+    sys.exit(1)
+
+path = sys.argv[1]
+
+with open(sys.argv[2], "w") as output:
+    for f in listdir(path):
+        if isfile(join(path, f)) and f.endswith("fna"):
+            cluster = f.split(".")[0].split("_")[-1]
+            record_dict = SeqIO.to_dict(SeqIO.parse(join(path, f), "fasta"))
+            for k in record_dict.keys():
+                print(str(k) + "," + str(cluster), file=output)
diff --git a/src/projects/mts/scripts/combine_contigs.py b/src/projects/mts/scripts/combine_contigs.py
index 16b448f..7134db5 100755
--- a/src/projects/mts/scripts/combine_contigs.py
+++ b/src/projects/mts/scripts/combine_contigs.py
@@ -2,8 +2,9 @@
 from __future__ import print_function
 import sys
 import os
-import re
+import os.path
 from Bio import SeqIO
+from common import sample_name
 
 replace = False
 
@@ -13,16 +14,13 @@ if sys.argv[1] == "-r":
 else:
     files = sys.argv[1:]
 
-sample_re = re.compile("sample\d+")
-
 output = sys.stdout
 
 for file in files:
-    sample = sample_re.search(file).group(0)
     for seq in SeqIO.parse(file, "fasta"):
         seq_id = seq.id
         if replace:
             seq_id = seq_id.replace(",", "~")
-        seq.id = sample + "-" + seq_id
+        seq.id = sample_name(file) + "-" + seq_id
         seq.description = ""
         SeqIO.write(seq, output, "fasta")
diff --git a/src/projects/mts/scripts/common.py b/src/projects/mts/scripts/common.py
index 4146665..c22ff63 100644
--- a/src/projects/mts/scripts/common.py
+++ b/src/projects/mts/scripts/common.py
@@ -6,24 +6,40 @@ except:
 
 import os
 import os.path
-try:
-    import yaml
-    def load_dict(input):
-        return yaml.load(input)
-    def dump_dict(dict, output):
-        yaml.dump(dict, output)
-except:
-    def load_dict(input):
-        def load_pairs():
-            for line in input:
-                params = line.split(":", 2)
-                yield (params[0].strip(), params[1].strip())
-        return dict(load_pairs())
-    def dump_dict(dict, output):
-        for k, v in dict.items():
-            print(k, ": ", v, sep="", file=output)
-
-FASTA_EXTS = {".fasta", ".fa", ".fna", ".fsa", ".fastq", ".fastq.gz", ".fq", ".fq.gz", ".fna.gz"}
+import re
+
+default_values = {
+    "threads":     16,
+    "assembly":    {"assembler": "spades", "k": 55, "groups": []},
+    "profile":     {"profiler": "mts", "k": 21, "split": 10000},
+    "binning":     {"binner": "canopy", "min_length": 2000, "min_nonzeroes": 3},
+    "propagation": {"enabled": True},
+    "reassembly":  {"enabled": True}
+}
+
+# Taken from http://stackoverflow.com/questions/36831998/how-to-fill-default-parameters-in-yaml-file-using-python
+def setdefault_recursively(tgt, default = default_values):
+    for k in default:
+        if isinstance(default[k], dict): # if the current item is a dict,
+            # expand it recursively
+            setdefault_recursively(tgt.setdefault(k, {}), default[k])
+        else:
+            # ... otherwise simply set a default value if it's not set before
+            tgt.setdefault(k, default[k])
+
+def fill_default_values(config):
+    local_dir = config.get("LOCAL_DIR")
+    if local_dir:
+        default_values["bin"] = os.path.join(local_dir, "build/release/bin")
+        default_values["scripts"] = os.path.join(local_dir, "src/projects/mts/scripts")
+        default_values["assembly"]["dir"] = os.path.join(local_dir, "bin")
+    setdefault_recursively(config)
+    config["reassembly"].setdefault("dir", config["assembly"].get("dir"))
+
+def sample_name(fullname):
+    return os.path.splitext(os.path.basename(fullname))[0]
+
+FASTA_EXTS = {".fasta", ".fasta.gz", ".fa", ".fna", ".fsa", ".fastq", ".fastq.gz", ".fq", ".fq.gz", ".fna.gz"}
 def gather_paths(path, basename=False):
     for filename in os.listdir(path):
         name = os.path.basename(filename)
@@ -54,7 +70,7 @@ def gather_refs(data):
             for ref in gather_paths(data, True):
                 yield ref
         else:
-            yield (os.path.splitext(os.path.basename(data))[0], data)
+            yield (sample_name(data), data)
 
 def get_id(internal_id, sample):
     res = internal_id.split("_", 2)[1]
@@ -65,7 +81,7 @@ def load_annotation(file, normalize=True):
     sample, _ = os.path.splitext(os.path.basename(file))
     with open(file) as input:
         for line in input:
-            info = line.split(" : ")
+            info = line.split("\t")
             id = get_id(info[0], sample) if normalize else info[0]
             bins = info[1].split()
             if id in res:
@@ -74,6 +90,13 @@ def load_annotation(file, normalize=True):
                 res[id] = set(bins)
     return res
 
+def contig_length(name):
+    if "(" in name:
+        start, end = re.search("\((\d+)_(\d+)\)", name).groups()
+        return int(end) - int(start)
+    else:
+        return int(name.split("_")[3])
+
 class Row:
     def __init__(self, data, colnames):
         self.data = data
diff --git a/src/projects/mts/scripts/contig_name_filter.py b/src/projects/mts/scripts/contig_name_filter.py
new file mode 100755
index 0000000..09f606b
--- /dev/null
+++ b/src/projects/mts/scripts/contig_name_filter.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+from __future__ import print_function
+
+import sys
+from Bio import SeqIO
+
+if len(sys.argv) < 4:
+    print("Usage:", sys.argv[0], "<contigs_file> <file with names> <output> [<operation mode>]")
+    print("Operation mode is \"retain\" (default) or \"remove\"")
+    sys.exit(1)
+
+f_n = sys.argv[1]
+names_f = open(sys.argv[2], "r")
+names = set(l.strip() for l in names_f.readlines())
+input_seq_iterator = SeqIO.parse(open(f_n, "r"), "fasta")
+
+filtered_iterator = (record for record in input_seq_iterator \
+                      if record.name in names)
+
+if (len(sys.argv) == 5):
+    if sys.argv[4] == "remove":
+        filtered_iterator = (record for record in input_seq_iterator \
+                      if record.name not in names)
+    else:
+        if sys.argv[4] != "retain":
+            print("Wrong operation mode")
+
+output_handle = open(sys.argv[3], "w")
+SeqIO.write(filtered_iterator, output_handle, "fasta")
+output_handle.close()
diff --git a/src/projects/mts/scripts/convert_output.py b/src/projects/mts/scripts/convert_output.py
new file mode 100755
index 0000000..1d1e26b
--- /dev/null
+++ b/src/projects/mts/scripts/convert_output.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+from __future__ import print_function
+
+import argparse
+import os.path
+import re
+
+argparser = argparse.ArgumentParser(description="Binner output formatter")
+argparser.add_argument("--type", "-t", choices=["canopy", "concoct", "maxbin", "gattaca", "binsanity"], help="Binner type", default="canopy")
+argparser.add_argument("--output", "-o", type=str, help="Output directory with unified binning results")
+argparser.add_argument("input", type=str, help="File with binning info")
+
+extract_num = re.compile("\d+")
+
+class Parser:
+    def __init__(self):
+        self.bins = []
+
+    def add(self, line):
+        sample_contig, bin_id = self.parse(line)
+        bin_num = extract_num.findall(bin_id)[0]
+        self.bins.append(sample_contig + "\t" + "BIN" + bin_num)
+
+    def parse_file(self, file):
+        with open(file, "r") as input_file:
+            for line in input_file:
+                self.add(line)
+
+class CanopyParser(Parser):
+    def parse(self, line):
+        annotation_str = line.split()
+        bin_id = annotation_str[0].strip()
+        sample_contig = annotation_str[1].strip()
+        return (sample_contig, bin_id)
+
+class ConcoctParser(Parser):
+    def parse(self, line):
+        annotation_str = line.split(",", 1)
+        bin_id = annotation_str[1].strip()
+        sample_contig = annotation_str[0].replace("~", ",")
+        return (sample_contig, bin_id)
+
+parsers = {"canopy": CanopyParser(), "concoct": ConcoctParser(), "maxbin": ConcoctParser(), "gattaca": ConcoctParser(), "binsanity": ConcoctParser()}
+
+if __name__ == "__main__":
+    args = argparser.parse_args()
+    parser = parsers[args.type]
+
+    parser.parse_file(args.input)
+
+    with open(args.output, "w") as sample_out:
+        for sample in parser.bins:
+            print(sample, file=sample_out)
diff --git a/src/projects/mts/scripts/cut_fasta.py b/src/projects/mts/scripts/cut_fasta.py
new file mode 100755
index 0000000..23cd076
--- /dev/null
+++ b/src/projects/mts/scripts/cut_fasta.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python2
+"""Cut up FASTA files into non-overlapping or overlapping chunks of equal length.
+"""
+import argparse
+from Bio import SeqIO
+
+
+def cut_up_fasta(fastfiles, chunk_size, overlap, merge_last):
+    for ff in fastfiles:
+        for record in SeqIO.parse(ff, "fasta"):
+            if (not merge_last and len(record.seq) > chunk_size) or (merge_last and len(record.seq) >= 2 * chunk_size):
+                i = 0
+                for split_seq in chunks(record.seq, chunk_size, overlap, merge_last):
+                    start = i*chunk_size
+                    end = start + len(split_seq)
+                    print ">%s_(%i_%i)\n%s" % (record.id, start, end, split_seq)
+                    i = i + 1
+            else:
+                print ">%s\n%s" % (record.id, record.seq)
+
+
+def chunks(l, n, o, merge_last):
+    """Yield successive n-sized chunks from l with overlap o between chunks;
+    if merge_last is set, a final undersized chunk is merged into the last one.
+    """
+    assert n > o
+
+    if not merge_last:
+        for i in xrange(0, len(l), n - o):
+            yield l[i:i + n]
+    else:
+        for i in xrange(0, len(l) - n + 1, n - o):
+            yield l[i:i + n] if i + n + n - o <= len(l) else l[i:]
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description=__doc__,
+                formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument(
+        "contigs", nargs="+", help="Fasta files with contigs\n")
+    parser.add_argument("-c", "--chunk_size", default=1999, type=int, help="Chunk size\n")
+    parser.add_argument("-o", "--overlap_size", default=1900, type=int, help="Overlap size\n")
+    parser.add_argument("-m", "--merge_last", default=False, action="store_true", help="Concatenate final part to last contig\n")
+    args = parser.parse_args()
+    cut_up_fasta(args.contigs, args.chunk_size, args.overlap_size, args.merge_last)
diff --git a/src/projects/mts/scripts/filter_bin.py b/src/projects/mts/scripts/filter_bin.py
new file mode 100755
index 0000000..5fa6c09
--- /dev/null
+++ b/src/projects/mts/scripts/filter_bin.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+from __future__ import print_function
+
+import sys
+from Bio import SeqIO
+import common
+
+def print_usage():
+        print("Usage: filter_bins.py <contigs> <binning info> <bin name>")
+
+contigs = sys.argv[1]
+binning = common.load_annotation(sys.argv[2], False)
+bin_name = sys.argv[3]
+
+for seq in SeqIO.parse(contigs, "fasta"):
+    if bin_name in binning.get(seq.id, set()):
+        SeqIO.write(seq, sys.stdout, "fasta")
diff --git a/src/projects/mts/scripts/filter_nucmer.py b/src/projects/mts/scripts/filter_nucmer.py
index eae66a1..fc3eb6b 100755
--- a/src/projects/mts/scripts/filter_nucmer.py
+++ b/src/projects/mts/scripts/filter_nucmer.py
@@ -7,48 +7,48 @@ from os import path
 
 def print_usage():
     print("For a sample assembly aligned to a reference, outputs only contigs which were aligned more than <threshold> percent of their length total, and that percent.")
-    print("Usage: filter_nucmer.py <nucmer coords filtered> <output file> <length> <threshold>")
+    print("Usage: filter_nucmer.py <nucmer coords filtered> <length> <threshold>")
     print("Parameters:")
     print("<length> is minimal contig length (default: INF)")
     print("<threshold> is the minimal total alignment of a contig (0-100%)")
 
-if len(sys.argv) != 5:
+if len(sys.argv) != 4:
     print_usage()
     sys.exit(1)
 
 nucmer_output_fn = sys.argv[1]
-output_fn = sys.argv[2]
-min_length = int(sys.argv[3])
-threshold = float(sys.argv[4])
+min_length = int(sys.argv[2])
+threshold = float(sys.argv[3])
 
 if not path.exists(nucmer_output_fn):
     print("File {} doesn't exist".format(nucmer_output_fn))
     sys.exit(2)
 
 with open(nucmer_output_fn, "r") as nucmer_output:
-    with open(output_fn, "w") as output:
-        align_data = re.compile("\d+ \d+ \| \d+ \d+ \| \d+ (\d+) \| [\d.]+ \| [^ ]+ NODE_(\d+)_length_(\d+)")
-        contig = None
-        contig_len = 0
-        align_len = 0
-        def process_contig():
-            per = 100.0 * align_len / contig_len
-            if per > threshold and contig_len >= min_length:
-                print("{}\t{}\t{}".format(contig, contig_len, per), file=output)
-                return align_len
-            return 0
-        for line in nucmer_output:
-            res = align_data.search(line)
-            if res is None:
-                continue
-            new_contig = res.group(2)
-            if contig != new_contig:
-                if contig is not None:
-                    process_contig()
-                contig = new_contig
-                contig_len = int(res.group(3))
-                align_len = 0
-            #Assuming that all alignments of the same contig are consequent
-            align_len += int(res.group(1))
-        #Print the last contig separately
-        process_contig()
+    align_data = re.compile("\d+ \d+ \| \d+ \d+ \| \d+ (\d+) \| [\d.]+ \| [^ ]+ (NODE_(\d+)_length_(\d+)_.*$)")
+    contig = None
+    name = ""
+    contig_len = 0
+    align_len = 0
+    def process_contig():
+        per = 100.0 * align_len / contig_len
+        if per > threshold and contig_len >= min_length:
+            print("{}\t{}\t{}".format(name, contig_len, per))
+            return align_len
+        return 0
+    for line in nucmer_output:
+        res = align_data.search(line)
+        if res is None:
+            continue
+        new_contig = res.group(3)
+        if contig != new_contig:
+            if contig is not None:
+                process_contig()
+            contig = new_contig
+            name = res.group(2)
+            contig_len = int(res.group(4))
+            align_len = 0
+        #Assuming that all alignments of the same contig are consecutive
+        align_len += int(res.group(1))
+    #Print the last contig separately
+    process_contig()
diff --git a/src/projects/mts/scripts/gather_stats.py b/src/projects/mts/scripts/gather_stats.py
index a65c1a5..3167912 100755
--- a/src/projects/mts/scripts/gather_stats.py
+++ b/src/projects/mts/scripts/gather_stats.py
@@ -1,28 +1,102 @@
 #!/usr/bin/env python
+from __future__ import (print_function)
 
+import numpy as np
 import pandas
 from pandas import DataFrame
 
-from math import isnan
+import argparse
 import os.path
+from operator import add
 import sys
 
-quast_dir = sys.argv[1]
+parser = argparse.ArgumentParser(description="MTS - Metagenomic Time Series")
+parser.add_argument("dir", type=str, help="QUAST output directory")
+parser.add_argument("name", type=str, help="Output base name")
+parser.add_argument("--problematic", action="store_true", help="Problematic references report")
+parser.add_argument("--heatmap", action="store_true", help="Best reference summary table")
 
+args = parser.parse_args()
+
+# Write summary table with correspondence between bins and their best references
 res_table = DataFrame(columns=["bin", "ref", "GF", "purity", "NGA50", "misassemblies"])
-gf_table = pandas.read_table(os.path.join(quast_dir, "summary", "TSV", "Genome_fraction_(%).tsv"), dtype=str).set_index("Assemblies")
+gf_table = pandas.read_table(os.path.join(args.dir, "summary", "TSV", "Genome_fraction_(%).tsv"), dtype=str).set_index("Assemblies")
 gfs = gf_table.apply(pandas.to_numeric, errors="coerce")
+#Drop zeroes
+gfs.fillna(0, inplace=True)
+gfs = gfs.loc[gfs.apply(lambda row: row.sum() > 0, axis=1), gfs.apply(lambda col: col.sum() > 0)]
+
 best_ref = gfs.apply(lambda col: col.idxmax())
 
+with open(args.name + "_best.tsv", "w") as out_file:
+    best_ref.to_csv(out_file, sep="\t")
+
 for bin, ref in best_ref.iteritems():
     if type(ref) is float:
         row = {"bin": bin, "GF": "-", "ref": "unknown", "purity": "-", "NGA50": "-", "misassemblies": "-"}
     else:
-        all_stats = pandas.read_table(os.path.join(quast_dir, "runs_per_reference", ref, "report.tsv"), index_col=0)
+        all_stats = pandas.read_table(os.path.join(args.dir, "runs_per_reference", ref, "report.tsv"), index_col=0)
         col = all_stats.get(bin)
         purity = 100 - float(col["Unaligned length"]) / float(col["Total length"]) * 100
         row = {"bin": bin, "GF": col["Genome fraction (%)"], "ref": ref, "purity": "{0:.2f}".format(purity),
                "NGA50": col["NGA50"], "misassemblies": col["# misassemblies"]}
     res_table = res_table.append(row, ignore_index=True)
 
-res_table.to_csv(sys.stdout, index=False, sep="\t")
+with open(args.name + "_summary.tsv", "w") as out_file:
+    res_table.to_csv(out_file, index=False, sep="\t")
+
+# (Optional) Draw GF heatmap
+if args.heatmap:
+    try:
+        import matplotlib
+        # Force matplotlib to not use any Xwindows backend.
+        matplotlib.use('Agg')
+        import matplotlib.pyplot as plt
+        import seaborn as sns
+        from sklearn.cluster.bicluster import SpectralCoclustering
+        model = SpectralCoclustering(n_clusters=gfs.shape[1], random_state=0)
+        model.fit(gfs.as_matrix())
+        fit_data = gfs.iloc[np.argsort(model.row_labels_), np.argsort(model.column_labels_)]
+
+        plot = sns.heatmap(fit_data, square=True)
+        fig = plot.get_figure()
+        fig.savefig(args.name + "_gf.png", bbox_inches="tight")
+        plt.gcf().clear()
+    except:
+        print("Can't import matplotlib and/or seaborn; heatmap drawing will be disabled")
+        args.heatmap = False
+
+# (Optional) Write summary for problematic references
+if args.problematic:
+    BAD_THRESHOLD = 90
+    ZERO_THRESHOLD = 5
+    total_gf_ref = gfs.sum(1)
+    max_gf_ref = gfs.max(1)
+    nonzeroes = gfs.applymap(lambda x: x > ZERO_THRESHOLD)
+    nonzeroes_cnt_ref = nonzeroes.sum(1)
+    good_refs = list()
+    with open(args.name + "_problems.txt", "w") as out_file:
+        for ref, gf in total_gf_ref.iteritems():
+            if max_gf_ref[ref] < BAD_THRESHOLD:
+                if gf < BAD_THRESHOLD:
+                    print(ref, "is underassembled: at least", 100 - gf, "% GF was lost", file=out_file)
+                else:
+                    print(ref, "is fractured: best bin is only", max_gf_ref[ref], "% GF", file=out_file)
+                continue
+            if nonzeroes_cnt_ref[ref] > 1:
+                print(ref, "is presented in", nonzeroes_cnt_ref[ref], "bins", file=out_file)
+                continue
+            good_refs.append(ref)
+        nonzeroes_cnt_bin = nonzeroes.sum(0)
+        good_bins = list()
+        for bin, cnt in nonzeroes_cnt_bin.iteritems():
+            if cnt > 1:
+                print(bin, "is a mixture of", cnt, "references", file=out_file) #TODO: which ones?
+            else:
+                good_bins.append(bin)
+    if args.heatmap:
+        bad_table = gfs.drop(good_refs, axis=0).drop(good_bins, axis=1)
+        if bad_table.size:
+            plot = sns.heatmap(bad_table, square=True)
+            fig = plot.get_figure()
+            fig.savefig(args.name + "_bad.png", bbox_inches="tight")
diff --git a/src/projects/mts/scripts/gen_samples.py b/src/projects/mts/scripts/gen_samples.py
index f975b73..0e8483a 100755
--- a/src/projects/mts/scripts/gen_samples.py
+++ b/src/projects/mts/scripts/gen_samples.py
@@ -13,12 +13,9 @@ from scipy.stats import expon
 
 def gen_profile(args):
     if args.distribution == "uni":
-        #def rand():
-        #    return random.randint(0, args.scale)
-        pass
+        rand = lambda: random.randint(0, args.scale)
     elif args.distribution == "exp":
-        def rand():
-            return int(expon.rvs(scale=args.scale))
+        rand = lambda: int(expon.rvs(scale=args.scale))
 
     refs = dict(gather_refs(args.references))
     if args.dump_desc:
@@ -40,39 +37,58 @@ def gen_samples(args):
     read_len = args.read_length
     adj_qual = "2" * read_len + "\n"
 
+    table = None
+
     with open(args.profile) as input:
         first_line = True
         for line in input:
             params = line.split()
-            ref_name = params[0]
-            ref_path = refs.get(ref_name)
-            if not ref_path:
-                print("Warning: no reference provided for", ref_name)
+            ref = params[0]
+            if not refs.get(ref):
+                print("Warning: no reference provided for", ref)
                 continue
-            for i, abundance in enumerate(map(int, params[1:]), start=1):
-                ref_len = os.stat(ref_path).st_size
-                reads = ref_len * abundance // read_len
-                print("Generating", reads, "reads for subsample", i, "of", ref_name)
-                sample_dir = os.path.join(args.out_dir, "sample" + str(i))
-                if first_line:
-                    shutil.rmtree(sample_dir, ignore_errors=True)
-                    subprocess.check_call(["mkdir", "-p", sample_dir])
+            if first_line:
+                table = [None] * (len(params) - 1)
+                first_line = False
+            for i, abundance in enumerate(map(int, params[1:])):
+                if not table[i]:
+                    table[i] = dict()
+                table[i][ref] = abundance
+
+    for i, abundancies in enumerate(table, start=1):
+        print("Generating reads for", i, "sample")
+        sample_dir = os.path.join(args.out_dir, "sample" + str(i))
+        shutil.rmtree(sample_dir, ignore_errors=True)
+        subprocess.check_call(["mkdir", "-p", sample_dir])
+        outs = [None] * 2
+        for dir, name in enumerate([os.path.join(sample_dir, "r1.fastq"), os.path.join(sample_dir, "r2.fastq")]):
+            if args.gzip:
+                name = name + ".gz"
+            output = open(name, "wb")
+            if args.gzip:
+                output = subprocess.Popen("gzip", stdin=subprocess.PIPE, stdout=output).stdin
+            outs[dir] = output
+
+        for ref, abundance in abundancies.items():
+            ref_path = refs.get(ref)
+            ref_len = os.stat(ref_path).st_size
+            reads = ref_len * abundance // read_len
+            print("Generating", reads, "reads for subsample", i, "of", ref)
 
-                temp_1 = sample_dir + ".tmp.r1.fastq"
-                temp_2 = sample_dir + ".tmp.r2.fastq"
-                subprocess.check_call(["wgsim", "-N", str(reads), "-r", "0", "-1", str(read_len), "-2", str(read_len), "-d", "300", "-s", "10", "-e", "{:.2f}".format(args.error_rate), "-S", str(i), ref_path, temp_1, temp_2], stdout=subprocess.DEVNULL)
+            temp_1 = sample_dir + ".tmp.r1.fastq"
+            temp_2 = sample_dir + ".tmp.r2.fastq"
+            subprocess.check_call(["wgsim", "-N", str(reads), "-r", "0", "-1", str(read_len), "-2", str(read_len), "-d", "300", "-s", "10", "-e", "{:.2f}".format(args.error_rate), "-S", str(i), ref_path, temp_1, temp_2], stdout=subprocess.DEVNULL)
 
-                print("Merging temporary files")
-                for temp, out in [(temp_1, os.path.join(sample_dir, "r1.fastq")), (temp_2, os.path.join(sample_dir, "r2.fastq"))]:
-                    with open(temp) as input, open(out, "a") as output:
-                        for line in input:
-                            if line.startswith("IIIII"): #TODO: remove this hack
-                                output.write(adj_qual)
-                            else:
-                                output.write(line)
-                    os.remove(temp)
+            print("Merging temporary files")
+            for temp, output in zip([temp_1, temp_2], outs):
+                with open(temp) as input:
+                    for line in input:
+                        if line.startswith("IIIII"): #TODO: remove this hack
+                            output.write(adj_qual.encode())
+                        else:
+                            output.write(line.encode())
+                os.remove(temp)
             print()
-            first_line = False
 
 parser = argparse.ArgumentParser(description="Metagenomic Time Series Simulator")
 parser.add_argument("--references", "-r", type=str, help="Comma-separated list of references, or a directory with them, or a desc file with reference paths prepended with @", required=True)
@@ -89,6 +105,7 @@ gen_samples_args = subparsers.add_parser("gen", help="Generate reads using a pro
 gen_samples_args.add_argument("--out-dir", "-o", type=str, help="Output directory. Will be totally overwritten!")
 gen_samples_args.add_argument("--read-length", "-l", type=int, help="Read length", default=100)
 gen_samples_args.add_argument("--error-rate", "-e", type=float, help="Base error rate", default=0)
+gen_samples_args.add_argument("--gzip", "-z", action="store_true", help="Compress resulted files")
 gen_samples_args.add_argument("profile", type=str, help="File with reference profiles")
 gen_samples_args.set_defaults(func=gen_samples)
 
diff --git a/src/projects/mts/scripts/make_input.py b/src/projects/mts/scripts/make_input.py
index ae6984c..e6e0fae 100755
--- a/src/projects/mts/scripts/make_input.py
+++ b/src/projects/mts/scripts/make_input.py
@@ -5,15 +5,20 @@ try:
 except ImportError:
     pass
 
+import re
 import argparse
 import os
+import os.path
 import sys
 
+from common import contig_length
+
 parser = argparse.ArgumentParser(description="Binner input formatter")
-parser.add_argument("--type", "-t", type=str, help="Binner type (canopy or concoct)", default="canopy")
+
+parser.add_argument("--type", "-t", choices=["canopy", "concoct", "gattaca", "binsanity"], help="Binner type", default="canopy")
+parser.add_argument("--count", "-n", type=int, help="Number of data samples")
 parser.add_argument("--output", "-o", type=str, help="Output file")
-parser.add_argument("--dir", "-d", type=str, help="Directory with profiles (pairs of .id .mpl files)")
-parser.add_argument("samples", type=str, nargs="+", help="Sample names")
+parser.add_argument("profiles", type=str, help="Groups profiles in .tsv format")
 
 args = parser.parse_args()
 
@@ -25,7 +30,7 @@ class CanopyFormatter:
         pass
 
     def profile(self, file, contig, profile):
-        print(contig, profile, file=out)
+        print(contig, " ".join(profile), file=out)
 
 class ConcoctFormatter:
     def __init__(self):
@@ -35,19 +40,35 @@ class ConcoctFormatter:
         print("\t".join(["contig"] + ["cov_mean_" + sample for sample in samples]), file=out)
 
     def profile(self, file, contig, profile):
-        print(contig.replace(",", "~"), profile.replace(" ", "\t"), sep="\t", file=out)
+        print(contig, *profile, sep="\t", file=out)
 
-formatters = {"canopy": CanopyFormatter(), "concoct": ConcoctFormatter()}
-formatter = formatters[args.type]
+class BinSanityFormatter:
+    def __init__(self):
+        pass
 
-with open(args.output, "w") as out:
-    formatter.header(out, args.samples)
-    for sample in args.samples:
-        id_file = "{}/{}.id".format(args.dir, sample)
-        mpl_file = "{}/{}.mpl".format(args.dir, sample)
+    def header(self, file, samples):
+        pass
 
-        print("Processing abundances from %s" % id_file)
+    def profile(self, file, contig, profile):
+        print(contig, *profile, sep="\t", file=out)
 
-        with open(id_file, "r") as ctg_id, open(mpl_file, "r") as ctg_mpl:
-            for cid, cmpl in zip(ctg_id, ctg_mpl):
-                formatter.profile(out, sample + "-" + cid.strip(), cmpl.strip())
+class GattacaFormatter:
+    def __init__(self):
+        pass
+
+    def header(self, file, samples):
+        print("\t".join(["contig", "length"] + ["cov_mean_" + sample for sample in samples]), file=out)
+
+    def profile(self, file, contig, profile):
+        l = contig_length(contig)
+        print(contig, l, *profile, sep="\t", file=out)
+
+formatters = {"canopy": CanopyFormatter(), "concoct": ConcoctFormatter(), "gattaca": GattacaFormatter(), "binsanity": BinSanityFormatter()}
+formatter = formatters[args.type]
+
+with open(args.output, "w") as out:
+    formatter.header(out, ["sample" + str(i) for i in range(1, args.count + 1)])
+    with open(args.profiles, "r") as input:
+        for line in input:
+            params = line.strip().split("\t")
+            formatter.profile(out, params[0], params[1:])
diff --git a/src/projects/mts/scripts/make_points_matrix.py b/src/projects/mts/scripts/make_points_matrix.py
deleted file mode 100755
index 875462b..0000000
--- a/src/projects/mts/scripts/make_points_matrix.py
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/usr/bin/env python3
-
-import random
-
-ctg = open("canopy/contigs.in", "r")
-ctr = open("canopy/clusters.out", "r")
-
-out = open("canopy/points_matrix.csv", "w")
-
-ctg_to_ctr = dict()
-
-while True:
-	s = ctr.readline().strip()
-	if (s == ""):
-		break
-	a = s.split()
-	ctr_id = a[0][3:]
-
-	if (random.randint(1, 25) == 1):
-		ctg_to_ctr[a[1]] = ctr_id
-
-while True:
-	s = ctg.readline().strip()
-	if s == "":
-		break
-
-	a = s.split()
-	if (a[0] in ctg_to_ctr):
-		out.write(ctg_to_ctr[a[0]])
-		for x in a[1:]:
-			out.write("," + x)
-
-		out.write("\n")
-
-out.close()
\ No newline at end of file
diff --git a/src/projects/mts/scripts/parse_output.py b/src/projects/mts/scripts/parse_output.py
deleted file mode 100755
index 17c44bd..0000000
--- a/src/projects/mts/scripts/parse_output.py
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/usr/bin/env python
-from __future__ import print_function
-
-import argparse
-import os.path
-
-argparser = argparse.ArgumentParser(description="Binner output formatter")
-argparser.add_argument("--type", "-t", type=str, help="Binner type (canopy or concoct)", default="canopy")
-argparser.add_argument("--output", "-o", type=str, help="Output directory with annotations")
-argparser.add_argument("input", type=str, help="File with binning info")
-
-class Parser:
-    def __init__(self):
-        self.samples_annotation = dict()
-
-    def add(self, line):
-        sample_contig, bin_id = self.parse(line)
-        sample_contig = sample_contig.split('-', 1)
-        sample = sample_contig[0]
-        contig = sample_contig[1]
-        if sample not in self.samples_annotation:
-            self.samples_annotation[sample] = dict()
-
-        annotation = self.samples_annotation[sample]
-        if contig not in annotation:
-            annotation[contig] = list()
-
-        annotation[contig].append(bin_id)
-
-class CanopyParser(Parser):
-    def parse(self, line):
-        annotation_str = line.split()
-        bin_id = annotation_str[0].strip()
-        sample_contig = annotation_str[1].strip()
-        return (sample_contig, bin_id)
-
-class ConcoctParser(Parser):
-    def parse(self, line):
-        annotation_str = line.split(",", 1)
-        bin_id = annotation_str[1].strip()
-        sample_contig = annotation_str[0].replace("~", ",")
-        return (sample_contig, bin_id)
-
-parsers = {"canopy": CanopyParser(), "concoct": ConcoctParser()}
-
-args = argparser.parse_args()
-parser = parsers[args.type]
-
-with open(args.input, "r") as input_file:
-    for line in input_file:
-        parser.add(line)
-
-for sample, annotation in parser.samples_annotation.items():
-    with open(os.path.join(args.output, sample + ".ann"), "w") as sample_out:
-        annotation = parser.samples_annotation[sample]
-
-        for contig in annotation:
-            print(contig, ":", " ".join(annotation[contig]), file=sample_out)
diff --git a/src/projects/mts/scripts/pca.R b/src/projects/mts/scripts/pca.R
index 1d41f86..aed26c6 100644
--- a/src/projects/mts/scripts/pca.R
+++ b/src/projects/mts/scripts/pca.R
@@ -5,22 +5,40 @@ format_ids <- function(table) {
   unique(table)
 }
 
-load_binning <- function(canopy_in, canopy_out) {
-  data <- read.table(canopy_in)
+# my_normalize<-function(X) {
+#   X_norm<-X
+#   #column normalisation
+#   X_norm<-t(t(X_norm) / ifelse(colSums(X_norm) == 0, 1, colSums(X_norm)))
+#   #row normalisation
+#   X_norm<-X_norm / rowSums(X_norm)
+#   #mean/variance normalisation
+#   #X_norm<-exprs(standardise(ExpressionSet(X_norm)))
+#   #my variant of mean/var normalisation
+#   #X_norm<-t(as.matrix(scale(t(X_norm))))
+#   return(X_norm)
+# }
+
+normalize <- function(X) {
+  return (X / rowSums(X))
+}
+
+load_binning <- function(profiles_in, binning_out) {
+  data <- read.table(profiles_in)
+  data[,-1] <- normalize(data[,-1])
   names(data) <- c('contig', sapply(seq(1, dim(data)[2]-1, 1),
                                     function(x) {paste('mlt', x, sep='')}))
   data <- format_ids(data)
-  binned <- read.table(canopy_out)
-  names(binned) <- c('clust', 'contig')
+  binned <- read.table(binning_out)
+  names(binned) <- c('contig', 'bin')
   binned <- format_ids(binned)
   merge(x=data, y=binned, by='contig')
 }
 
-load_clusters <- function(canopy_in, canopy_out, int_contigs) {
-  data <- load_binning(canopy_in, canopy_out)
+load_clusters <- function(profiles_in, binning_out, int_contigs) {
+  data <- load_binning(profiles_in, binning_out)
   if (missing(int_contigs)) {
-    pieces <- split(data, data$clust)[1:10]
-    lims <- lapply(pieces, function(x) head(x, 50))
+    pieces <- split(data, data$bin)[1:10]
+    lims <- lapply(pieces, function(x) head(x, 500))
     do.call(rbind, c(lims, list(make.row.names=FALSE)))
   } else {
     interesting <- read.table(int_contigs)
@@ -33,14 +51,14 @@ do_prc <- function(clusters) {
   prcomp(~ ., data = clusters[, grep('mlt', colnames(clusters))])
 }
 
-print_clusters <- function(pr, clust, image) {
+print_clusters <- function(pr, bin, image) {
   if (!missing(image))
     png(filename=image, width=1024, height=768)
-  lev <- levels(factor(clust))
+  lev <- levels(factor(bin))
   cols <- 1:length(lev)
   #layout(rbind(1,2), heights=c(7,1))
-  plot(pr$x, col = as.numeric(clust), xlim=c(-100, 200), ylim=c(-50,50))
-  a <- split(as.data.frame(pr$x), clust)
+  plot(pr$x, col = as.numeric(bin))#, xlim=c(-100, 200), ylim=c(-50,50))
+  a <- split(as.data.frame(pr$x), bin)
   for (l in lev) {
     x <- a[[l]]
     text(median(x$PC1), median(x$PC2), l)
@@ -56,7 +74,7 @@ local_data <- function() {
                             "/Volumes/Chihua-Sid/mts/out/70p_3.log")
 
   prc_data <- do_prc(clusters)
-  print_clusters(prc_data, clusters$clust)
+  print_clusters(prc_data, clusters$bin)
   prc_data
 }
 
@@ -72,6 +90,5 @@ if (length(args) < 4) {
   clusters <- load_clusters(in_fn, out_fn, cont_fn)
 }
 
-print(clusters[1:10,])
 prc_data <- do_prc(clusters)
-print_clusters(prc_data, clusters$clust, image_out)
+print_clusters(prc_data, clusters$bin, image_out)
diff --git a/src/projects/mts/scripts/ref_stats.sh b/src/projects/mts/scripts/ref_stats.sh
deleted file mode 100755
index 59dcb8d..0000000
--- a/src/projects/mts/scripts/ref_stats.sh
+++ /dev/null
@@ -1,63 +0,0 @@
-#/bin/bash
-
-if [ "$#" -lt 3 ]; then
-    echo "Usage: identify.sh <assemblies_folder> <refs_folder> <out_dir>"
-    exit 1
-fi
-
-CTG_LENGTH_THR=5000
-process_cnt=4
-thread_cnt=8
-assemblies_folder=$1
-refs_folder=$2
-#canopy_out=$3
-out_dir=$3
-
-folder=$out_dir/metaquast
-
-export LC_ALL=C
-mkdir -p $out_dir
-
-~/git/quast/metaquast.py --debug -R $refs_folder -o $out_dir/metaquast $assemblies_folder/*.fasta
-
-#awk ' {print $2,$1} ' $canopy_out | sort > $folder/clusters.txt
-
-rm -rf $out_dir/ref_summary.txt
-
-for ref in $refs_folder/*.fasta ; do
-    echo "Processing reference $ref" 
-    ref_name=$(basename "$ref")
-    ref_name="${ref_name%.*}"
-
-    rm -rf $out_dir/${ref_name}.ctgs
-
-    #for sample in $assemblies_out_dir/sample9.fasta ; do
-    for sample in $assemblies_folder/*.fasta ; do 
-        sample_name=$(basename "$sample")
-        sample_name="${sample_name%.*}"
-        aligned=$out_dir/metaquast/quast_corrected_input/${sample_name}_to_${ref_name}.fasta
-        ~/git/ngs_scripts/contig_length_filter.py $CTG_LENGTH_THR $aligned $out_dir/long.fasta.tmp
-        ~/git/ngs_scripts/contig_info.py $out_dir/long.fasta.tmp $out_dir/ctg.info.tmp
-        sed_command="s/ID_/${sample_name}-/g"
-        grep -Eo "ID_.*$" $out_dir/ctg.info.tmp | sed -e $sed_command >> $out_dir/${ref_name}.ctgs
-        rm $out_dir/long.fasta.tmp
-        rm $out_dir/ctg.info.tmp
-    done
-
-    sed 's/$/ '"${ref_name}"'/g' $out_dir/${ref_name}.ctgs >> $out_dir/ref_summary.txt
-
-    #sort $out_dir/${ref_name}.ctgs.tmp > $out_dir/${ref_name}.ctgs
-
-    #join $out_dir/${ref_name}.ctgs $out_dir/clusters.txt | awk ' { print $2 } ' | sort | uniq -c | sort -nr | head -10 
-
-    #join $out_dir/${ref_name}.ctgs $out_dir/clusters.txt > $out_dir/join.txt 
-    #awk ' { print $2 } ' $out_dir/join.txt | sort | uniq -c | sort -nr | head -10 
-
-    report=$out_dir/metaquast/runs_per_reference/$ref_name/report.txt
-
-    grep "Assembly" $report
-    grep "Genome fraction" $report
-done
-
-#rm -rf $out_dir
-echo "Finished"
diff --git a/src/projects/mts/scripts/run_tsne.py b/src/projects/mts/scripts/run_tsne.py
new file mode 100755
index 0000000..ac96655
--- /dev/null
+++ b/src/projects/mts/scripts/run_tsne.py
@@ -0,0 +1,239 @@
+__author__ = 'tanunia'
+
+
+import copy
+import argparse
+import numpy as np
+import pandas as pd
+import matplotlib
+matplotlib.use('Agg')
+from matplotlib import pyplot
+from matplotlib.backends.backend_pdf import PdfPages
+import bhtsne
+import matplotlib.patches as mpatches
+import matplotlib.cm as cm
+
+def draw_points(points, names, fig):
+    # Annotate each point with its name on a fresh 111 (1x1, slot 1)
+    # subplot of `fig`.
+    # NOTE(review): assumes len(names) >= len(points) -- confirm at call sites.
+    ax = fig.add_subplot(111)
+    for i in xrange(len(points)):
+        ax.annotate(names[i], xy=points[i], textcoords='data')
+
+def points_mean(x, y):
+    x_mean = sum(x)*1.0/len(x)
+    y_mean = sum(y)*1.0/len(y)
+
+    return x_mean, y_mean
+
+def find_cluster_centers(mp):
+    points = []
+    names = []
+    for c in mp.keys():
+        names.append("Cl-" + str(c))
+        x, y = points_mean(mp[c]['x'], mp[c]['y'])
+        points.append([x, y])
+    return points, names
+
+def divide_by_cluster(names, clusters):
+    res = {}
+    for i in xrange(len(clusters)):
+        c = clusters[i]
+        if not c in res.keys():
+            res[c] = []
+        res[c].append(names[i])
+
+    return res
+
+def take_first_per(clusters, per = 0.1):
+    res = []
+    for c in clusters.keys():
+        for i in xrange(max(int(len(clusters[c])*0.1), min(10, len(clusters[c])) ) ):
+            res.append(clusters[c][i])
+
+    return res
+
+
+
+def divide_by_color(x, y, color):
+    res = {}
+
+    for i in xrange(len(color)):
+        c = color[i]
+        if not c in res.keys():
+            res[c] = {}
+            res[c]["x"] = []
+            res[c]["y"] = []
+        res[c]["x"].append(x[i])
+        res[c]["y"].append(y[i])
+
+    return res
+
+def form_points(df):
+    """Extract plot coordinates and shortened display names from a dataframe.
+
+    Column 0 holds contig ids; characters [8:18] of each id are used as
+    the short name (presumably skipping a fixed id prefix -- TODO confirm
+    against the actual id format).
+    """
+    x = df["x"].tolist()
+    y = df["y"].tolist()
+    names = [z[8:18] for z in df[0].tolist()]
+    # Python 2 zip returns a list of (x, y) tuples here.
+    points = zip(x, y)
+
+    return points, names
+
+import re
+# Pulls the first run of digits out of a cluster label (e.g. "CAG12" -> "12");
+# label format presumed from its use in run_tsne -- TODO confirm.
+extract_num = re.compile("\d+")
+
+def run_tsne(features_file, colors_file, output_prefix
+             , filter_sample=[]
+             , filter_cluster=[]
+             , lst=[]
+             , draw_per = 1.0
+             , iter = 1000
+             , perplexity = 50):
+    # read data
+    data_df = pd.read_table(features_file, header=None)
+    cluster_colors = pd.read_table(colors_file, header=None)
+    print(data_df.head())
+
+    # make dataframe pretty
+    cluster_colors = cluster_colors.rename(columns={1:'color'})
+    cluster_colors["color"] = [int(extract_num.findall(str(x))[0]) for x in cluster_colors["color"].tolist()]
+    print(cluster_colors.head())
+    #cluster_colors = cluster_colors.rename(columns={0:0})
+
+    # filter by samples
+    if len(filter_sample) > 0:
+        filter1 = []
+        for x in cluster_colors[0].tolist():
+            for it in filter_sample:
+                st = "sample" + it + "-"
+                if x.startswith(st):
+                    filter1.append(x)
+        cluster_colors = cluster_colors[cluster_colors[0].isin(filter1)]
+
+    # filter by percent
+    if draw_per < 1:
+        clusters = divide_by_cluster(cluster_colors[0].tolist(), cluster_colors["color"].tolist())
+        filter2 = take_first_per(clusters, lst)
+        s = set(filter2)
+        lst_new = []
+        for n in lst:
+            for x in cluster_colors[0].tolist():
+                if x.startswith(n):
+                    print x
+                    lst_new.append(x)
+                    if x not in s:
+                        filter2.append(x)
+        lst = lst_new
+        cluster_colors = cluster_colors[cluster_colors[0].isin(filter2)]
+
+
+    # merge data
+    mapped = pd.merge(cluster_colors, data_df, on=0)
+
+    # filter by length
+    mapped["length"] = [int(x.split("_")[3]) for x in mapped[0].tolist()]
+    mapped = mapped[mapped["length"] > 2000]
+    print(mapped)
+
+    # normalize like in CONCOCT
+    data = mapped.as_matrix(columns=mapped.columns[2:-1])
+
+    v = (1.0/mapped["length"]).as_matrix()[:, np.newaxis]
+    data = data + v
+    along_Y = np.apply_along_axis(sum, 0, data)
+    data = data/along_Y[None, :]
+    along_X = np.apply_along_axis(sum, 1, data)
+    data = data/along_X[:, None]
+    data = np.log(data)
+    #print(data)
+
+    embedding_array = bhtsne.run_bh_tsne(data, initial_dims=data.shape[1], perplexity=perplexity, max_iter=iter)
+    mapped["x"] = embedding_array[:, 0]
+    mapped["y"] = embedding_array[:, 1]
+
+    # draw result of TSNE on scatter plot
+
+    pp = PdfPages(output_prefix)
+
+
+    # filter clusters to show
+    fc = filter_cluster
+    if len(fc) > 0:
+        filtered = mapped[mapped["color"].isin(fc)]
+        #mapped = filtered
+    else:
+        filtered = mapped
+
+    fig = pyplot.figure()
+
+    # draw scatter plot
+    color = mapped["color"].tolist()
+    mx_color = max(color)
+    pyplot.scatter(mapped["x"].tolist(), mapped["y"].tolist(), c=[cm.spectral(float(i) /mx_color) for i in color])
+
+    # make a legend for specific clusters
+    # find cluster centers
+    x = filtered["x"].tolist()
+    y = filtered["y"].tolist()
+    mp = divide_by_color(x, y, filtered["color"].tolist())
+    points, names = find_cluster_centers(mp)
+    patches = []
+    dcolors = list(set(color))
+    for c in dcolors:
+        if c in fc and len(fc) < 5:
+            patches.append(mpatches.Patch(color=cm.spectral(float(c)/mx_color), label='C-'+ str(c)))
+    pyplot.legend(handles=patches)
+    draw_points(points, names, fig)
+
+    # mark specific points
+    filtered = mapped[mapped[0].isin(lst)]
+    pyplot.scatter(filtered["x"].tolist(), filtered["y"].tolist(), marker="p", edgecolors='black', c=[cm.spectral(float(i) /mx_color) for i in filtered["color"].tolist()])
+
+
+    pyplot.title('Perp = '+ str(perplexity)+ ' Iter = ' + str(iter))
+    pp.savefig()
+
+    pp.close()
+
+def get_points(file):
+    if file == "":
+        return []
+    else:
+        points = []
+        fin = open(file, "r")
+        for l in fin.readlines():
+            points.append(l.strip())
+        fin.close()
+        return points
+
+def main():
+    """Command-line entry point: parse arguments and run the tSNE plot."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("profile", help="profile information (depth)")
+    parser.add_argument("binning", help="file with binning results")
+    parser.add_argument("output", help="path to pdf-file to save graph")
+    parser.add_argument("-p", "--percent", help="sets size of random subsample from profile to run TSNE",
+                        type=float,
+                        default=1.0)
+    parser.add_argument("-i", "--iteration", help="number of TSNE iterations",
+                        type=int,
+                        default=1000)
+    parser.add_argument("-e", "--perplexity", help="TSNE perplexity",
+                        type=float,
+                        default=50)
+    parser.add_argument("-s", "--samples", help="run TSNE only on samples from the list",
+                        nargs='+',
+                        default=[])
+    parser.add_argument("-c", "--clusters", help="draw only clusters from the list",
+                        nargs='+',
+                        default=[])
+    parser.add_argument("-f", "--pointsfile", help="highlight specific points on the graph",
+                        default="")
+
+    args = parser.parse_args()
+    points = get_points(args.pointsfile)
+    # Positional pass-through: the order must match run_tsne's signature
+    # (filter_sample, filter_cluster, lst, draw_per, iter, perplexity).
+    run_tsne(args.profile, args.binning, args.output
+             , args.samples
+             , args.clusters
+             , points
+             , args.percent
+             , args.iteration
+             , args.perplexity)
+
+if __name__ == "__main__":
+    main()
diff --git a/src/projects/mts/scripts/split_bins.py b/src/projects/mts/scripts/split_bins.py
index dea8914..15db9b2 100755
--- a/src/projects/mts/scripts/split_bins.py
+++ b/src/projects/mts/scripts/split_bins.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 from __future__ import print_function
 
 import os
@@ -9,11 +9,14 @@ import common
 import subprocess
 
 def print_usage():
-        print("Usage: split_bins.py <contigs> <binning info> <output directory>")
+        print("Usage: split_bins.py <contigs> <binning info> <output directory> [-p]")
 
 contigs = sys.argv[1]
 sample, _ = path.splitext(path.basename(contigs))
 out_dir = sys.argv[3]
+prepend_name = False
+if len(sys.argv) > 4 and sys.argv[4] == "-p":
+    prepend_name = True
 
 binning = common.load_annotation(sys.argv[2], False)
 
@@ -22,9 +25,12 @@ subprocess.call("rm -f {}/{}-*.fasta".format(out_dir, sample), shell=True)
 cags = set()
 for seq in SeqIO.parse(contigs, "fasta"):
     seq_id = seq.id
-    seq.id = sample + "-" + seq_id
-    #seq.id = common.get_id(seq.id, sample)
-    seq.description = ""
+    if prepend_name:
+        seq.id = sample + "-" + seq_id
+        seq.description = ""
     for cag in binning.get(seq_id, []):
-        with open(path.join(out_dir, "{}-{}.fasta".format(sample, cag)), "a") as output:
+        filename = cag + ".fasta"
+        if prepend_name:
+            filename = sample + "-" + filename
+        with open(path.join(out_dir, filename), "a") as output:
             SeqIO.write(seq, output, "fasta")
diff --git a/src/projects/mts/scripts/validate.pl b/src/projects/mts/scripts/validate.pl
new file mode 100755
index 0000000..838a0a2
--- /dev/null
+++ b/src/projects/mts/scripts/validate.pl
@@ -0,0 +1,404 @@
+#!/usr/bin/perl
+# Validate.pl: compare a clustering against reference species labels.
+# Builds a (taxa x clusters) confusion matrix, optionally weighted by
+# contig lengths from a FASTA file, writes it to a CSV and prints
+# recall/precision/NMI/Rand/adjusted-Rand summary statistics.
+
+use strict;
+use Getopt::Long;
+
+my $tFile = '';
+my $zFile = '';
+my $fFile = '';
+my $help = '';
+my $quiet = '';
+my $outFile = "Conf.csv";
+
+my $USAGE = <<"USAGE";
+Usage: ./Validate.pl --cfile=clustering.csv --sfile=species.csv --ffile=Contigs.fasta
+
+Regular options:
+--ffile    Contigs.fasta weight statistics by contig lengths
+--ofile    filename -- outputfile for confusion matrix default Conf.csv
+--quiet             -- suppress variable names
+--help
+
+USAGE
+
+GetOptions("ffile=s"   => \$fFile, "cfile=s"   => \$tFile,"sfile=s"  => \$zFile, "ofile=s" => \$outFile, 'quiet' => \$quiet, 'help' => \$help) or die("Error in command line arguments\n");
+
+if ($help ne '') {print $USAGE;}
+
+# Clustering (--cfile) and species (--sfile) tables are mandatory.
+die $USAGE unless ($tFile ne '' && $zFile ne '');
+
+my @t = ();
+my $maxt = 0;
+my $N = 0;
+my $S = 0;
+# NOTE(review): "= {}" seeds the hash with a stray hashref key; "= ()"
+# was almost certainly intended. Lookups are by contig name, so the
+# stray key is harmless, but it warns under -w.
+my %hashCluster = {};
+my @ctotals = ();
+
+# If a FASTA file was given, read it and record each contig's length so
+# confusion-matrix entries can be weighted by length instead of count.
+my @Seq         = ();
+my @id          = ();
+my %hashLengths = {};
+my $count = 0;
+if($fFile ne ''){
+    open(FILE, $fFile) or die "Can't open $fFile\n";
+
+    my $seq = "";
+
+    while(my $line = <FILE>){
+        chomp($line);
+
+        if($line =~ />(.*)/){
+
+            # Header line: remember the id and flush the previous record.
+            $id[$count] = $1;
+
+            if($seq ne ""){
+                $Seq[$count - 1] = $seq;
+
+                $seq = "";
+            }
+
+            $count++;
+        }
+        else{
+            $seq .= $line;
+        }
+    }
+    close(FILE);
+
+    # Flush the final record.
+    $Seq[$count - 1] = $seq;
+    my $stotal = $count;
+
+
+    for(my $i = 0; $i < $stotal; $i++){
+        my $iid = $id[$i];
+        my $slength = length($Seq[$i]);
+        $hashLengths{$iid}  = $slength;
+    }
+}
+
+# Read the clustering table: <contig name> TAB <cluster label>.
+# Non-digits are stripped from the label ("CAG12" -> 12); track the
+# highest cluster id and the per-cluster assignment counts.
+open(FILE, $tFile) or die "Can't open $tFile";
+
+while(my $line = <FILE>){
+  $N++;
+  chomp($line);
+
+  my @tokens = split(/\t/,$line);
+
+  my $name = $tokens[0];
+  my $cluster = $tokens[1];
+  $cluster =~ s/\D//g;
+  $ctotals[$cluster]++;
+  $hashCluster{$name} = $cluster;
+  #print "$name $cluster\n";
+  if($cluster > $maxt){
+    $maxt = $cluster;
+  }
+}
+
+close(FILE);
+
+# Read the species table and accumulate the confusion matrix: one row
+# per genus (column 3), weighted by contig length when available.
+open(FILE, $zFile) or die "Can't open $zFile";
+
+my $tweight = 0.0;
+# NOTE(review): "= {}" again -- "= ()" was presumably intended.
+my %hashC = {};
+# NOTE(review): this "my $count" masks the earlier declaration above
+# ("masks earlier declaration" warning); it is only used locally here.
+my $count = 0;
+while(my $line = <FILE>){
+    chomp($line);
+    my @tokens = split(/\t/,$line);
+
+    my $name = $tokens[0];
+    # NOTE(review): "ne undef"/"eq undef" warn under -w; defined() is
+    # the idiomatic test.
+    if($hashCluster{$name} ne undef){
+        my $tcluster = $hashCluster{$name};
+
+        my $genus = $tokens[3];
+
+        # Weight: contig length if known, else 1 (unweighted).
+        my $l = 0.0;
+        if($hashLengths{$name} ne undef){
+            $l = $hashLengths{$name};
+        }
+        else{
+            $l = 1.0;
+        }
+
+        if($hashC{$genus} eq undef){
+            # First contig for this genus: start a row of $maxt+1 zeros.
+            my @temp = ();
+
+            for(my $i = 0; $i < $maxt + 1; $i++){
+	            $temp[$i] = 0;
+            }
+
+            $temp[$tcluster]+=$l;
+
+            $hashC{$genus} = \@temp;
+        }
+        else{
+            @{$hashC{$genus}}[$tcluster]+=$l;
+        }
+        $count++;
+        $tweight += $l;
+        $S++;
+    }
+}
+
+close(FILE);
+
+my $classcount = 0;
+my @cluster = ();
+my $j = 0;
+
+# Write the confusion matrix: one row per taxon, one "D<i>" column per
+# non-empty cluster.
+open(OUTFILE,">$outFile") or die "Can't open $outFile\n";
+
+printf OUTFILE "Taxa,";
+
+my @names = ();
+
+for(my $i = 0; $i < $maxt + 1; $i++){
+  if($ctotals[$i] > 0){
+	push(@names,"D$i");
+  }
+}
+
+# BUG FIX: the archived diff read 'join(",", at names)' -- a mailing-list
+# mangling of '@names' that does not compile; the sigil is restored here
+# (and again for '@vals' below).
+my $nameString = join(",",@names);
+
+printf OUTFILE "$nameString\n";
+
+my $nJ = 0;
+my $nI = 0;
+
+foreach my $key(sort keys %hashC){
+  if($hashC{$key} ne undef){
+    my @temp = @{$hashC{$key}};
+    my $ptotal = 0;
+    $nI = 0;
+    for(my $i = 0; $i < $maxt + 1; $i++){
+      $ptotal += $temp[$i];
+    }
+
+    if($ptotal > 0){
+      my @vals = ();
+      for(my $i = 0; $i < $maxt + 1; $i++){
+	if($ctotals[$i] > 0){
+		# @cluster is filled as [cluster index][taxon index].
+		$cluster[$nI][$nJ] = $temp[$i];
+		push(@vals,$temp[$i]);
+		$nI++;
+	}
+      }
+      $nJ++;
+
+      my $cTemp = join(",",@vals);
+
+      print OUTFILE "$key,$cTemp\n";
+    }
+  }
+}
+
+close(OUTFILE);
+
+# Print a header line unless --quiet, then the summary metrics.
+if($quiet eq ''){
+  printf("N\tM\tTL\tS\tK\tRec.\tPrec.\tNMI\tRand\tAdjRand\n");
+}
+
+my $NK = scalar(@cluster);
+my $NS = scalar(@{$cluster[0]});
+
+printf("%d\t%d\t%.4e\t%d\t%d\t%f\t%f\t%f\t%f\t%f\n",$N,$S,$tweight,$NS,$NK,recall(@cluster),precision(@cluster),nmi(@cluster),randindex(@cluster),adjrandindex(@cluster));
+
+# Weighted precision: for every row of the contingency matrix take its
+# largest entry, sum these maxima and divide by the grand total.
+# NOTE(review): the empty "()" prototype conflicts with the call
+# precision(@cluster); since the call site appears before this
+# definition, Perl does not enforce the prototype -- confirm.
+sub precision(){
+   my @cluster = @_;
+   my $nN = 0;
+   my $nC = scalar(@cluster);
+   my $nK = scalar(@{$cluster[0]});
+   my $precision = 0;
+
+   for(my $i = 0; $i < $nC; $i++){
+     my $maxS = 0;
+
+     for(my $j = 0; $j < $nK; $j++){
+       if($cluster[$i][$j] > $maxS){
+	 $maxS = $cluster[$i][$j];
+       }
+
+       $nN += $cluster[$i][$j];
+     }
+     $precision += $maxS;
+   }
+
+   return $precision/$nN;
+}
+
+# Weighted recall: symmetric to precision() but over columns -- for each
+# column take its largest entry, sum the maxima, divide by the total.
+sub recall(){
+   my @cluster = @_;
+   my $nN = 0;
+   my $nC = scalar(@cluster);
+   my $nK = scalar(@{$cluster[0]});
+   my $recall = 0;
+
+   for(my $i = 0; $i < $nK; $i++){
+     my $maxS = 0;
+
+     for(my $j = 0; $j < $nC; $j++){
+       if($cluster[$j][$i] > $maxS){
+	 $maxS = $cluster[$j][$i];
+       }
+
+       $nN += $cluster[$j][$i];
+     }
+
+     $recall += $maxS;
+   }
+
+   return $recall/$nN;
+}
+
+sub choose2{
+  # C(n,2): number of unordered pairs among $n items.
+  my $n = shift;
+  return int(($n * ($n - 1)) / 2);
+}
+
+# Rand index from pair counts over the contingency matrix:
+# (C(N,2) - sum_rows C(r,2) - sum_cols C(c,2) + 2*sum_cells C(m,2)) / C(N,2).
+# Note: the local @ctotals deliberately shadows the file-level array.
+sub randindex{
+ my @cluster = @_;
+ my @ktotals = ();
+ my @ctotals = ();
+ my $nN = 0;
+ my $nC = scalar(@cluster);
+ my $nK = scalar(@{$cluster[0]});
+ my $cComb = 0;
+ my $kComb = 0;
+ my $kcComb = 0;
+
+ # Column sums and their pair counts.
+ for(my $i = 0; $i < $nK; $i++){
+   $ktotals[$i] = 0;
+   for(my $j = 0; $j < $nC; $j++){
+     $ktotals[$i]+=$cluster[$j][$i];
+   }
+   $nN += $ktotals[$i];
+   $kComb += choose2($ktotals[$i]);
+ }
+
+
+ # Row sums and their pair counts.
+ for(my $i = 0; $i < $nC; $i++){
+   $ctotals[$i] = 0;
+   for(my $j = 0; $j < $nK; $j++){
+     $ctotals[$i]+=$cluster[$i][$j];
+   }
+   $cComb += choose2($ctotals[$i]);
+ }
+
+ # Per-cell pair counts.
+ for(my $i = 0; $i < $nC; $i++){
+   for(my $j = 0; $j < $nK; $j++){
+     $kcComb += choose2($cluster[$i][$j]);
+   }
+ }
+
+ my $nComb = choose2($nN);
+
+ return ($nComb - $cComb - $kComb + 2*$kcComb)/$nComb;
+
+}
+
+# Adjusted Rand index (Hubert & Arabie):
+# (sum_cells C(m,2) - E) / (0.5*(sum_rows C + sum_cols C) - E),
+# where E = (sum_rows C * sum_cols C) / C(N,2) is the chance expectation.
+sub adjrandindex{
+ my @cluster = @_;
+ my @ktotals = ();
+ my @ctotals = ();
+ my $nN = 0;
+ my $nC = scalar(@cluster);
+ my $nK = scalar(@{$cluster[0]});
+ my $cComb = 0;
+ my $kComb = 0;
+ my $kcComb = 0;
+
+ # Column sums and their pair counts.
+ for(my $i = 0; $i < $nK; $i++){
+   $ktotals[$i] = 0;
+   for(my $j = 0; $j < $nC; $j++){
+     $ktotals[$i]+=$cluster[$j][$i];
+   }
+   $nN += $ktotals[$i];
+   $kComb += choose2($ktotals[$i]);
+ }
+
+
+ # Row sums and their pair counts.
+ for(my $i = 0; $i < $nC; $i++){
+   $ctotals[$i] = 0;
+   for(my $j = 0; $j < $nK; $j++){
+     $ctotals[$i]+=$cluster[$i][$j];
+   }
+   $cComb += choose2($ctotals[$i]);
+ }
+
+ # Per-cell pair counts.
+ for(my $i = 0; $i < $nC; $i++){
+   for(my $j = 0; $j < $nK; $j++){
+     $kcComb += choose2($cluster[$i][$j]);
+   }
+ }
+
+ my $nComb = choose2($nN);
+
+ my $temp = ($kComb*$cComb)/$nComb;
+
+ my $ret = $kcComb - $temp;
+
+ return $ret/(0.5*($cComb + $kComb) - $temp);
+
+}
+
+
+
+# Normalized mutual information between the row and column partitions of
+# the contingency matrix: 2*I(C;K) / (H(C) + H(K)), natural logarithms.
+sub nmi{
+  my @cluster = @_;
+  my @ktotals = ();
+  my @ctotals = ();
+  my $nN = 0;
+  my $nC = scalar(@cluster);
+  my $nK = scalar(@{$cluster[0]});
+  my $HC = 0.0;
+  my $HK = 0.0;
+
+  # Column marginals.
+  for(my $i = 0; $i < $nK; $i++){
+    $ktotals[$i] = 0;
+    for(my $j = 0; $j < $nC; $j++){
+      $ktotals[$i]+=$cluster[$j][$i];
+    }
+    $nN += $ktotals[$i];
+  }
+
+
+  # Row marginals and row entropy H(C).
+  for(my $i = 0; $i < $nC; $i++){
+    $ctotals[$i] = 0;
+    for(my $j = 0; $j < $nK; $j++){
+      $ctotals[$i]+=$cluster[$i][$j];
+    }
+    my $dFC = $ctotals[$i]/$nN;
+    if($dFC > 0.0){
+      $HC += -$dFC*log($dFC);
+    }
+  }
+
+  # Column entropy H(K).
+  for(my $i = 0; $i < $nK; $i++){
+    my $dFK = $ktotals[$i]/$nN;
+    if($dFK > 0.0){
+      $HK += -$dFK*log($dFK);
+    }
+  }
+
+
+  # Mutual information I(C;K) accumulated column by column.
+  my $NMI = 0.0;
+
+  for(my $i = 0; $i < $nK; $i++){
+    my $NMII = 0.0;
+
+    for(my $j = 0; $j < $nC; $j++){
+      if($ctotals[$j] >0 && $ktotals[$i] > 0){
+	my $dF = ($nN*$cluster[$j][$i])/($ctotals[$j]*$ktotals[$i]);
+	if($dF > 0.0){
+	  $NMII += $cluster[$j][$i]*log($dF);
+	}
+      }
+    }
+    $NMII /= $nN;
+    $NMI += $NMII;
+  }
+
+  return (2.0*$NMI)/($HC + $HK);
+}
diff --git a/src/projects/mts/stats.cpp b/src/projects/mts/stats.cpp
index 603da47..688a675 100644
--- a/src/projects/mts/stats.cpp
+++ b/src/projects/mts/stats.cpp
@@ -7,8 +7,8 @@
 
 #include "pipeline/graphio.hpp"
 #include "pipeline/graph_pack.hpp"
-#include "utils/simple_tools.hpp"
-#include "utils/path_helper.hpp"
+#include "utils/stl_utils.hpp"
+#include "utils/filesystem/path_helper.hpp"
 #include "utils/logger/log_writers.hpp"
 #include "math/xmath.h"
 #include <iostream>
@@ -34,7 +34,7 @@ io::SingleRead ReadSequence(io::SingleStream& reader) {
 }
 
 io::SingleRead ReadGenome(const string& genome_path) {
-    path::CheckFileExistenceFATAL(genome_path);
+    fs::CheckFileExistenceFATAL(genome_path);
     auto genome_stream_ptr = std::make_shared<io::FileReadStream>(genome_path);
     return ReadSequence(*genome_stream_ptr);
 }
@@ -122,7 +122,7 @@ int main(int argc, char** argv) {
            << "Propagated edges\tPropagated length" << endl;
 
     for (const auto genome_path : genomes_path) {
-        auto ref_name = path::basename(genome_path);
+        auto ref_name = fs::basename(genome_path);
         io::SingleRead genome = ReadGenome(genome_path);
 
         visualization::position_filler::FillPos(gp, genome_path, "", true);
diff --git a/src/projects/mts/test.py b/src/projects/mts/test.py
index 8c0c19f..29defdc 100755
--- a/src/projects/mts/test.py
+++ b/src/projects/mts/test.py
@@ -4,12 +4,12 @@ from __future__ import print_function
 import argparse
 import os
 import os.path
+import pwd
 import re
 import shutil
 import sys
 import subprocess
 from traceback import print_exc
-import yaml
 
 from scripts.common import Table
 
@@ -18,8 +18,10 @@ class Log:
     text = ""
 
     def log(self, s):
-        self.text += s + "\n"
-        print(s)
+        msg = s + "\n"
+        self.text += msg
+        sys.stdout.write(msg)
+        sys.stdout.flush()
 
     def warn(self, s):
         msg = "WARNING: " + s
@@ -82,41 +84,32 @@ def compile_mts(workdir):
 
 def parse_args():
     parser = argparse.ArgumentParser()
-    parser.add_argument("--config", "-c", help="Config template")
+    parser.add_argument("--data", "-c", help="Directory with config and saves")
     parser.add_argument("dir", help="Output directory")
-    parser.add_argument("--saves", "-s", type=str)
-    parser.add_argument("--no-clean", action="store_true")
-    parser.add_argument("--etalons", "-e", type=str, help="Directory of GF etalons")
+    parser.add_argument("--saves", "-s", action="store_true", help="Reuse assemblies and/or profiles from data directory")
+    parser.add_argument("--multirun", "-m", action="store_true", help="Enable multiruns")
+    parser.add_argument("--no-clean", action="store_true", help="Do not clean the prvieous run")
+    parser.add_argument("--etalons", "-e", action="store_true", help="Compare with etalons")
+
     args = parser.parse_args()
     return args
 
-def prepare_config(args, workdir):
-    with open(os.path.join(args.config)) as template:
-        params = yaml.load(template)
-        params["BIN"] = os.path.join(workdir, "build/release/bin")
-        params["SCRIPTS"] = os.path.join(workdir, "src/projects/mts/scripts")
-        with open(os.path.join(args.dir, "config.yaml"), "w") as config:
-            config.write(yaml.dump(params))
-
 def run_mts(args, workdir):
+    mts_args = ["./multirun.py"] if args.multirun else ["./mts.py"]
     if not args.no_clean:
         shutil.rmtree(args.dir, True)
+        mts_args.extend(["--config", os.path.join(args.data, "config.yaml")])
     if not os.path.exists(args.dir):
         os.mkdir(args.dir)
-        prepare_config(args, workdir)
-    mts_args = ["./mts.py", "--stats", args.dir]
     if args.saves:
-        log.log("Copying saves from" + args.saves)
-        for saves_dir in ["assembly", "reassembly"]:
-            full_dir = os.path.join(args.saves, saves_dir)
-            if os.path.isdir(full_dir):
-                #shutil.copytree(os.path.join(args.saves, saves_dir), os.path.join(args.dir, saves_dir))
-                os.symlink(full_dir, os.path.join(args.dir, saves_dir))
-            else:
-                log.warn("No " + saves_dir + " dir provided; skipping")
-        #Don't touch symlinked assemblies because it may corrupt other runs with the same dependencies
-        #mts_args.append("--reuse-assemblies")
+        for dir_name, saves_arg in [("assembly", "--reuse-assemblies"), ("profile", "--reuse-profiles")]:
+            saves_dir = os.path.join(args.data, dir_name)
+            if os.path.exists(saves_dir):
+                log.log("Reusing {} saves from {}".format(dir_name, saves_dir))
+                mts_args.extend([saves_arg, saves_dir])
+    mts_args.append(args.dir)
     os.chdir(os.path.join(workdir, "src/projects/mts"))
+    log.log("Calling " + " ".join(mts_args))
     return subprocess.call(mts_args)
 
 def check_etalons(args, workdir):
@@ -151,8 +144,9 @@ def check_etalons(args, workdir):
             log.err("GF of {} in {} = {}% is higher than expected {:.2f}%".format(cag, ref, est_val, upper))
             mut.res = 7
 
-    for file in os.listdir(args.etalons):
-        etalon = os.path.join(args.etalons, file)
+    etalons_dir = os.path.join(args.data, "etalons")
+    for file in os.listdir(etalons_dir):
+        etalon = os.path.join(etalons_dir, file)
         estimated = os.path.join(args.dir, "stats", "summary", file)
         log.log("Trying to compare " + etalon + " and " + estimated)
         if not os.path.isfile(estimated):
@@ -185,11 +179,12 @@ if __name__ == "__main__":
         ecode = compile_mts(workdir)
         if ecode != 0:
             log.err("MTS compilation finished abnormally with exit code " + str(ecode))
-            sys.exit(3)
+            sys.exit(2)
 
         ecode = run_mts(args, workdir)
         if ecode != 0:
             log.err("Error while running MTS: " + str(ecode))
+            sys.exit(3)
 
         if args.etalons:
             ecode = check_etalons(args, workdir)
diff --git a/src/projects/mts/visualization.hpp b/src/projects/mts/visualization.hpp
index 8ab87b5..3b43869 100644
--- a/src/projects/mts/visualization.hpp
+++ b/src/projects/mts/visualization.hpp
@@ -34,7 +34,7 @@ public:
         auto ann = annotation_.Annotation(edge);
         std::ostringstream ss;
         std::transform(ann.begin(), ann.end(), ostream_iterator<string>(ss, ":"), [&](bin_id b){
-            return get(color_map_, b);
+            return utils::get(color_map_, b);
         });
         return ss.str();
     }
@@ -63,4 +63,4 @@ void PrintAnnotatedAlongPath(const conj_graph_pack& gp,
     visualization::visualization_utils::WriteComponentsAlongPath<Graph>(gp.g, path, output_prefix, colorer_ptr, labeler);
 }
 
-}
\ No newline at end of file
+}
diff --git a/src/projects/online_vis/debruijn_environment.hpp b/src/projects/online_vis/debruijn_environment.hpp
index 9886b25..0c7eb9b 100644
--- a/src/projects/online_vis/debruijn_environment.hpp
+++ b/src/projects/online_vis/debruijn_environment.hpp
@@ -42,7 +42,7 @@ class DebruijnEnvironment : public Environment {
               max_vertices_(40),
               edge_length_bound_(1000),
               gp_(K, "./tmp", cfg::get().ds.reads.lib_count(), 
-                  "",
+                  std::vector<std::string>(0),
                   cfg::get().flanking_range,
                   cfg::get().pos.max_mapping_gap,
                   cfg::get().pos.max_gap_diff),
diff --git a/src/projects/online_vis/drawing_commands/draw_missasemblies.hpp b/src/projects/online_vis/drawing_commands/draw_missasemblies.hpp
index 9b8ef4f..b86ddc5 100644
--- a/src/projects/online_vis/drawing_commands/draw_missasemblies.hpp
+++ b/src/projects/online_vis/drawing_commands/draw_missasemblies.hpp
@@ -78,7 +78,7 @@ private:
         vector<EdgeId> genome_edges = curr_env.path_finder().FindReadPath(genome_path);
         vector<EdgeId> rc_genome_edges = curr_env.path_finder().FindReadPath(reverse_genome_path);
         vector<EdgeId> rc_and_usual_genome_edges(genome_edges);
-        push_back_all(rc_and_usual_genome_edges, rc_genome_edges);
+        utils::push_back_all(rc_and_usual_genome_edges, rc_genome_edges);
         vector<EdgeId> edges = path.simple_path();
         auto filtered_edges = FilterNonUnique(curr_env.graph(), edges, rc_and_usual_genome_edges);
         if(filtered_edges.size() < 2)
@@ -132,7 +132,7 @@ private:
                 }
 
 
-                DrawPicturesAlongPath(curr_env, path_to_draw, name + "_" + ToString(curr_env.graph().int_id(filtered_edges[i])));
+                DrawPicturesAlongPath(curr_env, path_to_draw, name + "_" + std::to_string(curr_env.graph().int_id(filtered_edges[i])));
                 real_difference = (int)genome_path[index_genome].second.initial_range.start_pos - (int)path[index_contig].second.initial_range.start_pos;
                 INFO("Diff is set to " << real_difference);
                 continue;
@@ -143,7 +143,7 @@ private:
                 real_difference = (int)genome_path[index_genome].second.initial_range.start_pos - (int)path[index_contig].second.initial_range.start_pos;
                 vector<EdgeId> path_to_draw;
                 path_to_draw.push_back(genome_path[index_genome].first);
-                DrawPicturesAlongPath(curr_env, path_to_draw, name + "_" + ToString(curr_env.graph().int_id(filtered_edges[i])));
+                DrawPicturesAlongPath(curr_env, path_to_draw, name + "_" + std::to_string(curr_env.graph().int_id(filtered_edges[i])));
                 INFO("Diff is set to " << real_difference);
             }
             ++i;
diff --git a/src/projects/online_vis/drawing_commands/draw_part_of_genome_command.hpp b/src/projects/online_vis/drawing_commands/draw_part_of_genome_command.hpp
index 1529561..394813d 100644
--- a/src/projects/online_vis/drawing_commands/draw_part_of_genome_command.hpp
+++ b/src/projects/online_vis/drawing_commands/draw_part_of_genome_command.hpp
@@ -18,7 +18,7 @@
 namespace online_visualization {
     class DrawPartOfGenomeCommand : public DrawingCommand {
         private:
-            void CheckPathIntegrity(const omnigraph::de::GraphDistanceFinder<Graph>& dist_finder, EdgeId first_edge, EdgeId second_edge) const {
+            void CheckPathIntegrity(const omnigraph::de::GraphDistanceFinder& dist_finder, EdgeId first_edge, EdgeId second_edge) const {
                 vector<size_t> distances = dist_finder.GetGraphDistancesLengths(first_edge, second_edge);
                 if (distances[0] == 0) {
                     INFO("Edges " << first_edge << " and " << second_edge << " are neighbouring");
diff --git a/src/projects/online_vis/drawing_commands/draw_polymorphic_regions.hpp b/src/projects/online_vis/drawing_commands/draw_polymorphic_regions.hpp
index d719cf8..b064e29 100644
--- a/src/projects/online_vis/drawing_commands/draw_polymorphic_regions.hpp
+++ b/src/projects/online_vis/drawing_commands/draw_polymorphic_regions.hpp
@@ -82,11 +82,11 @@ class DrawPolymorphicRegions : public DrawingCommand {
                         using namespace visualization::visualization_utils;
                         WriteComponentSinksSources(polymorphicRegion,
                                                    curr_env.folder() + "/" +
-                                                           ToString(curr_env.graph().int_id(*polymorphicRegion.vertices().begin())) + ".dot",
+                                                           std::to_string(curr_env.graph().int_id(*polymorphicRegion.vertices().begin())) + ".dot",
                                                    visualization::graph_colorer::DefaultColorer(curr_env.graph()),
                                                    curr_env.labeler());
 
-                        INFO("Component is written to " + curr_env.folder() + ToString(curr_env.graph().int_id(*polymorphicRegion.vertices().begin())) + ".dot");
+                        INFO("Component is written to " + curr_env.folder() + std::to_string(curr_env.graph().int_id(*polymorphicRegion.vertices().begin())) + ".dot");
                     }
 
                     i += curr_env.graph().length(positionSecond.first) - positionSecond.second;
diff --git a/src/projects/online_vis/drawing_commands/draw_poorly_assembled.hpp b/src/projects/online_vis/drawing_commands/draw_poorly_assembled.hpp
index 2eb6ead..4699cb7 100644
--- a/src/projects/online_vis/drawing_commands/draw_poorly_assembled.hpp
+++ b/src/projects/online_vis/drawing_commands/draw_poorly_assembled.hpp
@@ -130,8 +130,8 @@ public:
         make_dir(curr_env.folder());
         string pics_folder = curr_env.folder() + "/" + curr_env.GetFormattedPictureCounter()  + "_" + repeat_info.seq_name + "/";
         make_dir(pics_folder);
-        string pic_name = ToString(repeat_info.local_cnt) + "_" +  ToString(repeat_info.genomic_gap) +
-                "_" + ToString(curr_env.graph().int_id(repeat_info.e1)) + "_" + ToString(curr_env.graph().int_id(repeat_info.e2)) + "_";
+        string pic_name = std::to_string(repeat_info.local_cnt) + "_" +  std::to_string(repeat_info.genomic_gap) +
+                "_" + std::to_string(curr_env.graph().int_id(repeat_info.e1)) + "_" + std::to_string(curr_env.graph().int_id(repeat_info.e2)) + "_";
 
         DrawGap(curr_env, repeat_info.ref_path, pics_folder + pic_name);
     }
@@ -153,7 +153,7 @@ public:
         const Graph& g = curr_env.graph();
         vector<EdgeId> edges;
         edges.push_back(repeat_info.e1);
-        push_back_all(edges, repeat_info.ref_path);
+        utils::push_back_all(edges, repeat_info.ref_path);
         edges.push_back(repeat_info.e2);
         for (EdgeId e : edges) {
             if (!CheckInfo(clustered_pi_idx, repeat_info.e1, e)) {
diff --git a/src/projects/online_vis/errors.hpp b/src/projects/online_vis/errors.hpp
index 5c1ae80..d6c4e11 100644
--- a/src/projects/online_vis/errors.hpp
+++ b/src/projects/online_vis/errors.hpp
@@ -52,7 +52,7 @@ namespace online_visualization {
   }
 
   bool CheckFileExists(const string& file) {
-    if (!path::is_regular_file(file)) {
+    if (!fs::is_regular_file(file)) {
       FireFileDoesNotExist(file);
       return false;
     }
diff --git a/src/projects/online_vis/main.cpp b/src/projects/online_vis/main.cpp
index 7684637..339af99 100644
--- a/src/projects/online_vis/main.cpp
+++ b/src/projects/online_vis/main.cpp
@@ -13,11 +13,11 @@
 #include "utils/stacktrace.hpp"
 #include "pipeline/config_struct.hpp"
 #include "io/reads/io_helper.hpp"
-#include "utils/simple_tools.hpp"
+#include "utils/stl_utils.hpp"
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
-#include "utils/memory_limit.hpp"
+#include "utils/perf/memory_limit.hpp"
 #include "io/dataset_support/read_converter.hpp"
 
 #include "debruijn_online_visualizer.hpp"
@@ -27,10 +27,10 @@ void create_console_logger(string const& cfg_filename) {
 
     string log_props_file = cfg::get().log_filename;
 
-    if (!path::FileExists(log_props_file))
-        log_props_file = path::append_path(path::parent_path(cfg_filename), cfg::get().log_filename);
+    if (!fs::FileExists(log_props_file))
+        log_props_file = fs::append_path(fs::parent_path(cfg_filename), cfg::get().log_filename);
 
-    logger *lg = create_logger(path::FileExists(log_props_file) ? log_props_file : "");
+    logger *lg = create_logger(fs::FileExists(log_props_file) ? log_props_file : "");
     lg->add_writer(std::make_shared<console_writer>());
 
     attach_logger(lg);
@@ -43,7 +43,7 @@ int main(int argc, char** argv) {
         VERIFY(argc > 1)
         using namespace online_visualization;
         string cfg_filename = argv[1];
-        path::CheckFileExistenceFATAL(cfg_filename);
+        fs::CheckFileExistenceFATAL(cfg_filename);
 
         cfg::create_instance(cfg_filename);
 
@@ -53,7 +53,7 @@ int main(int argc, char** argv) {
         create_console_logger(cfg_filename);
         cout << "\nGAF (Graph Analysis Framework) started" << endl;
         cout << "Print help to see readme file" << endl;
-        limit_memory(cfg::get().max_memory * GB);
+        utils::limit_memory(cfg::get().max_memory * GB);
 
         DebruijnOnlineVisualizer online_vis;
         online_vis.init();
diff --git a/src/projects/online_vis/online_visualizer.hpp b/src/projects/online_vis/online_visualizer.hpp
index 2d6e337..10b05b0 100644
--- a/src/projects/online_vis/online_visualizer.hpp
+++ b/src/projects/online_vis/online_visualizer.hpp
@@ -58,15 +58,15 @@ class OnlineVisualizer {
   }
 
   inline void init() {
-    string p = path::append_path(cfg::get().load_from, "simplification"); // just for default
+    string p = fs::append_path(cfg::get().load_from, "simplification"); // just for default
 
-    path::make_dir("tmp");
+    fs::make_dir("tmp");
     DEBUG("Adding Commands");
     AddBaseCommands();
     AddSpecificCommands();
     DEBUG("Commands added");
     DEBUG("Adding auto-completion option");
-    InitAutocompletion(command_mapping_.GetCommandNamesList());
+    utils::InitAutocompletion(command_mapping_.GetCommandNamesList());
     //stringstream ss("load default " + p);
     //const Command<Env>& load_command = command_mapping_.GetCommand("load");
     //DEBUG("Loading current environment");
diff --git a/src/projects/online_vis/setting_commands.hpp b/src/projects/online_vis/setting_commands.hpp
index 3d78d6a..8de58e1 100644
--- a/src/projects/online_vis/setting_commands.hpp
+++ b/src/projects/online_vis/setting_commands.hpp
@@ -120,7 +120,7 @@ class SetFolderCommand : public LocalCommand<DebruijnEnvironment> {
             if (!CheckCorrectness(args))
                 return;
             string folder_name = args[1];
-            path::make_dirs(folder_name);
+            fs::make_dirs(folder_name);
             curr_env.set_folder(folder_name);
         }
 };
diff --git a/src/projects/scaffold_correction/main.cpp b/src/projects/scaffold_correction/main.cpp
index 56eca8d..f0d8676 100644
--- a/src/projects/scaffold_correction/main.cpp
+++ b/src/projects/scaffold_correction/main.cpp
@@ -11,9 +11,9 @@
 
 #include "utils/segfault_handler.hpp"
 #include "utils/stacktrace.hpp"
-#include "utils/memory_limit.hpp"
-#include "utils/copy_file.hpp"
-#include "utils/perfcounter.hpp"
+#include "utils/perf/memory_limit.hpp"
+#include "utils/filesystem/copy_file.hpp"
+#include "utils/perf/perfcounter.hpp"
 #include "scaffold_correction.hpp"
 
 #include "pipeline/config_struct.hpp"
@@ -26,7 +26,7 @@
 //FIXME code duplication
 void load_config(const vector<string>& cfg_fns) {
     for (const auto& s : cfg_fns) {
-        path::CheckFileExistenceFATAL(s);
+        fs::CheckFileExistenceFATAL(s);
     }
 
     cfg::create_instance(cfg_fns);
@@ -49,16 +49,16 @@ void create_console_logger(string cfg_filename) {
 
     string log_props_file = cfg::get().log_filename;
 
-    if (!path::FileExists(log_props_file))
-        log_props_file = path::append_path(path::parent_path(cfg_filename), cfg::get().log_filename);
+    if (!fs::FileExists(log_props_file))
+        log_props_file = fs::append_path(fs::parent_path(cfg_filename), cfg::get().log_filename);
 
-    logger *lg = create_logger(path::FileExists(log_props_file) ? log_props_file : "");
+    logger *lg = create_logger(fs::FileExists(log_props_file) ? log_props_file : "");
     lg->add_writer(std::make_shared<console_writer>());
     attach_logger(lg);
 }
 
 int main(int argc, char** argv) {
-    perf_counter pc;
+    utils::perf_counter pc;
 
     const size_t GB = 1 << 30;
 
@@ -68,7 +68,7 @@ int main(int argc, char** argv) {
     try {
         using namespace debruijn_graph;
 
-        string cfg_dir = path::parent_path(argv[1]);
+        string cfg_dir = fs::parent_path(argv[1]);
 
         vector<string> cfg_fns;
         for (int i = 1; i < argc; ++i) {
@@ -83,7 +83,7 @@ int main(int argc, char** argv) {
 
         // read configuration file (dataset path etc.)
 
-        limit_memory(cfg::get().max_memory * GB);
+        utils::limit_memory(cfg::get().max_memory * GB);
 
         // assemble it!
         INFO("Assembling dataset (" << cfg::get().dataset_file << ") with K=" << cfg::get().K);
diff --git a/src/projects/scaffold_correction/scaffold_correction.hpp b/src/projects/scaffold_correction/scaffold_correction.hpp
index 7f056aa..82e3432 100644
--- a/src/projects/scaffold_correction/scaffold_correction.hpp
+++ b/src/projects/scaffold_correction/scaffold_correction.hpp
@@ -6,6 +6,7 @@
 
 #pragma once
 #include "io/reads/osequencestream.hpp"
+#include "io/reads/single_read.hpp"
 #include "io/reads/file_reader.hpp"
 #include "pipeline/stage.hpp"
 #include "pipeline/graph_pack.hpp"
@@ -254,12 +255,11 @@ namespace spades {
         }
 
         void OutputResults(const vector<io::SingleRead> &results) {
-            io::osequencestream_simple oss(output_file_);
+            io::OutputSequenceStream oss(output_file_);
             for(size_t i = 0; i < results.size(); i++) {
                 string sequence = results[i].GetSequenceString();
-                if(sequence != "") {
-                    oss.set_header(results[i].name());
-                    oss << sequence;
+                if (sequence != "") {
+                    oss << io::SingleRead(results[i].name(), sequence);
                 }
             }
         }
diff --git a/src/projects/spades/CMakeLists.txt b/src/projects/spades/CMakeLists.txt
index e8f4743..90e2bcb 100644
--- a/src/projects/spades/CMakeLists.txt
+++ b/src/projects/spades/CMakeLists.txt
@@ -17,6 +17,7 @@ add_executable(spades main.cpp
             contig_output_stage.cpp
             hybrid_aligning.cpp
             chromosome_removal.cpp
+            series_analysis.cpp
             ../mts/contig_abundance.cpp)
 
 target_include_directories(spades PRIVATE ${EXT_DIR}/include/ConsensusCore)
diff --git a/src/projects/spades/chromosome_removal.cpp b/src/projects/spades/chromosome_removal.cpp
index fdedc68..40ff497 100644
--- a/src/projects/spades/chromosome_removal.cpp
+++ b/src/projects/spades/chromosome_removal.cpp
@@ -15,7 +15,7 @@
 
 namespace debruijn_graph {
 
-
+//TODO replace with standard methods
 void ChromosomeRemoval::CompressAll(Graph &g) {
     for (auto it = g.SmartVertexBegin(); ! it.IsEnd(); ++it) {
         if (g.IsDeadStart(*it) && g.IsDeadEnd(*it)) {
@@ -26,27 +26,6 @@ void ChromosomeRemoval::CompressAll(Graph &g) {
     }
 }
 
-void ChromosomeRemoval::DeleteAndCompress(EdgeId e, Graph &g){
-    auto start = g.EdgeStart(e);
-    auto end = g.EdgeEnd(e);
-    g.DeleteEdge(e);
-    bool is_cycle = (start == end || start == g.conjugate(end));
-    if (g.IsDeadStart(start) && g.IsDeadEnd(start)) {
-        g.DeleteVertex(start);
-    } else {
-        g.CompressVertex(start);
-    }
-    if (is_cycle) {
-        return;
-    }
-    if (g.IsDeadStart(end) && g.IsDeadEnd(end)) {
-        g.DeleteVertex(end);
-    } else {
-        g.CompressVertex(end);
-    }
-}
-
-
 size_t ChromosomeRemoval::CalculateComponentSize(EdgeId e, Graph &g_) {
     std::stack<EdgeId> next;
     size_t deadend_count = 0;
@@ -158,7 +137,7 @@ void ChromosomeRemoval::PlasmidSimplify(conj_graph_pack &gp, size_t long_edge_bo
 
 void ChromosomeRemoval::run(conj_graph_pack &gp, const char*) {
     //FIXME Seriously?! cfg::get().ds like hundred times...
-    OutputContigs(gp.g, cfg::get().output_dir + "before_chromosome_removal", false);
+    OutputEdgeSequences(gp.g, cfg::get().output_dir + "before_chromosome_removal");
     INFO("Before iteration " << 0 << ", " << gp.g.size() << " vertices in graph");
     double chromosome_coverage = RemoveLongGenomicEdges(gp, cfg::get().pd->long_edge_length, cfg::get().pd->relative_coverage );
     PlasmidSimplify(gp, cfg::get().pd->long_edge_length);
diff --git a/src/projects/spades/chromosome_removal.hpp b/src/projects/spades/chromosome_removal.hpp
index 77eb078..7752c34 100644
--- a/src/projects/spades/chromosome_removal.hpp
+++ b/src/projects/spades/chromosome_removal.hpp
@@ -31,6 +31,5 @@ private:
     void PlasmidSimplify(conj_graph_pack &gp, size_t long_edge_bound,
                                             std::function<void(typename Graph::EdgeId)> removal_handler = 0);
     void CompressAll(Graph &g);
-    void DeleteAndCompress(EdgeId e, Graph &g);
 };
 }
diff --git a/src/projects/spades/contig_output_stage.cpp b/src/projects/spades/contig_output_stage.cpp
index fd309e6..1ccbf85 100644
--- a/src/projects/spades/contig_output_stage.cpp
+++ b/src/projects/spades/contig_output_stage.cpp
@@ -5,23 +5,59 @@
 //* See file LICENSE for details.
 //***************************************************************************
 
+#include "modules/path_extend/pe_resolver.hpp"
 #include "contig_output_stage.hpp"
 #include "assembly_graph/paths/bidirectional_path_io/bidirectional_path_output.hpp"
 
 namespace debruijn_graph {
 
+vector<path_extend::PathsWriterT> CreatePathsWriters(const std::string &fn_base,
+                                                     path_extend::FastgWriter<Graph> &fastg_writer) {
+    using namespace path_extend;
+    vector<PathsWriterT> writers;
+
+    writers.push_back(ContigWriter::BasicFastaWriter(fn_base + ".fasta"));
+    INFO("Outputting FastG paths to " << fn_base << ".paths");
+    writers.push_back([=](const ScaffoldStorage& scaffold_storage) {
+        fastg_writer.WritePaths(scaffold_storage, fn_base + ".paths");
+    });
+    return writers;
+}
+
+template<class Graph>
+path_extend::EdgeNamingF<Graph> PlasmidNamingF(path_extend::EdgeNamingF<Graph> naming_f,
+                                  const ConnectedComponentCounter &cc_counter) {
+    return [=, &cc_counter](const Graph &g, EdgeId e) {
+        return io::AddComponentId(naming_f(g, e), cc_counter.GetComponent(e));
+    };
+}
+
 void ContigOutput::run(conj_graph_pack &gp, const char*) {
-    auto output_dir = cfg::get().output_dir + contig_name_prefix_;
+    using namespace path_extend;
+    auto output_dir = cfg::get().output_dir;
 
-    OutputContigs(gp.g, output_dir + "before_rr", false);
-    OutputContigsToFASTG(gp.g, output_dir + "assembly_graph", gp.components);
+    std::string gfa_fn = output_dir + "assembly_graph_with_scaffolds.gfa";
+    INFO("Writing GFA to " << gfa_fn);
 
-    if (output_paths_ && gp.contig_paths.size() != 0) {
-        DefaultContigCorrector<ConjugateDeBruijnGraph> corrector(gp.g);
-        DefaultContigConstructor<ConjugateDeBruijnGraph> constructor(gp.g, corrector);
+    std::ofstream os(gfa_fn);
+    GFAWriter<Graph> gfa_writer(gp.g, os,
+                                cfg::get().pd ? PlasmidNamingF<Graph>(IdNamingF<Graph>(), gp.components)
+                                              : IdNamingF<Graph>());
+    gfa_writer.WriteSegmentsAndLinks();
+
+    OutputEdgeSequences(gp.g, output_dir + "before_rr");
 
-        auto name_generator = path_extend::MakeContigNameGenerator(cfg::get().mode, gp);
-        path_extend::ContigWriter writer(gp.g, constructor, gp.components, name_generator);
+    INFO("Outputting FastG graph to " << output_dir << "assembly_graph.fastg");
+    std::string fastg_fn = output_dir + "assembly_graph.fastg";
+
+    FastgWriter<Graph> fastg_writer(gp.g,
+                                    cfg::get().pd ? PlasmidNamingF<Graph>(BasicNamingF<Graph>(), gp.components)
+                                                  : BasicNamingF<Graph>());
+    fastg_writer.WriteSegmentsAndLinks(fastg_fn);
+
+    if (output_paths_ && gp.contig_paths.size() != 0) {
+        auto name_generator = MakeContigNameGenerator(cfg::get().mode, gp);
+        ContigWriter writer(gp.g, name_generator);
 
         bool output_broken_scaffolds = cfg::get().pe_params.param_set.scaffolder_options.enabled &&
             cfg::get().use_scaffolder &&
@@ -37,18 +73,32 @@ void ContigOutput::run(conj_graph_pack &gp, const char*) {
                 WARN("Unsupported contig output mode");
             }
 
-            path_extend::ScaffoldBreaker breaker(min_gap);
-            path_extend::PathContainer broken_scaffolds;
+            ScaffoldBreaker breaker(min_gap);
+            PathContainer broken_scaffolds;
             breaker.Break(gp.contig_paths, broken_scaffolds);
-            writer.OutputPaths(broken_scaffolds, output_dir + cfg::get().co.contigs_name);
-        }
 
-        writer.OutputPaths(gp.contig_paths, output_dir + cfg::get().co.scaffolds_name);
+            //FIXME don't we want to use FinalizePaths here?
+            GraphCoverageMap cover_map(gp.g, broken_scaffolds, true);
+            Deduplicate(gp.g, broken_scaffolds, cover_map,
+                    /*min_edge_len*/0,
+                    /*max_path_diff*/0);
+            broken_scaffolds.FilterEmptyPaths();
+            broken_scaffolds.SortByLength();
+
+            writer.OutputPaths(broken_scaffolds,
+                               CreatePathsWriters(output_dir + contigs_name_,
+                                                  fastg_writer));
+        }
 
-        OutputContigsToGFA(gp.g, gp.contig_paths, output_dir + "assembly_graph");
+        auto writers = CreatePathsWriters(output_dir + cfg::get().co.scaffolds_name, fastg_writer);
+        writers.push_back([&](const ScaffoldStorage &storage) {
+            gfa_writer.WritePaths(storage);
+        });
+        writer.OutputPaths(gp.contig_paths, writers);
     } else {
-        OutputContigs(gp.g, output_dir + "simplified_contigs", cfg::get().use_unipaths);
-        OutputContigs(gp.g, output_dir + cfg::get().co.contigs_name, false);
+        //FIXME weird logic
+        OutputEdgeSequences(gp.g, output_dir + "simplified_contigs");
+        OutputEdgeSequences(gp.g, output_dir + contigs_name_);
     }
 }
 
diff --git a/src/projects/spades/contig_output_stage.hpp b/src/projects/spades/contig_output_stage.hpp
index de06d3d..93cce29 100644
--- a/src/projects/spades/contig_output_stage.hpp
+++ b/src/projects/spades/contig_output_stage.hpp
@@ -15,15 +15,16 @@ namespace debruijn_graph {
 class ContigOutput : public spades::AssemblyStage {
 private:
     bool output_paths_;
-    string contig_name_prefix_;
-
+    std::string contigs_name_;
 public:
-    ContigOutput(bool output_paths = true, bool preliminary = false, const string& contig_name_prefix = "")
-        : AssemblyStage("Contig Output", preliminary ? "preliminary_contig_output" : "contig_output"),
-          output_paths_(output_paths), contig_name_prefix_(contig_name_prefix) { }
+    ContigOutput(bool output_paths = true, std::string contigs_name = cfg::get().co.contigs_name)
+        : AssemblyStage("Contig Output", "contig_output"), output_paths_(output_paths), contigs_name_(contigs_name) { }
 
-    void run(conj_graph_pack &gp, const char *);
+    void load(conj_graph_pack &, const std::string &, const char *) { }
 
+    void save(const conj_graph_pack &, const std::string &, const char *) const { }
+
+    void run(conj_graph_pack &gp, const char *);
 };
 
-}
\ No newline at end of file
+}
diff --git a/src/projects/spades/distance_estimation.cpp b/src/projects/spades/distance_estimation.cpp
index 1950e85..80f869e 100644
--- a/src/projects/spades/distance_estimation.cpp
+++ b/src/projects/spades/distance_estimation.cpp
@@ -24,7 +24,7 @@ using namespace omnigraph::de;
 
 template<class Graph>
 void estimate_with_estimator(const Graph &graph,
-                             const omnigraph::de::AbstractDistanceEstimator<Graph>& estimator,
+                             const omnigraph::de::AbstractDistanceEstimator& estimator,
                              omnigraph::de::AbstractPairInfoChecker<Graph>& checker,
                              PairedIndexT& clustered_index) {
     using config::estimation_mode;
@@ -107,7 +107,7 @@ void estimate_distance(conj_graph_pack& gp,
     const config::debruijn_config& config = cfg::get();
     size_t delta = size_t(lib.data().insert_size_deviation);
     size_t linkage_distance = size_t(config.de.linkage_distance_coeff * lib.data().insert_size_deviation);
-    GraphDistanceFinder<Graph> dist_finder(gp.g,  (size_t)math::round(lib.data().mean_insert_size), lib.data().read_length, delta);
+    GraphDistanceFinder dist_finder(gp.g,  (size_t)math::round(lib.data().mean_insert_size), lib.data().read_length, delta);
     size_t max_distance = size_t(config.de.max_distance_coeff * lib.data().insert_size_deviation);
 
     std::function<double(int)> weight_function;
@@ -131,27 +131,27 @@ void estimate_distance(conj_graph_pack& gp,
 
     switch (config.est_mode) {
         case estimation_mode::simple: {
-            const AbstractDistanceEstimator<Graph>&
+            const AbstractDistanceEstimator&
                     estimator =
-                    DistanceEstimator<Graph>(gp.g, paired_index, dist_finder,
+                    DistanceEstimator(gp.g, paired_index, dist_finder,
                                              linkage_distance, max_distance);
 
             estimate_with_estimator<Graph>(gp.g, estimator, checker, clustered_index);
             break;
         }
         case estimation_mode::weighted: {
-            const AbstractDistanceEstimator<Graph>&
+            const AbstractDistanceEstimator&
                     estimator =
-                    WeightedDistanceEstimator<Graph>(gp.g, paired_index,
+                    WeightedDistanceEstimator(gp.g, paired_index,
                                                      dist_finder, weight_function, linkage_distance, max_distance);
 
             estimate_with_estimator<Graph>(gp.g, estimator, checker, clustered_index);
             break;
         }
         case estimation_mode::smoothing: {
-            const AbstractDistanceEstimator<Graph>&
+            const AbstractDistanceEstimator&
                     estimator =
-                    SmoothingDistanceEstimator<Graph>(gp.g, paired_index,
+                    SmoothingDistanceEstimator(gp.g, paired_index,
                                                       dist_finder, weight_function, linkage_distance, max_distance,
                                                       config.ade.threshold,
                                                       config.ade.range_coeff,
@@ -185,7 +185,7 @@ void estimate_distance(conj_graph_pack& gp,
         double is_var = lib.data().insert_size_deviation;
         size_t delta = size_t(is_var);
         size_t linkage_distance = size_t(cfg::get().de.linkage_distance_coeff * is_var);
-        GraphDistanceFinder<Graph> dist_finder(gp.g, (size_t) math::round(lib.data().mean_insert_size),
+        GraphDistanceFinder dist_finder(gp.g, (size_t) math::round(lib.data().mean_insert_size),
                                                lib.data().read_length, delta);
         size_t max_distance = size_t(cfg::get().de.max_distance_coeff_scaff * is_var);
         std::function<double(int)> weight_function;
@@ -205,8 +205,8 @@ void estimate_distance(conj_graph_pack& gp,
         PairInfoWeightChecker<Graph> checker(gp.g, 0.);
         DEBUG("Weight Filter Done");
 
-        const AbstractDistanceEstimator<Graph>& estimator =
-                SmoothingDistanceEstimator<Graph>(gp.g, paired_index, dist_finder,
+        const AbstractDistanceEstimator& estimator =
+                SmoothingDistanceEstimator(gp.g, paired_index, dist_finder,
                                                   weight_function, linkage_distance, max_distance,
                                                   cfg::get().ade.threshold, cfg::get().ade.range_coeff,
                                                   cfg::get().ade.delta_coeff, cfg::get().ade.cutoff,
diff --git a/src/projects/spades/gap_closer.cpp b/src/projects/spades/gap_closer.cpp
index 4a17509..7e980a1 100644
--- a/src/projects/spades/gap_closer.cpp
+++ b/src/projects/spades/gap_closer.cpp
@@ -173,7 +173,6 @@ class GapCloser {
     omnigraph::de::PairedInfoIndexT<Graph> &tips_paired_idx_;
     const size_t min_intersection_;
     const size_t hamming_dist_bound_;
-    const int init_gap_val_;
     const omnigraph::de::DEWeight weight_threshold_;
 
     std::vector<size_t> DiffPos(const Sequence &s1, const Sequence &s2) const {
@@ -390,7 +389,6 @@ public:
               tips_paired_idx_(tips_paired_idx),
               min_intersection_(min_intersection),
               hamming_dist_bound_(hamming_dist_bound),
-              init_gap_val_(-10),
               weight_threshold_(weight_threshold)  {
         VERIFY(min_intersection_ < g_.k());
         DEBUG("weight_threshold=" << weight_threshold_);
diff --git a/src/projects/spades/gap_closing.hpp b/src/projects/spades/gap_closing.hpp
index 182f055..0a4d85d 100644
--- a/src/projects/spades/gap_closing.hpp
+++ b/src/projects/spades/gap_closing.hpp
@@ -13,52 +13,42 @@ typedef omnigraph::GapDescription<Graph> GapDescription;
 class GapJoiner {
     Graph& g_;
     omnigraph::EdgeRemover<Graph> edge_remover_;
-    bool add_flanks_;
 
-    EdgeId ClipEnd(EdgeId e, size_t pos) {
-        VERIFY(pos > 0);
+    EdgeId ClipEnd(EdgeId e, size_t to_trim) {
+        VERIFY(to_trim < g_.length(e));
         VERIFY(omnigraph::TerminalVertexCondition<Graph>(g_).Check(g_.EdgeEnd(e)));
         VERIFY(e != g_.conjugate(e));
-        if (pos == g_.length(e)) {
+        if (to_trim == 0) {
             return e;
         } else {
-            auto split_res = g_.SplitEdge(e, pos);
+            auto split_res = g_.SplitEdge(e, g_.length(e) - to_trim);
             edge_remover_.DeleteEdge(split_res.second);
             return split_res.first;
         }
     }
 
-    EdgeId ClipStart(EdgeId e, size_t pos) {
-        return g_.conjugate(ClipEnd(g_.conjugate(e), g_.length(e) - pos));
+    EdgeId ClipStart(EdgeId e, size_t to_trim) {
+        return g_.conjugate(ClipEnd(g_.conjugate(e), to_trim));
     }
 
-    EdgeId AddEdge(VertexId v1, VertexId v2, const Sequence& gap_seq) {
-        if (!add_flanks_) {
-            VERIFY_MSG(g_.VertexNucls(v1) == gap_seq.Subseq(0, g_.k()), 
-                       g_.VertexNucls(v1) << " not equal " << gap_seq.Subseq(0, g_.k()));
-            VERIFY_MSG(g_.VertexNucls(v2) == gap_seq.Subseq(gap_seq.size() - g_.k()),
-                       g_.VertexNucls(v2) << " not equal " << gap_seq.Subseq(gap_seq.size() - g_.k()));
-            return g_.AddEdge(v1, v2, gap_seq);
-        } else {
-            DEBUG("Adding gap seq " << gap_seq);
-            DEBUG("Between vertices " << g_.VertexNucls(v1) << " and " << g_.VertexNucls(v2));
-            return g_.AddEdge(v1, v2, g_.VertexNucls(v1) + gap_seq + g_.VertexNucls(v2));
-        }
+    EdgeId AddEdge(VertexId v1, VertexId v2, const Sequence &gap_seq) {
+        DEBUG("Adding gap seq " << gap_seq);
+        DEBUG("Between vertices " << g_.VertexNucls(v1) << " and " << g_.VertexNucls(v2));
+        return g_.AddEdge(v1, v2, g_.VertexNucls(v1) + gap_seq + g_.VertexNucls(v2));
     }
 
 public:
-    GapJoiner(Graph& g, bool add_flanks = false) :
+    GapJoiner(Graph& g) :
             g_(g),
-            edge_remover_(g),
-            add_flanks_(add_flanks) {
+            edge_remover_(g) {
     }
 
     EdgeId operator() (const GapDescription& gap, bool compress = true) {
-        VERIFY(gap.start != gap.end && gap.start != g_.conjugate(gap.end));
+        VERIFY(gap.left() != gap.right() && gap.left() != g_.conjugate(gap.right()));
         DEBUG("Processing gap " << gap.str(g_));
-        EdgeId start = ClipEnd(gap.start, gap.edge_gap_start_position);
-        EdgeId end = ClipStart(gap.end, gap.edge_gap_end_position);
-        EdgeId new_edge = AddEdge(g_.EdgeEnd(start), g_.EdgeStart(end), gap.gap_seq);
+        EdgeId start = ClipEnd(gap.left(), gap.left_trim());
+        EdgeId end = ClipStart(gap.right(), gap.right_trim());
+        EdgeId new_edge = AddEdge(g_.EdgeEnd(start), g_.EdgeStart(end), gap.filling_seq());
 
         if (compress) {
             return omnigraph::Compressor<Graph>(g_).CompressVertexEdgeId(g_.EdgeStart(new_edge));
diff --git a/src/projects/spades/hybrid_aligning.cpp b/src/projects/spades/hybrid_aligning.cpp
index ffdd915..6939ec6 100644
--- a/src/projects/spades/hybrid_aligning.cpp
+++ b/src/projects/spades/hybrid_aligning.cpp
@@ -86,7 +86,7 @@ class GapTrackingListener : public SequenceMapperListener {
                 DEBUG("Gap info successfully created");
                 return GapDescription(left, right,
                                       *gap_seq,
-                                      left_offset,
+                                      g_.length(left) - left_offset,
                                       right_offset);
             } else {
                 DEBUG("Something wrong with read subsequence");
@@ -416,7 +416,7 @@ void HybridLibrariesAligning::run(conj_graph_pack& gp, const char*) {
                 //FIXME make const
                 auto& reads = cfg::get_writable().ds.reads[lib_id];
 
-                SequenceMapperNotifier notifier(gp);
+                SequenceMapperNotifier notifier(gp, cfg::get_writable().ds.reads.lib_count());
                 //FIXME pretty awful, would be much better if listeners were shared ptrs
                 LongReadMapper read_mapper(gp.g, gp.single_long_reads[lib_id],
                                            ChooseProperReadPathExtractor(gp.g, reads.type()));
diff --git a/src/projects/spades/hybrid_gap_closer.hpp b/src/projects/spades/hybrid_gap_closer.hpp
index 0443715..d0cd88a 100644
--- a/src/projects/spades/hybrid_gap_closer.hpp
+++ b/src/projects/spades/hybrid_gap_closer.hpp
@@ -59,7 +59,7 @@ private:
     DECL_LOGGER("GapStorage");
 
     void HiddenAddGap(const GapDescription& p) {
-        inner_index_[p.start].push_back(p);
+        inner_index_[p.left()].push_back(p);
     }
 
     size_t FillIndex() {
@@ -87,8 +87,8 @@ private:
             auto copy_dest = gaps.begin();
             for (const info_it_pair& ep_gaps : ep_ranges) {
                 if (filter_f(ep_gaps.first, ep_gaps.second)) {
-                    DEBUG("Erasing candidates between " << g_.int_id(ep_gaps.first->start) << " and "
-                                                        << g_.int_id(ep_gaps.first->end));
+                    DEBUG("Erasing candidates between " << g_.int_id(ep_gaps.first->left()) << " and "
+                                                        << g_.int_id(ep_gaps.first->right()));
                 } else {
                     if (copy_dest == const_iterator_cast(gaps, ep_gaps.first)) {
                         copy_dest = const_iterator_cast(gaps, ep_gaps.second);
@@ -108,7 +108,7 @@ private:
 
     void FilterByEdgePair(const EdgePairPred &filter_f) {
         FilterByCandidates([=](gap_info_it info_start, gap_info_it /*info_end*/) {
-            return filter_f(EdgePair(info_start->start, info_start->end));
+            return filter_f(EdgePair(info_start->left(), info_start->right()));
         });
     }
 
@@ -128,7 +128,7 @@ private:
     vector<EdgeId> SecondEdges(const GapInfos& edge_gaps) const {
         vector<EdgeId> jump_edges;
         for (auto it_pair : EdgePairGaps(edge_gaps)) {
-            jump_edges.push_back(it_pair.first->end);
+            jump_edges.push_back(it_pair.first->right());
         }
         return jump_edges;
     };
@@ -196,8 +196,8 @@ private:
     void FilterIndex(size_t min_weight, size_t max_flank) {
         DEBUG("Filtering by maximal allowed flanking length " << max_flank);
         FilterByDescription([=](const GapDescription &gap) {
-            return gap.edge_gap_start_position + max_flank < g_.length(gap.start)
-                   || gap.edge_gap_end_position > max_flank;
+            return gap.left_trim() > max_flank
+                   || gap.right_trim() > max_flank;
         });
 
         DEBUG("Filtering by weight " << min_weight);
@@ -243,7 +243,7 @@ public:
     }
 
     void AddGap(const GapDescription& p) {
-        if (IsCanonical(g_, p.start, p.end)) {
+        if (IsCanonical(g_, p.left(), p.right())) {
             HiddenAddGap(p);
         } else {
             HiddenAddGap(p.conjugate(g_));
@@ -277,38 +277,12 @@ public:
         }
     }
 
-//    void LoadFromFile(const string s) {
-//        FILE* file = fopen((s).c_str(), "r");
-//        int res;
-//        char ss[5000];
-//        map<int, EdgeId> tmp_map;
-//        for (auto iter = g.ConstEdgeBegin(); !iter.IsEnd(); ++iter) {
-//            tmp_map[g.int_id(*iter)] = *iter;
-//        }
-//        while (!feof(file)) {
-//            int first_id, second_id, first_ind, second_ind;
-//            int size;
-//            res = fscanf(file, "%d %d\n", &first_id, &size);
-//            VERIFY(res == 2);
-//            for (int i = 0; i < size; i++) {
-//                res = fscanf(file, "%d %d\n", &first_id, &first_ind);
-//                VERIFY(res == 2);
-//                res = fscanf(file, "%d %d\n", &second_id, &second_ind);
-//                VERIFY(res == 2);
-//                res = fscanf(file, "%s\n", ss);
-//                VERIFY(res == 1);
-//                GapDescription<Graph> gap(tmp_map[first_id], tmp_map[second_id], Sequence(ss), first_ind, second_ind);
-//                this->AddGap(gap);
-//            }
-//        }
-//    }
-
     //edge_gaps must be sorted
     vector<info_it_pair> EdgePairGaps(const GapInfos& edge_gaps) const {
         vector<info_it_pair> answer;
         auto ep_start = edge_gaps.begin();
         for (auto it = ep_start; it != edge_gaps.end(); ++it) {
-            if (it->end != ep_start->end) {
+            if (it->right() != ep_start->right()) {
                 answer.push_back({ep_start, it});
                 ep_start = it;
             }
@@ -414,7 +388,8 @@ class MultiGapJoiner {
     bool CheckGapsValidity(const vector<GapDescription>& gaps) const {
         vector<GapDescription> answer;
         return std::all_of(gaps.begin(), gaps.end(), [&](const GapDescription &gap) {
-            return IsCanonical(g_, gap.start, gap.end) && gap.start != gap.end && gap.start != g_.conjugate(gap.end);
+            return IsCanonical(g_, gap.left(), gap.right()) &&
+                    gap.left() != gap.right() && gap.left() != g_.conjugate(gap.right());
         });
     }
 
@@ -431,7 +406,7 @@ class MultiGapJoiner {
 
     vector<EdgeId> EdgesNeedingSplit(const SplitInfo& left_split_info, const SplitInfo& right_split_info) const {
         vector<EdgeId> answer;
-        for (EdgeId e : key_set(left_split_info))
+        for (EdgeId e : utils::key_set(left_split_info))
             if (right_split_info.count(e))
                 answer.push_back(e);
         return answer;
@@ -445,63 +420,63 @@ class MultiGapJoiner {
         return (left_split + right_split) / 2;
     }
 
-    bool Update(EdgeId& e, size_t& gap_pos, EdgePair split_orig_ep, EdgePair split_res, bool gap_start) const {
+    bool UpdateLeft(GapDescription &gap, EdgePair split_orig_ep, EdgePair split_res) const {
+        EdgeId e = gap.left();
+
         EdgeId split_orig = split_orig_ep.first;
         if (e == split_orig_ep.second) {
             split_orig = split_orig_ep.second;
             split_res = Conjugate(g_, split_res);
         }
+
         if (e == split_orig) {
-            if (gap_start) {
-                e = split_res.second;
-                gap_pos = gap_pos - g_.length(split_res.first);
-            } else {
-                e = split_res.first;
-            }
+            VERIFY(gap.left_trim() < g_.length(split_res.second));
+            gap.set_left(split_res.second);
             return true;
         }
+
         return false;
     }
 
-    void UpdateGap(GapDescription& gap, EdgePair split_orig, EdgePair split_res) const {
-        bool u1 = Update(gap.start, gap.edge_gap_start_position, split_orig, split_res, true);
-        bool u2 = Update(gap.end, gap.edge_gap_end_position, split_orig, split_res, false);
-        VERIFY(u1 != u2);
-    }
+    bool UpdateRight(GapDescription &gap, EdgePair split_orig_ep, EdgePair split_res) const {
+        EdgeId e = gap.right();
 
-    bool CheckInsert(EdgeId e, set<EdgeId>& used_edges) const {
-        return used_edges.insert(e).second;
-    }
+        EdgeId split_orig = split_orig_ep.first;
+        if (e == split_orig_ep.second) {
+            split_orig = split_orig_ep.second;
+            split_res = Conjugate(g_, split_res);
+        }
 
-    bool CheckInsert(const vector<EdgeId> edges, set<EdgeId>& used_edges) const {
-        for (EdgeId e : edges) {
-            if (!CheckInsert(e, used_edges)) {
-                return false;
-            }
+        if (e == split_orig) {
+            VERIFY(gap.right_trim() < g_.length(split_res.first));
+            gap.set_right(split_res.first);
+            return true;
         }
-        return true;
+
+        return false;
+    }
+
+    void UpdateGap(GapDescription& gap, EdgePair split_orig, EdgePair split_res) const {
+        bool u1 = UpdateLeft(gap, split_orig, split_res);
+        bool u2 = UpdateRight(gap, split_orig, split_res);
+        VERIFY(u1 != u2);
     }
 
     std::set<EdgeId> RelevantEdges(const GapDescription& gap) const {
         std::set<EdgeId> answer;
-        answer.insert(gap.start);
-        answer.insert(g_.conjugate(gap.start));
-        answer.insert(gap.end);
-        answer.insert(g_.conjugate(gap.end));
+        answer.insert(gap.left());
+        answer.insert(g_.conjugate(gap.left()));
+        answer.insert(gap.right());
+        answer.insert(g_.conjugate(gap.right()));
         return answer;
     }
 
     bool CheckGaps(const vector<GapDescription>& gaps) const {
         set<EdgeId> used_edges;
-        for (const auto& gap : gaps) {
-            const auto relevant = RelevantEdges(gap);
-            //TODO check the semantics of all_of
-            if (!std::all_of(relevant.begin(), relevant.end(), [&](const EdgeId& e) {
-                return used_edges.insert(e).second;
-            })) {
-                return false;
-            }
-        }
+        for (const auto& gap : gaps)
+            for (EdgeId e : RelevantEdges(gap))
+                if (!used_edges.insert(e).second)
+                    return false;
         return true;
     }
 
@@ -511,8 +486,8 @@ class MultiGapJoiner {
         for (size_t i = 0; i < canonical_gaps.size(); ++i) {
             const auto& gap = canonical_gaps[i];
             DEBUG("Processing gap " << gap.str(g_));
-            Add(i, gap.start, gap.edge_gap_start_position, right_split_pos, left_split_pos);
-            Add(i, gap.end, gap.edge_gap_end_position, left_split_pos, right_split_pos);
+            Add(i, gap.left(), g_.length(gap.left()) - gap.left_trim(), right_split_pos, left_split_pos);
+            Add(i, gap.right(), gap.right_trim(), left_split_pos, right_split_pos);
         }
 
         set<size_t> to_ignore;
@@ -545,7 +520,7 @@ class MultiGapJoiner {
     };
 
 public:
-    MultiGapJoiner(Graph& g) : g_(g), inner_joiner_(g, true) {
+    MultiGapJoiner(Graph& g) : g_(g), inner_joiner_(g) {
     }
 
     //Resulting graph should be condensed
@@ -587,39 +562,39 @@ private:
         return ss.str();
     }
 
-    GapDescription ConstructConsensus(EdgeId start,
-                                      EdgeId end,
-                                      size_t edge_gap_start_position,
-                                      size_t edge_gap_end_position,
+    GapDescription ConstructConsensus(EdgeId left,
+                                      EdgeId right,
+                                      size_t left_trim,
+                                      size_t right_trim,
                                       const vector<string>& gap_variants) const {
         DEBUG(gap_variants.size() << " gap closing variants, lengths: " << PrintLengths(gap_variants));
         DEBUG("var size original " << gap_variants.size());
         vector<string> new_gap_variants(gap_variants.begin(), gap_variants.end());
         new_gap_variants.resize(std::min(max_consensus_reads_, gap_variants.size()));
         auto s = consensus_(new_gap_variants);
-        DEBUG("consenus for " << g_.int_id(start)
-                              << " and " << g_.int_id(end)
+        DEBUG("consenus for " << g_.int_id(left)
+                              << " and " << g_.int_id(right)
                               << " found: '" << s << "'");
-        return GapDescription(start, end,
+        return GapDescription(left, right,
                               Sequence(s),
-                              edge_gap_start_position, edge_gap_end_position);
+                              left_trim, right_trim);
     }
 
     //all gaps guaranteed to correspond to a single edge pair
     GapInfos PadGaps(gap_info_it start, gap_info_it end) const {
-        size_t start_min = std::numeric_limits<size_t>::max();
-        size_t end_max = 0;
+        size_t start_trim = 0;
+        size_t end_trim = 0;
         size_t long_seqs = 0;
         size_t short_seqs = 0;
         for (auto it = start; it != end; ++it) {
             const auto& gap = *it;
-            if (gap.gap_seq.size() > long_seq_limit_)
+            if (gap.filling_seq().size() > long_seq_limit_)
                 long_seqs++;
             else
                 short_seqs++;
 
-            start_min = std::min(start_min, gap.edge_gap_start_position);
-            end_max = std::max(end_max, gap.edge_gap_end_position);
+            start_trim = std::max(start_trim, gap.left_trim());
+            end_trim = std::max(end_trim, gap.right_trim());
         }
 
         const bool exclude_long_seqs = (short_seqs >= min_weight_ && short_seqs > long_seqs);
@@ -628,19 +603,20 @@ private:
         for (auto it = start; it != end; ++it) {
             const auto& gap = *it;
 
-            if (exclude_long_seqs && gap.gap_seq.size() > long_seq_limit_)
+            if (exclude_long_seqs && gap.filling_seq().size() > long_seq_limit_)
                 continue;
 
-            string s = g_.EdgeNucls(gap.start).Subseq(start_min + g_.k(), gap.edge_gap_start_position + g_.k()).str();
-            s += gap.gap_seq.str();
-            s += g_.EdgeNucls(gap.end).Subseq(gap.edge_gap_end_position, end_max).str();
-            answer.push_back(GapDescription(gap.start, gap.end, Sequence(s), start_min, end_max));
+            size_t start_nucl_size = g_.length(gap.left()) + g_.k();
+            string s = g_.EdgeNucls(gap.left()).Subseq(start_nucl_size - start_trim, start_nucl_size - gap.left_trim()).str();
+            s += gap.filling_seq().str();
+            s += g_.EdgeNucls(gap.right()).Subseq(gap.right_trim(), end_trim).str();
+            answer.push_back(GapDescription(gap.left(), gap.right(), Sequence(s), start_trim, end_trim));
         }
         return answer;
     }
 
     GapDescription ConstructConsensus(gap_info_it start_it, gap_info_it end_it) const {
-        DEBUG("Considering extension " << g_.str(start_it->end));
+        DEBUG("Considering extension " << g_.str(start_it->right()));
         size_t cur_len = end_it - start_it;
 
         //low weight connections filtered earlier
@@ -656,7 +632,7 @@ private:
         vector<string> gap_variants;
         std::transform(padded_gaps.begin(), padded_gaps.end(), std::back_inserter(gap_variants), 
                        [](const GapDescription& gap) {
-            return gap.gap_seq.str();
+            return gap.filling_seq().str();
         });
 
         //for (auto it = start_it; it != end_it; ++it) {
@@ -667,16 +643,16 @@ private:
         //}
         auto padded_gap = padded_gaps.front();
 
-        return ConstructConsensus(padded_gap.start, padded_gap.end,
-                                  padded_gap.edge_gap_start_position,
-                                  padded_gap.edge_gap_end_position,
+        return ConstructConsensus(padded_gap.left(), padded_gap.right(),
+                                  padded_gap.left_trim(),
+                                  padded_gap.right_trim(),
                                   gap_variants);
     }
 
     GapDescription ConstructConsensus(EdgeId e) const {
         DEBUG("Constructing consensus for edge " << g_.str(e));
         vector<GapDescription> closures;
-        for (const auto& edge_pair_gaps : storage_.EdgePairGaps(get(storage_.inner_index(), e))) {
+        for (const auto& edge_pair_gaps : storage_.EdgePairGaps(utils::get(storage_.inner_index(), e))) {
             auto consensus = ConstructConsensus(edge_pair_gaps.first, edge_pair_gaps.second);
             if (consensus != INVALID_GAP) {
                 closures.push_back(consensus);
@@ -733,7 +709,7 @@ public:
 
         gap_joiner(ConstructConsensus());
 
-        CompressAllVertices(g_, true, /*chunk_cnt*/100);
+        CompressAllVertices(g_, /*chunk_cnt*/100);
         return fate_tracker.Old2NewMapping();
     };
 
diff --git a/src/projects/spades/launch.hpp b/src/projects/spades/launch.hpp
index 42f3bf6..81b7ca2 100644
--- a/src/projects/spades/launch.hpp
+++ b/src/projects/spades/launch.hpp
@@ -96,13 +96,12 @@ void assemble_genome() {
         SPAdes.add(new debruijn_graph::MismatchCorrection());
     if (cfg::get().rr_enable) {
         if (two_step_rr) {
-            string prelim_prefix = "preliminary_";
             if (cfg::get().use_intermediate_contigs)
                 SPAdes.add(new debruijn_graph::PairInfoCount(true))
                       .add(new debruijn_graph::DistanceEstimation(true))
                       .add(new debruijn_graph::RepeatResolution(true))
-                      .add(new debruijn_graph::ContigOutput(true, true, prelim_prefix))
-                      .add(new debruijn_graph::SecondPhaseSetup(prelim_prefix));
+                      .add(new debruijn_graph::ContigOutput())
+                      .add(new debruijn_graph::SecondPhaseSetup());
 
             SPAdes.add(new debruijn_graph::Simplification());
         }
@@ -120,10 +119,12 @@ void assemble_genome() {
 
         //No graph modification allowed after HybridLibrariesAligning stage!
 
-        SPAdes.add(new debruijn_graph::ContigOutput(false, false, "pre_pe_"))
+        SPAdes.add(new debruijn_graph::ContigOutput(false, "intermediate_contigs"))
               .add(new debruijn_graph::PairInfoCount())
               .add(new debruijn_graph::DistanceEstimation())
               .add(new debruijn_graph::RepeatResolution());
+    } else {
+        SPAdes.add(new debruijn_graph::ContigOutput(false));
     }
 
     SPAdes.add(new debruijn_graph::ContigOutput());
@@ -131,7 +132,7 @@ void assemble_genome() {
     SPAdes.run(conj_gp, cfg::get().entry_point.c_str());
 
     // For informing spades.py about estimated params
-    debruijn_graph::config::write_lib_data(path::append_path(cfg::get().output_dir, "final"));
+    debruijn_graph::config::write_lib_data(fs::append_path(cfg::get().output_dir, "final"));
 
     INFO("SPAdes finished");
 }
diff --git a/src/projects/spades/main.cpp b/src/projects/spades/main.cpp
index e162e2e..8a886bc 100644
--- a/src/projects/spades/main.cpp
+++ b/src/projects/spades/main.cpp
@@ -10,15 +10,15 @@
  */
 #include "utils/logger/log_writers.hpp"
 
-#include "utils/memory_limit.hpp"
+#include "utils/perf/memory_limit.hpp"
 #include "utils/segfault_handler.hpp"
 #include "launch.hpp"
-#include "utils/copy_file.hpp"
+#include "utils/filesystem/copy_file.hpp"
 #include "version.hpp"
 
 void load_config(const vector<string>& cfg_fns) {
     for (const auto& s : cfg_fns) {
-        path::CheckFileExistenceFATAL(s);
+        fs::CheckFileExistenceFATAL(s);
     }
 
     cfg::create_instance(cfg_fns);
@@ -41,16 +41,17 @@ void create_console_logger(const string& dir) {
 
     string log_props_file = cfg::get().log_filename;
 
-    if (!path::FileExists(log_props_file))
-        log_props_file = path::append_path(dir, cfg::get().log_filename);
+    if (!fs::FileExists(log_props_file))
+        log_props_file = fs::append_path(dir, cfg::get().log_filename);
 
-    logger *lg = create_logger(path::FileExists(log_props_file) ? log_props_file : "");
+    logger *lg = create_logger(fs::FileExists(log_props_file) ? log_props_file : "");
     lg->add_writer(std::make_shared<console_writer>());
+    //lg->add_writer(std::make_shared<mutex_writer>(std::make_shared<console_writer>()));
     attach_logger(lg);
 }
 
 int main(int argc, char **argv) {
-    perf_counter pc;
+    utils::perf_counter pc;
 
     const size_t GB = 1 << 30;
 
@@ -60,7 +61,7 @@ int main(int argc, char **argv) {
     try {
         using namespace debruijn_graph;
 
-        string cfg_dir = path::parent_path(argv[1]);
+        string cfg_dir = fs::parent_path(argv[1]);
 
         vector<string> cfg_fns;
         for (int i = 1; i < argc; ++i) {
@@ -79,7 +80,7 @@ int main(int argc, char **argv) {
 
         // read configuration file (dataset path etc.)
 
-        limit_memory(cfg::get().max_memory * GB);
+        utils::limit_memory(cfg::get().max_memory * GB);
 
         // assemble it!
         INFO("Starting SPAdes, built from "
diff --git a/src/projects/spades/pair_info_count.cpp b/src/projects/spades/pair_info_count.cpp
index 30edba3..285be68 100644
--- a/src/projects/spades/pair_info_count.cpp
+++ b/src/projects/spades/pair_info_count.cpp
@@ -14,6 +14,8 @@
 #include "modules/alignment/bwa_sequence_mapper.hpp"
 #include "paired_info/pair_info_filler.hpp"
 #include "modules/path_extend/split_graph_pair_info.hpp"
+#include "modules/alignment/rna/ss_coverage_filler.hpp"
+
 
 #include "adt/bf.hpp"
 #include "adt/hll.hpp"
@@ -110,6 +112,64 @@ class EdgePairCounterFiller : public SequenceMapperListener {
     EdgePairCounter counter_;
 };
 
+static bool HasGoodRRLibs() {
+    for (const auto &lib : cfg::get().ds.reads) {
+        if (lib.is_contig_lib())
+            continue;
+
+        if (lib.is_paired() &&
+            lib.data().mean_insert_size == 0.0)
+            continue;
+
+        if (lib.is_repeat_resolvable())
+            return true;
+    }
+
+    return false;
+}
+
+static bool HasOnlyMP() {
+    for (const auto &lib : cfg::get().ds.reads) {
+        if (lib.type() == io::LibraryType::PathExtendContigs)
+            continue;
+
+        if (lib.type() != io::LibraryType::MatePairs &&
+            lib.type() != io::LibraryType::HQMatePairs)
+            return false;
+    }
+
+    return true;
+}
+
+static bool ShouldObtainLibCoverage() {
+    return cfg::get().calculate_coverage_for_each_lib;
+}
+
+//todo improve logic
+static bool ShouldObtainSingleReadsPaths(size_t ilib) {
+    using config::single_read_resolving_mode;
+    switch (cfg::get().single_reads_rr) {
+        case single_read_resolving_mode::all:
+            return true;
+        case single_read_resolving_mode::only_single_libs:
+            //Map when no PacBio/paried libs or only mate-pairs or single lib itself
+            if (!HasGoodRRLibs() || HasOnlyMP() ||
+                cfg::get().ds.reads[ilib].type() == io::LibraryType::SingleReads) {
+                if (cfg::get().mode != debruijn_graph::config::pipeline_type::meta) {
+                    return true;
+                } else {
+                    WARN("Single reads are not used in metagenomic mode");
+                }
+            }
+            break;
+        case single_read_resolving_mode::none:
+            break;
+        default:
+            VERIFY_MSG(false, "Invalid mode value");
+    }
+    return false;
+}
+
 static bool CollectLibInformation(const conj_graph_pack &gp,
                                   size_t &edgepairs,
                                   size_t ilib, size_t edge_length_threshold) {
@@ -117,7 +177,7 @@ static bool CollectLibInformation(const conj_graph_pack &gp,
     InsertSizeCounter hist_counter(gp, edge_length_threshold);
     EdgePairCounterFiller pcounter(cfg::get().max_threads);
 
-    SequenceMapperNotifier notifier(gp);
+    SequenceMapperNotifier notifier(gp, cfg::get_writable().ds.reads.lib_count());
     notifier.Subscribe(ilib, &hist_counter);
     notifier.Subscribe(ilib, &pcounter);
 
@@ -157,19 +217,30 @@ static bool CollectLibInformation(const conj_graph_pack &gp,
 }
 
 // FIXME: This needs to be static
-void ProcessSingleReads(conj_graph_pack &gp,
+static void ProcessSingleReads(conj_graph_pack &gp,
                         size_t ilib,
                         bool use_binary = true,
                         bool map_paired = false) {
     //FIXME make const
     auto& reads = cfg::get_writable().ds.reads[ilib];
 
-    SequenceMapperNotifier notifier(gp);
-    //FIXME pretty awful, would be much better if listeners were shared ptrs
+    SequenceMapperNotifier notifier(gp, cfg::get_writable().ds.reads.lib_count());
+
     LongReadMapper read_mapper(gp.g, gp.single_long_reads[ilib],
                                ChooseProperReadPathExtractor(gp.g, reads.type()));
 
-    notifier.Subscribe(ilib, &read_mapper);
+    if (ShouldObtainSingleReadsPaths(ilib) || reads.is_contig_lib()) {
+        //FIXME pretty awful, would be much better if listeners were shared ptrs
+        notifier.Subscribe(ilib, &read_mapper);
+        cfg::get_writable().ds.reads[ilib].data().single_reads_mapped = true;
+    }
+
+    SSCoverageFiller ss_coverage_filler(gp.g, gp.ss_coverage[ilib], !cfg::get().ss.ss_enabled);
+    if (cfg::get().calculate_coverage_for_each_lib) {
+        INFO("Will calculate lib coverage as well");
+        map_paired = true;
+        notifier.Subscribe(ilib, &ss_coverage_filler);
+    }
 
     auto mapper_ptr = ChooseProperMapper(gp, reads, cfg::get().bwa.bwa_enable);
     if (use_binary) {
@@ -180,9 +251,9 @@ void ProcessSingleReads(conj_graph_pack &gp,
                                                   map_paired, /*handle Ns*/false);
         notifier.ProcessLibrary(single_streams, ilib, *mapper_ptr);
     }
-    cfg::get_writable().ds.reads[ilib].data().single_reads_mapped = true;
 }
 
+
 static void ProcessPairedReads(conj_graph_pack &gp,
                                std::unique_ptr<PairedInfoFilter> filter, unsigned filter_threshold,
                                size_t ilib) {
@@ -197,7 +268,7 @@ static void ProcessPairedReads(conj_graph_pack &gp,
         round_thr = unsigned(std::min(cfg::get().de.max_distance_coeff * data.insert_size_deviation * cfg::get().de.rounding_coeff,
                                       cfg::get().de.rounding_thr));
 
-    SequenceMapperNotifier notifier(gp);
+    SequenceMapperNotifier notifier(gp, cfg::get_writable().ds.reads.lib_count());
     INFO("Left insert size quantile " << data.insert_size_left_quantile <<
          ", right insert size quantile " << data.insert_size_right_quantile <<
          ", filtering threshold " << filter_threshold <<
@@ -237,60 +308,6 @@ static void ProcessPairedReads(conj_graph_pack &gp,
     cfg::get_writable().ds.reads[ilib].data().pi_threshold = split_graph.GetThreshold();
 }
 
-static bool HasGoodRRLibs() {
-    for (const auto &lib : cfg::get().ds.reads) {
-        if (lib.is_contig_lib())
-            continue;
-
-        if (lib.is_paired() &&
-            lib.data().mean_insert_size == 0.0)
-            continue;
-
-        if (lib.is_repeat_resolvable())
-            return true;
-    }
-
-    return false;
-}
-
-static bool HasOnlyMP() {
-    for (const auto &lib : cfg::get().ds.reads) {
-        if (lib.type() == io::LibraryType::PathExtendContigs)
-            continue;
-
-        if (lib.type() != io::LibraryType::MatePairs &&
-            lib.type() != io::LibraryType::HQMatePairs)
-            return false;
-    }
-
-    return true;
-}
-
-//todo improve logic
-static bool ShouldMapSingleReads(size_t ilib) {
-    using config::single_read_resolving_mode;
-    switch (cfg::get().single_reads_rr) {
-        case single_read_resolving_mode::all:
-            return true;
-        case single_read_resolving_mode::only_single_libs:
-            //Map when no PacBio/paried libs or only mate-pairs or single lib itself
-            if (!HasGoodRRLibs() || HasOnlyMP() ||
-                cfg::get().ds.reads[ilib].type() == io::LibraryType::SingleReads) {
-                if (cfg::get().mode != debruijn_graph::config::pipeline_type::meta) {
-                    return true;
-                } else {
-                    WARN("Single reads are not used in metagenomic mode");
-                }
-            }
-            break;
-        case single_read_resolving_mode::none:
-            break;
-        default:
-            VERIFY_MSG(false, "Invalid mode value");
-    }
-    return false;
-}
-
 void PairInfoCount::run(conj_graph_pack &gp, const char *) {
     gp.InitRRIndices();
     gp.EnsureBasicMapping();
@@ -350,7 +367,7 @@ void PairInfoCount::run(conj_graph_pack &gp, const char *) {
 
                     INFO("Filtering data for library #" << i);
                     {
-                        SequenceMapperNotifier notifier(gp);
+                        SequenceMapperNotifier notifier(gp, cfg::get_writable().ds.reads.lib_count());
                         DEFilter filter_counter(*filter, gp.g);
                         notifier.Subscribe(i, &filter_counter);
 
@@ -367,8 +384,8 @@ void PairInfoCount::run(conj_graph_pack &gp, const char *) {
                 }
             }
 
-            if (ShouldMapSingleReads(i)) {
-                cfg::get_writable().use_single_reads = true;
+            if (ShouldObtainSingleReadsPaths(i) || ShouldObtainLibCoverage()) {
+                cfg::get_writable().use_single_reads |= ShouldObtainSingleReadsPaths(i);
                 INFO("Mapping single reads of library #" << i);
                 ProcessSingleReads(gp, i, /*use_binary*/true, /*map_paired*/true);
                 INFO("Total paths obtained from single reads: " << gp.single_long_reads[i].size());
diff --git a/src/projects/spades/repeat_resolving.cpp b/src/projects/spades/repeat_resolving.cpp
index 8deb72b..b2841ad 100644
--- a/src/projects/spades/repeat_resolving.cpp
+++ b/src/projects/spades/repeat_resolving.cpp
@@ -16,6 +16,7 @@ namespace debruijn_graph {
 static void PEResolving(conj_graph_pack& gp) {
     path_extend::PathExtendParamsContainer params(cfg::get().ds,
                                                   cfg::get().pe_params,
+                                                  cfg::get().ss,
                                                   cfg::get().output_dir,
                                                   cfg::get().mode,
                                                   cfg::get().uneven_depth,
diff --git a/src/projects/spades/second_phase_setup.cpp b/src/projects/spades/second_phase_setup.cpp
index f85e6dd..0875bf5 100644
--- a/src/projects/spades/second_phase_setup.cpp
+++ b/src/projects/spades/second_phase_setup.cpp
@@ -21,10 +21,10 @@ void SecondPhaseSetup::run(conj_graph_pack &gp, const char*) {
     gp.ClearRRIndices();
     gp.ClearPaths();
 
-    std::string old_pe_contigs_filename = cfg::get().output_dir + contig_name_prefix_ + "final_contigs.fasta";
+    std::string old_pe_contigs_filename = cfg::get().output_dir + "final_contigs.fasta";
     std::string new_pe_contigs_filename = cfg::get().output_dir + "first_pe_contigs.fasta";
 
-    VERIFY(path::check_existence(old_pe_contigs_filename));
+    VERIFY(fs::check_existence(old_pe_contigs_filename));
     INFO("Moving preliminary contigs from " << old_pe_contigs_filename << " to " << new_pe_contigs_filename);
     int code = rename(old_pe_contigs_filename.c_str(), new_pe_contigs_filename.c_str());
     VERIFY(code == 0);
diff --git a/src/projects/spades/second_phase_setup.hpp b/src/projects/spades/second_phase_setup.hpp
index 87fc7c4..bd40d88 100644
--- a/src/projects/spades/second_phase_setup.hpp
+++ b/src/projects/spades/second_phase_setup.hpp
@@ -12,12 +12,9 @@ namespace debruijn_graph {
 
 //todo rename
 class SecondPhaseSetup : public spades::AssemblyStage {
-private:
-    string contig_name_prefix_;
-
 public:
-    SecondPhaseSetup(const string& contig_name_prefix = "")
-            : AssemblyStage("Second Phase Setup", "second_phase_setup"),contig_name_prefix_(contig_name_prefix)  { }
+    SecondPhaseSetup()
+            : AssemblyStage("Second Phase Setup", "second_phase_setup") { }
 
     void run(conj_graph_pack &gp, const char *);
 };
diff --git a/src/projects/spades/series_analysis.hpp b/src/projects/spades/series_analysis.cpp
similarity index 50%
copy from src/projects/spades/series_analysis.hpp
copy to src/projects/spades/series_analysis.cpp
index 7860e51..ceb0f98 100644
--- a/src/projects/spades/series_analysis.hpp
+++ b/src/projects/spades/series_analysis.cpp
@@ -1,11 +1,16 @@
-#pragma once
+//***************************************************************************
+//* Copyright (c) 2016-2017 Saint Petersburg State University
+//* All Rights Reserved
+//* See file LICENSE for details.
+//***************************************************************************
 
-#include "pipeline/stage.hpp"
+#include "assembly_graph/handlers/id_track_handler.hpp"
 #include "assembly_graph/graph_support/graph_processing_algorithm.hpp"
 #include "assembly_graph/graph_support/basic_edge_conditions.hpp"
 #include "modules/simplification/tip_clipper.hpp"
 #include "projects/mts/contig_abundance.hpp"
 #include "io/reads/osequencestream.hpp"
+#include "series_analysis.hpp"
 
 #include "llvm/Support/YAMLParser.h"
 #include "llvm/Support/YAMLTraits.h"
@@ -176,148 +181,141 @@ private:
     DECL_LOGGER("AggressiveClearing");
 };
 
-class SeriesAnalysis : public spades::AssemblyStage {
-
-    boost::optional<AbundanceVector> InferAbundance(const std::string& bin_mult_fn,
-                                                    const std::string& b_id) const {
-        path::CheckFileExistenceFATAL(bin_mult_fn);
-
-        ifstream is(bin_mult_fn);
-        vector<AbundanceVector> abundances;
-        while (true) {
-            string name;
-            is >> name;
-            if (!is.fail()) {
-                AbundanceVector vec(SampleCount(), 0.0);
-                for (size_t i = 0; i < SampleCount(); ++i) {
-                    is >> vec[i];
-                    VERIFY(!is.fail());
-                }
-                if (name == b_id) {
-                    abundances.push_back(vec);
-                }
-            } else {
-                INFO("Read " << abundances.size() << " profiles for bin " << b_id);
-                break;
+boost::optional<AbundanceVector> InferAbundance(const std::string& bin_mult_fn,
+                                                const std::string& b_id) {
+    fs::CheckFileExistenceFATAL(bin_mult_fn);
+
+    ifstream is(bin_mult_fn);
+    std::vector<AbundanceVector> abundances;
+    std::string name;
+    while (true) {
+        is >> name;
+        if (!is.fail()) {
+            AbundanceVector vec(SampleCount(), 0.0);
+            for (size_t i = 0; i < SampleCount(); ++i) {
+                is >> vec[i];
+                VERIFY(!is.fail());
             }
-        }
-        return boost::optional<AbundanceVector>(MeanVector(abundances));
-    }
-
-    void PrintEdgeFragmentProfiles(const conj_graph_pack &gp, const ContigAbundanceCounter &abundance_counter, 
-                                   size_t split_length, size_t min_len, std::ostream &os) const {
-        for (auto it = gp.g.ConstEdgeBegin(true); !it.IsEnd(); ++it) {
-            EdgeId e = *it;
-            io::SingleRead full_contig(ToString(gp.g.int_id(e)), gp.g.EdgeNucls(e).str());
-            for (size_t i = 0; i < full_contig.size(); i += split_length) {
-                if (full_contig.size() - i < min_len) {
-                    DEBUG("Fragment shorter than min_length_bound " << min_len);
-                    break;
-                }
-
-                io::SingleRead contig = full_contig.Substr(i, std::min(i + split_length, full_contig.size()));
-
-                DEBUG("Processing fragment # " << (i / split_length) << " with id " << contig.name());
-
-                auto abundance_vec = abundance_counter(contig.GetSequenceString(), contig.name());
-
-                if (abundance_vec) {
-                    size_t len = contig.GetSequenceString().size();
-                    os << contig.name() << " " << len << " " << PrintVector(*abundance_vec) << std::endl;
-                    //copy(abundance_vec->begin(), abundance_vec->begin() + config.sample_cnt,
-                    //     ostream_iterator<Mpl>(ss, " "));
-                    DEBUG("Successfully estimated abundance of " << contig.name());
-                } else {
-                    DEBUG("Failed to estimate abundance of " << contig.name());
-                }
+            if (name == b_id) {
+                abundances.push_back(vec);
             }
+        } else {
+            INFO("Read " << abundances.size() << " profiles for bin " << b_id);
+            break;
         }
     }
+    return boost::optional<AbundanceVector>(MeanVector(abundances));
+}
 
-public:
-    SeriesAnalysis() : AssemblyStage("Series Analysis", "series_analysis") { }
-
-    void load(conj_graph_pack &, const std::string &, const char *) { }
-
-    void save(const conj_graph_pack &, const std::string &, const char *) const { }
-
-    void run(conj_graph_pack &gp, const char *) {
-        std::string cfg = cfg::get().series_analysis;
-        INFO("Series analysis enabled with config " << cfg);
-
-        auto Buf = llvm::MemoryBuffer::getFile(cfg);
-        VERIFY_MSG(Buf, "Failed to load config file " + cfg);
-
-        llvm::yaml::Input yin(*Buf.get());
-        SeriesAnalysisConfig config;
-        yin >> config;
+void PrintEdgeFragmentProfiles(const conj_graph_pack &gp, const ContigAbundanceCounter &abundance_counter,
+                               size_t split_length, size_t min_len, std::ostream &os) {
+    for (auto it = gp.g.ConstEdgeBegin(true); !it.IsEnd(); ++it) {
+        EdgeId e = *it;
+        io::SingleRead full_contig(std::to_string(gp.g.int_id(e)), gp.g.EdgeNucls(e).str());
+        for (size_t i = 0; i < full_contig.size(); i += split_length) {
+            if (full_contig.size() - i < min_len) {
+                DEBUG("Fragment shorter than min_length_bound " << min_len);
+                break;
+            }
 
-        SetSampleCount(config.sample_cnt);
+            io::SingleRead contig = full_contig.Substr(i, std::min(i + split_length, full_contig.size()));
 
-        ContigAbundanceCounter abundance_counter(config.k, 
-                                                 SingleClusterAnalyzer(2., 0.4),
-                                                 cfg::get().tmp_dir);
+            DEBUG("Processing fragment # " << (i / split_length) << " with id " << contig.name());
 
-        DEBUG("Initiating abundance counter");
-        abundance_counter.Init(config.kmer_mult);
-        DEBUG("Abundance counter ready");
+            auto abundance_vec = abundance_counter(contig.GetSequenceString(), contig.name());
 
-        if (!config.edges_sqn.empty()) {
-            io::osequencestream oss(config.edges_sqn);
-            for (auto it = gp.g.ConstEdgeBegin(true); !it.IsEnd(); ++it) {
-                EdgeId e = *it;
-                string s = gp.g.EdgeNucls(e).str();
-                oss << io::SingleRead(io::MakeContigId(gp.g.int_id(e), s.size()), s);
+            if (abundance_vec) {
+                size_t len = contig.GetSequenceString().size();
+                os << contig.name() << " " << len << " " << PrintVector(*abundance_vec) << std::endl;
+                //copy(abundance_vec->begin(), abundance_vec->begin() + config.sample_cnt,
+                //     ostream_iterator<Mpl>(ss, " "));
+                DEBUG("Successfully estimated abundance of " << contig.name());
+            } else {
+                DEBUG("Failed to estimate abundance of " << contig.name());
             }
         }
+    }
+}
 
-        if (!config.edges_mpl.empty()) {
-            ofstream os(config.edges_mpl);
-            PrintEdgeFragmentProfiles(gp, abundance_counter, -1ul, config.min_len, os);
-        }
+void SeriesAnalysis::run(conj_graph_pack &gp, const char *) {
+    std::string cfg = cfg::get().series_analysis;
+    INFO("Series analysis enabled with config " << cfg);
 
-        if (!config.edge_fragments_mpl.empty()) {
-            ofstream os(config.edge_fragments_mpl);
-            PrintEdgeFragmentProfiles(gp, abundance_counter, config.frag_size, config.min_len, os);
-        }
+    auto buf = llvm::MemoryBuffer::getFile(cfg);
+    VERIFY_MSG(buf, "Failed to load config file " + cfg);
 
-        boost::optional<AbundanceVector> bin_profile = InferAbundance(config.bin_prof, config.bin);
-        if (!bin_profile) {
-            ERROR("Couldn't estimate profile of bin");
-            return;
-        }
+    llvm::yaml::Input yin(*buf.get());
+    SeriesAnalysisConfig config;
+    yin >> config;
+
+    SetSampleCount(config.sample_cnt);
+
+    ContigAbundanceCounter abundance_counter(config.k,
+                                             make_shared<TrivialClusterAnalyzer>(),
+                                             cfg::get().tmp_dir);
 
-        EdgeAbundance<Graph> edge_abundance(gp.g, abundance_counter);
-        edge_abundance.Fill();
-
-        gp.EnsureBasicMapping();
-        gp.FillQuality();
-        visualization::graph_labeler::DefaultLabeler<Graph> labeler(gp.g, gp.edge_pos);
-        auto colorer = DefaultGPColorer(gp);
-        path::make_dir(cfg::get().output_dir + "pictures/");
-        QualityEdgeLocalityPrintingRH<Graph> qual_removal_handler(gp.g, gp.edge_qual, labeler, colorer,
-                                       cfg::get().output_dir + "pictures/");
-
-        INFO("Launching aggressive graph clearing");
-        //positive quality edges removed (folder colored_edges_deleted)
-        AggressiveClearing<Graph> clearing(gp.g, edge_abundance,
-                                            *bin_profile, 0.8, 0.3, [&](EdgeId e) {
-                        qual_removal_handler.HandleDelete(e);});
-        clearing.Run();
-        INFO("Graph clearing finished");
-
-        INFO("Drawing edges with failed abundance estimate")
-        path::make_dir(cfg::get().output_dir + "pictures_no_ab/");
-        QualityEdgeLocalityPrintingRH<Graph> qual_removal_handler2(gp.g, gp.edge_qual, labeler, colorer,
-                                       cfg::get().output_dir + "pictures_no_ab/");
+    DEBUG("Initiating abundance counter");
+    abundance_counter.Init(config.kmer_mult);
+    DEBUG("Abundance counter ready");
 
+    if (!config.edges_sqn.empty()) {
+        io::OutputSequenceStream oss(config.edges_sqn);
         for (auto it = gp.g.ConstEdgeBegin(true); !it.IsEnd(); ++it) {
             EdgeId e = *it;
-            if (edge_abundance.count(e) == 0) {
-                qual_removal_handler2.HandleDelete(e);
-            }
+            string s = gp.g.EdgeNucls(e).str();
+            oss << io::SingleRead(io::MakeContigId(gp.g.int_id(e), s.size()), s);
         }
     }
-};
+
+    if (!config.edges_mpl.empty()) {
+        ofstream os(config.edges_mpl);
+        PrintEdgeFragmentProfiles(gp, abundance_counter, -1ul, config.min_len, os);
+    }
+
+    if (!config.edge_fragments_mpl.empty()) {
+        ofstream os(config.edge_fragments_mpl);
+        PrintEdgeFragmentProfiles(gp, abundance_counter, config.frag_size, config.min_len, os);
+    }
+
+//    boost::optional<AbundanceVector> bin_profile = InferAbundance(config.bin_prof, config.bin);
+//    if (!bin_profile) {
+//        ERROR("Couldn't estimate profile of bin");
+//        return;
+//    }
+//
+//    EdgeAbundance<Graph> edge_abundance(gp.g, abundance_counter);
+//    edge_abundance.Fill();
+//
+//    gp.EnsureBasicMapping();
+//    gp.FillQuality();
+//    visualization::graph_labeler::DefaultLabeler<Graph> labeler(gp.g, gp.edge_pos);
+//    auto colorer = DefaultGPColorer(gp);
+//
+//    /*
+//    fs::make_dir(cfg::get().output_dir + "pictures/");
+//    QualityEdgeLocalityPrintingRH<Graph> qual_removal_handler(gp.g, gp.edge_qual, labeler, colorer,
+//                                   cfg::get().output_dir + "pictures/");
+//
+//    INFO("Launching aggressive graph clearing");
+//    //positive quality edges removed (folder colored_edges_deleted)
+//    AggressiveClearing<Graph> clearing(gp.g, edge_abundance,
+//                                        *bin_profile, 0.8, 0.3, [&](EdgeId e) {
+//                    qual_removal_handler.HandleDelete(e);});
+//    clearing.Run();
+//    INFO("Graph clearing finished");
+//    */
+//
+//    INFO("Drawing edges with failed abundance estimate")
+//    fs::make_dir(cfg::get().output_dir + "pictures_no_ab/");
+//    QualityEdgeLocalityPrintingRH<Graph> qual_removal_handler2(gp.g, gp.edge_qual, labeler, colorer,
+//                                   cfg::get().output_dir + "pictures_no_ab/");
+//
+//    for (auto it = gp.g.ConstEdgeBegin(true); !it.IsEnd(); ++it) {
+//        EdgeId e = *it;
+//        if (edge_abundance.count(e) == 0) {
+//            qual_removal_handler2.HandleDelete(e);
+//        }
+//    }
+}
 
 }
diff --git a/src/projects/spades/series_analysis.hpp b/src/projects/spades/series_analysis.hpp
index 7860e51..ba13aea 100644
--- a/src/projects/spades/series_analysis.hpp
+++ b/src/projects/spades/series_analysis.hpp
@@ -1,239 +1,11 @@
 #pragma once
 
 #include "pipeline/stage.hpp"
-#include "assembly_graph/graph_support/graph_processing_algorithm.hpp"
-#include "assembly_graph/graph_support/basic_edge_conditions.hpp"
-#include "modules/simplification/tip_clipper.hpp"
-#include "projects/mts/contig_abundance.hpp"
-#include "io/reads/osequencestream.hpp"
-
-#include "llvm/Support/YAMLParser.h"
-#include "llvm/Support/YAMLTraits.h"
 
 namespace debruijn_graph {
 
-struct SeriesAnalysisConfig {
-    uint k;
-    uint sample_cnt;
-    uint frag_size;
-    uint min_len;
-
-    std::string kmer_mult, bin, bin_prof, edges_sqn, edges_mpl, edge_fragments_mpl;
-};
-
-}
-
-namespace llvm { namespace yaml {
-
-template<> struct MappingTraits<debruijn_graph::SeriesAnalysisConfig> {
-    static void mapping(IO& io, debruijn_graph::SeriesAnalysisConfig& cfg) {
-        io.mapRequired("k", cfg.k);
-        io.mapRequired("sample_cnt", cfg.sample_cnt);
-        io.mapRequired("kmer_mult", cfg.kmer_mult);
-        io.mapRequired("bin", cfg.bin);
-        io.mapRequired("bin_prof", cfg.bin_prof);
-        io.mapRequired("min_len", cfg.min_len);
-        io.mapRequired("edges_sqn", cfg.edges_sqn);
-        io.mapRequired("edges_mpl", cfg.edges_mpl);
-        io.mapRequired("edge_fragments_mpl", cfg.edge_fragments_mpl);
-        io.mapRequired("frag_size", cfg.frag_size);
-    }
-};
-
-} }
-
-namespace debruijn_graph {
-
-template<class graph_pack>
-shared_ptr<visualization::graph_colorer::GraphColorer<typename graph_pack::graph_t>> DefaultGPColorer(
-    const graph_pack& gp) {
-    io::SingleRead genome("ref", gp.genome.str());
-    auto mapper = MapperInstance(gp);
-    auto path1 = mapper->MapRead(genome).path();
-    auto path2 = mapper->MapRead(!genome).path();
-    return visualization::graph_colorer::DefaultColorer(gp.g, path1, path2);
-}
-
-inline double l2_norm(const AbundanceVector& v) {
-    double s = 0.;
-    for (auto val : v) {
-        s += val * val;
-    }
-    return std::sqrt(s);
-}
-
-inline double cosine_sim(const AbundanceVector& v1, const AbundanceVector& v2) {
-    double s = 0.;
-    for (size_t i = 0; i < v1.size(); ++i) {
-        s += v1[i] * v2[i];
-    }
-    return s / (l2_norm(v1) * l2_norm(v2));
-}
-
-template<class Graph>
-class EdgeAbundance: public omnigraph::GraphActionHandler<Graph> {
-    typedef map<EdgeId, AbundanceVector> Storage;
-    typedef Storage::const_iterator const_iterator;
-    Storage edge_abundance_;
-    const ContigAbundanceCounter& abundance_counter_;
-
-public:
-    EdgeAbundance(const Graph& g, const ContigAbundanceCounter& abundance_counter) :
-        omnigraph::GraphActionHandler<Graph>(g, "EdgeAbundance"),
-        abundance_counter_(abundance_counter){}
-
-    void Fill() {
-        for (auto it = this->g().ConstEdgeBegin(true); !it.IsEnd(); ++it) {
-            HandleAdd(*it);
-        }
-    }
-
-    virtual void HandleAdd(EdgeId e) override {
-        auto ab = abundance_counter_(this->g().EdgeNucls(e).str());
-        if (!ab) {
-            INFO("Couldn't estimate abundance of edge " << this->g().str(e));
-        } else {
-            edge_abundance_[e] = *ab;
-        }
-    }
-
-    const_iterator begin() const {
-        return edge_abundance_.begin();
-    }
-
-    const_iterator end() const {
-        return edge_abundance_.end();
-    }
-
-    const_iterator find(EdgeId e) const {
-        return edge_abundance_.find(e);
-    }
-
-    size_t count(EdgeId e) const {
-        return edge_abundance_.count(e);
-    }
-
-private:
-    DECL_LOGGER("EdgeAbundance");
-};
-
-template<class Graph>
-class AggressiveClearing: public omnigraph::EdgeProcessingAlgorithm<Graph> {
-    typedef typename Graph::EdgeId EdgeId;
-    const EdgeAbundance<Graph>& edge_abundance_;
-    const AbundanceVector base_profile_;
-    const double similarity_threshold_;
-    const double norm_ratio_threshold_;
-    EdgeRemover<Graph> edge_remover_;
-    func::TypedPredicate<EdgeId> topological_condition_;
-
-protected:
-    virtual bool ProcessEdge(EdgeId e) override {
-        DEBUG("Processing edge " << this->g().str(e));
-        if (!topological_condition_(e)) {
-            DEBUG("Topological condition failed");
-            return false;
-        }
-        auto it = edge_abundance_.find(e);
-        if (it == edge_abundance_.end()) {
-            DEBUG("Edge " << this->g().str(e) << " did not have valid abundance profile");
-            return false;
-        }
-        const auto& profile = it->second;
-        DEBUG("Edge profile " << PrintVector(profile));
-        double sim = cosine_sim(profile, base_profile_);
-        double norm_ratio = l2_norm(profile) / l2_norm(base_profile_);
-
-        DEBUG("Similarity between edge and base profiles " << sim);
-        DEBUG("Norm ratio " << norm_ratio);
-        if (math::ls(norm_ratio, norm_ratio_threshold_)
-                || math::ls(sim, similarity_threshold_)) {
-            DEBUG("Removing edge " << this->g().str(e));
-
-            edge_remover_.DeleteEdge(e);
-            return true;
-        }
-        return false;
-    }
-
-public:
-    AggressiveClearing(Graph &g,
-                       const EdgeAbundance<Graph>& edge_abundance,
-                       const AbundanceVector& base_profile,
-                       double similarity_threshold,
-                       double norm_ratio_threshold,
-                       const std::function<void(EdgeId)> &removal_handler = 0) :
-        EdgeProcessingAlgorithm<Graph>(g, true),
-        edge_abundance_(edge_abundance),
-        base_profile_(base_profile),
-        similarity_threshold_(similarity_threshold),
-        norm_ratio_threshold_(norm_ratio_threshold),
-        edge_remover_(g, removal_handler),
-        topological_condition_(func::Or(AlternativesPresenceCondition<Graph>(g), TipCondition<Graph>(g))) {
-            DEBUG("Base profile " << PrintVector(base_profile_));
-        }
-private:
-    DECL_LOGGER("AggressiveClearing");
-};
-
 class SeriesAnalysis : public spades::AssemblyStage {
 
-    boost::optional<AbundanceVector> InferAbundance(const std::string& bin_mult_fn,
-                                                    const std::string& b_id) const {
-        path::CheckFileExistenceFATAL(bin_mult_fn);
-
-        ifstream is(bin_mult_fn);
-        vector<AbundanceVector> abundances;
-        while (true) {
-            string name;
-            is >> name;
-            if (!is.fail()) {
-                AbundanceVector vec(SampleCount(), 0.0);
-                for (size_t i = 0; i < SampleCount(); ++i) {
-                    is >> vec[i];
-                    VERIFY(!is.fail());
-                }
-                if (name == b_id) {
-                    abundances.push_back(vec);
-                }
-            } else {
-                INFO("Read " << abundances.size() << " profiles for bin " << b_id);
-                break;
-            }
-        }
-        return boost::optional<AbundanceVector>(MeanVector(abundances));
-    }
-
-    void PrintEdgeFragmentProfiles(const conj_graph_pack &gp, const ContigAbundanceCounter &abundance_counter, 
-                                   size_t split_length, size_t min_len, std::ostream &os) const {
-        for (auto it = gp.g.ConstEdgeBegin(true); !it.IsEnd(); ++it) {
-            EdgeId e = *it;
-            io::SingleRead full_contig(ToString(gp.g.int_id(e)), gp.g.EdgeNucls(e).str());
-            for (size_t i = 0; i < full_contig.size(); i += split_length) {
-                if (full_contig.size() - i < min_len) {
-                    DEBUG("Fragment shorter than min_length_bound " << min_len);
-                    break;
-                }
-
-                io::SingleRead contig = full_contig.Substr(i, std::min(i + split_length, full_contig.size()));
-
-                DEBUG("Processing fragment # " << (i / split_length) << " with id " << contig.name());
-
-                auto abundance_vec = abundance_counter(contig.GetSequenceString(), contig.name());
-
-                if (abundance_vec) {
-                    size_t len = contig.GetSequenceString().size();
-                    os << contig.name() << " " << len << " " << PrintVector(*abundance_vec) << std::endl;
-                    //copy(abundance_vec->begin(), abundance_vec->begin() + config.sample_cnt,
-                    //     ostream_iterator<Mpl>(ss, " "));
-                    DEBUG("Successfully estimated abundance of " << contig.name());
-                } else {
-                    DEBUG("Failed to estimate abundance of " << contig.name());
-                }
-            }
-        }
-    }
-
 public:
     SeriesAnalysis() : AssemblyStage("Series Analysis", "series_analysis") { }
 
@@ -241,83 +13,7 @@ public:
 
     void save(const conj_graph_pack &, const std::string &, const char *) const { }
 
-    void run(conj_graph_pack &gp, const char *) {
-        std::string cfg = cfg::get().series_analysis;
-        INFO("Series analysis enabled with config " << cfg);
-
-        auto Buf = llvm::MemoryBuffer::getFile(cfg);
-        VERIFY_MSG(Buf, "Failed to load config file " + cfg);
-
-        llvm::yaml::Input yin(*Buf.get());
-        SeriesAnalysisConfig config;
-        yin >> config;
-
-        SetSampleCount(config.sample_cnt);
-
-        ContigAbundanceCounter abundance_counter(config.k, 
-                                                 SingleClusterAnalyzer(2., 0.4),
-                                                 cfg::get().tmp_dir);
-
-        DEBUG("Initiating abundance counter");
-        abundance_counter.Init(config.kmer_mult);
-        DEBUG("Abundance counter ready");
-
-        if (!config.edges_sqn.empty()) {
-            io::osequencestream oss(config.edges_sqn);
-            for (auto it = gp.g.ConstEdgeBegin(true); !it.IsEnd(); ++it) {
-                EdgeId e = *it;
-                string s = gp.g.EdgeNucls(e).str();
-                oss << io::SingleRead(io::MakeContigId(gp.g.int_id(e), s.size()), s);
-            }
-        }
-
-        if (!config.edges_mpl.empty()) {
-            ofstream os(config.edges_mpl);
-            PrintEdgeFragmentProfiles(gp, abundance_counter, -1ul, config.min_len, os);
-        }
-
-        if (!config.edge_fragments_mpl.empty()) {
-            ofstream os(config.edge_fragments_mpl);
-            PrintEdgeFragmentProfiles(gp, abundance_counter, config.frag_size, config.min_len, os);
-        }
-
-        boost::optional<AbundanceVector> bin_profile = InferAbundance(config.bin_prof, config.bin);
-        if (!bin_profile) {
-            ERROR("Couldn't estimate profile of bin");
-            return;
-        }
-
-        EdgeAbundance<Graph> edge_abundance(gp.g, abundance_counter);
-        edge_abundance.Fill();
-
-        gp.EnsureBasicMapping();
-        gp.FillQuality();
-        visualization::graph_labeler::DefaultLabeler<Graph> labeler(gp.g, gp.edge_pos);
-        auto colorer = DefaultGPColorer(gp);
-        path::make_dir(cfg::get().output_dir + "pictures/");
-        QualityEdgeLocalityPrintingRH<Graph> qual_removal_handler(gp.g, gp.edge_qual, labeler, colorer,
-                                       cfg::get().output_dir + "pictures/");
-
-        INFO("Launching aggressive graph clearing");
-        //positive quality edges removed (folder colored_edges_deleted)
-        AggressiveClearing<Graph> clearing(gp.g, edge_abundance,
-                                            *bin_profile, 0.8, 0.3, [&](EdgeId e) {
-                        qual_removal_handler.HandleDelete(e);});
-        clearing.Run();
-        INFO("Graph clearing finished");
-
-        INFO("Drawing edges with failed abundance estimate")
-        path::make_dir(cfg::get().output_dir + "pictures_no_ab/");
-        QualityEdgeLocalityPrintingRH<Graph> qual_removal_handler2(gp.g, gp.edge_qual, labeler, colorer,
-                                       cfg::get().output_dir + "pictures_no_ab/");
-
-        for (auto it = gp.g.ConstEdgeBegin(true); !it.IsEnd(); ++it) {
-            EdgeId e = *it;
-            if (edge_abundance.count(e) == 0) {
-                qual_removal_handler2.HandleDelete(e);
-            }
-        }
-    }
+    void run(conj_graph_pack &gp, const char *);
 };
 
 }
diff --git a/src/projects/truseq_analysis/AlignmentAnalyserNew.cpp b/src/projects/truseq_analysis/AlignmentAnalyserNew.cpp
index de95af6..6340d50 100644
--- a/src/projects/truseq_analysis/AlignmentAnalyserNew.cpp
+++ b/src/projects/truseq_analysis/AlignmentAnalyserNew.cpp
@@ -14,7 +14,6 @@
 #include "AlignmentAnalyserNew.hpp"
 
 namespace alignment_analysis {
-    using omnigraph::Range;
 
     size_t AlignmentAnalyserNew::StepBack(const vector<ConsistentMapping> &path) const {
         size_t cur_step = 0;
diff --git a/src/projects/truseq_analysis/consistent_mapping.cpp b/src/projects/truseq_analysis/consistent_mapping.cpp
index 449f9cf..10aa4ac 100644
--- a/src/projects/truseq_analysis/consistent_mapping.cpp
+++ b/src/projects/truseq_analysis/consistent_mapping.cpp
@@ -10,7 +10,6 @@
 #include "consistent_mapping.h"
 
 namespace alignment_analysis {
-    using omnigraph::Range;
     using omnigraph::MappingRange;
 
     bool ConsistentMapping::CheckConnect(EdgeId e, Range r) const {
@@ -242,4 +241,4 @@ namespace alignment_analysis {
         os << ")";
         return os;
     }
-}
\ No newline at end of file
+}
diff --git a/src/projects/truseq_analysis/consistent_mapping.h b/src/projects/truseq_analysis/consistent_mapping.h
index 162be38..f0fa22b 100644
--- a/src/projects/truseq_analysis/consistent_mapping.h
+++ b/src/projects/truseq_analysis/consistent_mapping.h
@@ -13,9 +13,9 @@ namespace alignment_analysis {
     typedef Graph::EdgeId EdgeId;
 
     struct EdgeRange {
-        EdgeRange(const EdgeId &first, const omnigraph::Range &second) : first(first), second(second) { }
+        EdgeRange(const EdgeId &first, const Range &second) : first(first), second(second) { }
         EdgeId first;
-        omnigraph::Range second;
+        Range second;
     };
 
     ostream & operator<<(ostream& os, const EdgeRange& er);
@@ -27,9 +27,9 @@ namespace alignment_analysis {
 
         ConsistentMapping(const Graph &graph, const omnigraph::MappingPath<EdgeId> &path);
 
-        ConsistentMapping(Graph const &graph, omnigraph::Range r, const vector<EdgeRange> &path);
+        ConsistentMapping(Graph const &graph, Range r, const vector<EdgeRange> &path);
 
-        bool CheckConnect(EdgeId e, omnigraph::Range r) const;
+        bool CheckConnect(EdgeId e, Range r) const;
 
         bool CheckConnect(const EdgeRange &er) const;
 
@@ -45,7 +45,7 @@ namespace alignment_analysis {
 
         void ForceJoin(const ConsistentMapping &other, const vector <EdgeId> &path);
 
-        omnigraph::Range const &GetInitialRange() const;
+        Range const &GetInitialRange() const;
 
         const vector <EdgeRange> &GetMappedPath() const;
 
@@ -81,7 +81,7 @@ namespace alignment_analysis {
         vector<EdgeRange> GenerateMappingPath(const vector<EdgeId> &path) const;
 
         const Graph &graph_;
-        omnigraph::Range initial_range;
+        Range initial_range;
         vector <EdgeRange> mapped_path;
         DECL_LOGGER("ConsistentMapping");
     };
diff --git a/src/projects/truseq_analysis/main.cpp b/src/projects/truseq_analysis/main.cpp
index 1588396..a09a0b4 100644
--- a/src/projects/truseq_analysis/main.cpp
+++ b/src/projects/truseq_analysis/main.cpp
@@ -10,13 +10,13 @@
  */
 #include "utils/logger/log_writers.hpp"
 #include "utils/segfault_handler.hpp"
-#include "utils/memory_limit.hpp"
-#include "utils/copy_file.hpp"
+#include "utils/perf/memory_limit.hpp"
+#include "utils/filesystem/copy_file.hpp"
 #include "pipeline/config_struct.hpp"
 #include "analysis_pipeline.hpp"
 
 void load_config(string cfg_filename) {
-    path::CheckFileExistenceFATAL(cfg_filename);
+    fs::CheckFileExistenceFATAL(cfg_filename);
 
     cfg::create_instance(cfg_filename);
 
@@ -38,16 +38,16 @@ void create_console_logger(string cfg_filename) {
 
     string log_props_file = cfg::get().log_filename;
 
-    if (!path::FileExists(log_props_file))
-        log_props_file = path::append_path(path::parent_path(cfg_filename), cfg::get().log_filename);
+    if (!fs::FileExists(log_props_file))
+        log_props_file = fs::append_path(fs::parent_path(cfg_filename), cfg::get().log_filename);
 
-    logger *lg = create_logger(path::FileExists(log_props_file) ? log_props_file : "");
+    logger *lg = create_logger(fs::FileExists(log_props_file) ? log_props_file : "");
     lg->add_writer(std::make_shared<console_writer>());
     attach_logger(lg);
 }
 
 int main(int /*argc*/, char** argv) {
-    perf_counter pc;
+    utils::perf_counter pc;
 
     const size_t GB = 1 << 30;
 
@@ -67,7 +67,7 @@ int main(int /*argc*/, char** argv) {
 
         // read configuration file (dataset path etc.)
 
-        limit_memory(cfg::get().max_memory * GB);
+        utils::limit_memory(cfg::get().max_memory * GB);
 
         // assemble it!
         INFO("Assembling dataset (" << cfg::get().dataset_file << ") with K=" << cfg::get().K);
diff --git a/src/spades_pipeline/CMakeLists.txt b/src/spades_pipeline/CMakeLists.txt
index 57d94bd..0bea073 100644
--- a/src/spades_pipeline/CMakeLists.txt
+++ b/src/spades_pipeline/CMakeLists.txt
@@ -16,6 +16,6 @@ install(FILES truspades/reference_construction.py truspades/moleculo_filter_cont
         DESTINATION share/spades/spades_pipeline/truspades
         COMPONENT runtime)
 # Common module
-install(FILES common/alignment.py common/parallel_launcher.py common/sam_parser.py common/SeqIO.py
+install(FILES common/alignment.py common/parallel_launcher.py common/sam_parser.py common/SeqIO.py common/__init__.py
         DESTINATION share/spades/spades_pipeline/common
         COMPONENT runtime)
diff --git a/src/spades_pipeline/common/SeqIO.py b/src/spades_pipeline/common/SeqIO.py
index 9d5b2b9..c4b3e8f 100644
--- a/src/spades_pipeline/common/SeqIO.py
+++ b/src/spades_pipeline/common/SeqIO.py
@@ -6,12 +6,17 @@
 
 import itertools
 import sys
+import gzip
+import codecs
+
+fasta_ext = ['.fa', '.fas', '.fasta', '.seq', '.fsa', '.fna', '.ffn', '.frn']
+fastq_ext = ['.fq', '.fastq']
 
 def Open(f, mode):
     if f.endswith(".gz"):
-        return gzip.open(f, mode)
+        return codecs.getreader('UTF-8')(gzip.open(f, mode))
     else:
-        return open(f, mode)
+        return codecs.open(f, mode, encoding='utf-8')
 
 class Reader:
     def __init__(self, handler):
@@ -145,3 +150,28 @@ def RemoveNs(input_handler, output_handler):
             r -= 1
         if r > l:
             write(SeqRecord(contig.seq[l:r], contig.id))
+
+
+def is_fasta(file_name):
+    for ext in fasta_ext:
+        if ext in file_name:
+            return True
+
+    return False
+
+
+def is_fastq(file_name):
+    for ext in fastq_ext:
+        if ext in file_name:
+            return True
+
+    return False
+
+
+def get_read_file_type(file_name):
+    if is_fastq(file_name):
+        return 'fastq'
+    elif is_fasta(file_name):
+        return 'fasta'
+    else:
+        return None
diff --git a/configs/debruijn/simplification.info.template b/src/spades_pipeline/common/__init__.py
similarity index 100%
rename from configs/debruijn/simplification.info.template
rename to src/spades_pipeline/common/__init__.py
diff --git a/src/spades_pipeline/corrector_logic.py b/src/spades_pipeline/corrector_logic.py
index 7459c5f..a60d093 100644
--- a/src/spades_pipeline/corrector_logic.py
+++ b/src/spades_pipeline/corrector_logic.py
@@ -27,12 +27,13 @@ def prepare_config_corr(filename, cfg, ext_python_modules_home):
     data = pyyaml.load(open(filename, 'r'))
     data["dataset"] = cfg.dataset
     data["output_dir"] = cfg.output_dir
-    data["work_dir"] = process_cfg.process_spaces(cfg.tmp_dir)
+    data["work_dir"] = cfg.tmp_dir
     #data["hard_memory_limit"] = cfg.max_memory
     data["max_nthreads"] = cfg.max_threads
     data["bwa"] = cfg.bwa
     file_c = open(filename, 'w')
-    pyyaml.dump(data, file_c, default_flow_style = False, default_style='"', width=100500)
+    pyyaml.dump(data, file_c,
+                default_flow_style=False, default_style='"', width=float("inf"))
     file_c.close()
 
 
diff --git a/src/spades_pipeline/hammer_logic.py b/src/spades_pipeline/hammer_logic.py
index 1d971b8..908055d 100644
--- a/src/spades_pipeline/hammer_logic.py
+++ b/src/spades_pipeline/hammer_logic.py
@@ -93,7 +93,8 @@ def prepare_config_ih(filename, cfg, ext_python_modules_home):
     data["output_dir"] = cfg.output_dir
     data["hard_memory_limit"] = cfg.max_memory
     data["max_nthreads"] = cfg.max_threads
-    pyyaml.dump(data, open(filename, 'w'), default_flow_style = False, default_style='"', width=100500)
+    pyyaml.dump(data, open(filename, 'w'),
+                default_flow_style=False, default_style='"', width=float("inf"))
 
 
 def run_hammer(corrected_dataset_yaml_filename, configs_dir, execution_home, cfg,
@@ -109,7 +110,8 @@ def run_hammer(corrected_dataset_yaml_filename, configs_dir, execution_home, cfg
         not_used_dataset_data = support.get_libs_by_type(dataset_data, options_storage.LONG_READS_TYPES)
         to_correct_dataset_data = support.rm_libs_by_type(dataset_data, options_storage.LONG_READS_TYPES)
         to_correct_dataset_yaml_filename = os.path.join(cfg.output_dir, "to_correct.yaml")
-        pyyaml.dump(to_correct_dataset_data, open(to_correct_dataset_yaml_filename, 'w'), default_flow_style = False, default_style='"', width=100500)
+        pyyaml.dump(to_correct_dataset_data, open(to_correct_dataset_yaml_filename, 'w'),
+                    default_flow_style=False, default_style='"', width=float("inf"))
         cfg.dataset_yaml_filename = to_correct_dataset_yaml_filename
     else:
         not_used_dataset_data = None
@@ -154,7 +156,8 @@ def run_hammer(corrected_dataset_yaml_filename, configs_dir, execution_home, cfg
         is_changed = True
         corrected_dataset_data += not_used_dataset_data
     if is_changed:
-        pyyaml.dump(corrected_dataset_data, open(corrected_dataset_yaml_filename, 'w'), default_flow_style = False, default_style='"', width=100500)
+        pyyaml.dump(corrected_dataset_data, open(corrected_dataset_yaml_filename, 'w'),
+                    default_flow_style=False, default_style='"', width=float("inf"))
     log.info("\n== Dataset description file was created: " + corrected_dataset_yaml_filename + "\n")
 
     if os.path.isdir(cfg.tmp_dir):
diff --git a/src/spades_pipeline/options_storage.py b/src/spades_pipeline/options_storage.py
index 1919e5a..e4814fe 100644
--- a/src/spades_pipeline/options_storage.py
+++ b/src/spades_pipeline/options_storage.py
@@ -12,7 +12,7 @@ import sys
 import support
 from os.path import basename
 
-SUPPORTED_PYTHON_VERSIONS = ['2.4', '2.5', '2.6', '2.7', '3.2', '3.3', '3.4', '3.5']
+SUPPORTED_PYTHON_VERSIONS = ['2.4-2.7', '3.2+']  # major.minor format only, close ('-') and open ('+') ranges allowed
 # allowed reads extensions for BayesHammer and for thw whole SPAdes pipeline
 BH_ALLOWED_READS_EXTENSIONS = ['.fq', '.fastq', '.bam']
 CONTIGS_ALLOWED_READS_EXTENSIONS = ['.fa', '.fasta']
@@ -33,11 +33,12 @@ LONG_READS_TYPES = ["pacbio", "sanger", "nanopore", "tslr", "trusted-contigs", "
 contigs_name = "contigs.fasta"
 scaffolds_name = "scaffolds.fasta"
 assembly_graph_name = "assembly_graph.fastg"
-assembly_graph_name_gfa = "assembly_graph.gfa"
+assembly_graph_name_gfa = "assembly_graph_with_scaffolds.gfa"
 contigs_paths = "contigs.paths"
 scaffolds_paths = "scaffolds.paths"
 transcripts_name = "transcripts.fasta"
 transcripts_paths = "transcripts.paths"
+filtering_types = ["hard", "soft", "default"]
 
 #other constants
 MIN_K = 1
@@ -87,6 +88,7 @@ qvoffset = None  # auto-detect by default
 cov_cutoff = 'off'  # default is 'off'
 
 # hidden options
+save_gp = False
 mismatch_corrector = None
 reference = None
 series_analysis = None
@@ -95,6 +97,7 @@ iterations = None
 bh_heap_check = None
 spades_heap_check = None
 read_buffer_size = None
+lcer_cutoff = None 
 ### END OF OPTIONS
 
 # for restarting SPAdes
@@ -114,6 +117,7 @@ restart_developer_mode = None
 restart_reference = None
 restart_configs_dir = None
 restart_read_buffer_size = None
+restart_fast = None
 
 # for running to specific check-point
 stop_after = None
@@ -124,17 +128,22 @@ truseq_mode = False
 correct_scaffolds = False
 run_truseq_postprocessing = False
 
+#rna options
+strand_specific = None  # None, True, False are possible
+fast = None
+
 dict_of_prefixes = dict()
 dict_of_rel2abs = dict()
 
 # list of spades.py options
 long_options = "12= threads= memory= tmp-dir= iterations= phred-offset= sc iontorrent meta large-genome rna plasmid "\
+               "ss-fr ss-rf fast fast:false "\
                "only-error-correction only-assembler "\
                "disable-gzip-output disable-gzip-output:false disable-rr disable-rr:false " \
                "help version test debug debug:false reference= series-analysis= config-file= dataset= "\
                "bh-heap-check= spades-heap-check= read-buffer-size= help-hidden "\
-               "mismatch-correction mismatch-correction:false careful careful:false "\
-               "continue restart-from= diploid truseq cov-cutoff= configs-dir= stop-after=".split()
+               "mismatch-correction mismatch-correction:false careful careful:false save-gp save-gp:false "\
+               "continue restart-from= diploid truseq cov-cutoff= hidden-cov-cutoff= configs-dir= stop-after=".split()
 short_options = "o:1:2:s:k:t:m:i:hv"
 
 # adding multiple paired-end, mate-pair and other (long reads) libraries support
@@ -166,13 +175,13 @@ def get_mode():
 
 
 def version(spades_version, mode=None):
-    sys.stderr.write("SPAdes v" + str(spades_version))
+    sys.stdout.write("SPAdes v" + str(spades_version))
     if mode is None:
         mode = get_mode()
     if mode is not None:
-        sys.stderr.write(" [" + mode + "SPAdes mode]")
-    sys.stderr.write("\n")
-    sys.stderr.flush()
+        sys.stdout.write(" [" + mode + "SPAdes mode]")
+    sys.stdout.write("\n")
+    sys.stdout.flush()
 
 
 def usage(spades_version, show_hidden=False, mode=None):
@@ -247,12 +256,15 @@ def usage(spades_version, show_hidden=False, mode=None):
         sys.stderr.write("--sanger\t<filename>\tfile with Sanger reads\n")
         sys.stderr.write("--pacbio\t<filename>\tfile with PacBio reads\n")
         sys.stderr.write("--nanopore\t<filename>\tfile with Nanopore reads\n")
-    sys.stderr.write("--tslr\t<filename>\tfile with TSLR-contigs\n")
+    if not mode == "rna":
+        sys.stderr.write("--tslr\t<filename>\tfile with TSLR-contigs\n")
     sys.stderr.write("--trusted-contigs\t<filename>\tfile with trusted contigs\n")
     sys.stderr.write("--untrusted-contigs\t<filename>\tfile with untrusted contigs\n")
     if mode == "dip":
         sys.stderr.write("Input haplocontigs:" + "\n")
         sys.stderr.write("--hap\t<filename>\tfile with haplocontigs" + "\n")
+    if mode == "rna":
+        sys.stderr.write("--ss-<type>\tstrand specific data, <type> = fr (normal) and rf (antisense)\n")
 
     sys.stderr.write("" + "\n")
     sys.stderr.write("Pipeline options:" + "\n")
@@ -281,6 +293,8 @@ def usage(spades_version, show_hidden=False, mode=None):
     sys.stderr.write("" + "\n")
     sys.stderr.write("Advanced options:" + "\n")
     sys.stderr.write("--dataset\t<filename>\tfile with dataset description in YAML format" + "\n")
+    if mode == "rna":
+        sys.stderr.write("--fast\t\t\t\tspeeds up isoform detection, but may miss short and low-expressed isoforms\n")
     sys.stderr.write("-t/--threads\t<int>\t\tnumber of threads" + "\n")
     sys.stderr.write("\t\t\t\t[default: %s]\n" % THREADS)
     sys.stderr.write("-m/--memory\t<int>\t\tRAM limit for SPAdes in Gb"\
@@ -294,7 +308,7 @@ def usage(spades_version, show_hidden=False, mode=None):
         sys.stderr.write("\t\t\t\tless than " + str(MAX_K + 1) + ") [default: 'auto']" + "\n")
     else:
         sys.stderr.write("-k\t\t<int>\t\tk-mer size (must be odd and less than " + str(MAX_K + 1) + ") " \
-                         "[default: " + str(K_MERS_RNA[0]) + "]\n")
+                         "[default: 'auto']\n")
 
     if mode not in ["rna", "meta"]:
         sys.stderr.write("--cov-cutoff\t<float>\t\tcoverage cutoff value (a positive float number, "
@@ -323,6 +337,9 @@ def usage(spades_version, show_hidden=False, mode=None):
         sys.stderr.write("--spades-heap-check\t<value>\tsets HEAPCHECK environment variable"\
                              " for SPAdes" + "\n")
         sys.stderr.write("--large-genome\tEnables optimizations for large genomes \n")
+        sys.stderr.write("--save-gp\tEnables saving graph pack before repeat resolution (even without --debug) \n")
+        sys.stderr.write("--hidden-cov-cutoff\t<float>\t\tcoverage cutoff value deeply integrated in simplification"\
+                            " (a positive float number). Base coverage! Will be adjusted depending on K and RL! \n")
         sys.stderr.write("--help-hidden\tprints this usage message with all hidden options" + "\n")
 
     if show_hidden and mode == "dip":
@@ -354,6 +371,7 @@ def set_default_values():
     global qvoffset
     global cov_cutoff
     global tmp_dir
+    global fast
 
     if threads is None:
         threads = THREADS
@@ -380,12 +398,15 @@ def set_default_values():
         cov_cutoff = 'off'
     if tmp_dir is None:
         tmp_dir = os.path.join(output_dir, TMP_DIR)
+    if fast is None:
+        fast = False
 
 
 def set_test_options():
     global output_dir
     global single_cell
     global test_mode
+    global meta
 
     output_dir = os.path.abspath('spades_test')
     single_cell = False
@@ -406,6 +427,8 @@ def save_restart_options(log):
         support.error("you cannot specify --only-assembler with --restart-from option!", log)
     if only_error_correction:
         support.error("you cannot specify --only-error-correction with --restart-from option!", log)
+    if strand_specific is not None:
+        support.error("you cannot specify strand specificity (--ss-rf or --ss-fr) with --restart-from option!", log)
 
     global restart_k_mers
     global restart_careful
@@ -421,6 +444,7 @@ def save_restart_options(log):
     global restart_reference
     global restart_configs_dir
     global restart_read_buffer_size
+    global restart_fast
 
     restart_k_mers = k_mers
     restart_careful = careful
@@ -436,6 +460,7 @@ def save_restart_options(log):
     restart_reference = reference
     restart_configs_dir = configs_dir
     restart_read_buffer_size = read_buffer_size
+    restart_fast = fast
 
 
 def load_restart_options():
@@ -454,6 +479,7 @@ def load_restart_options():
     global configs_dir
     global read_buffer_size
     global original_k_mers
+    global fast
 
     if restart_k_mers:
         original_k_mers = k_mers
@@ -487,6 +513,8 @@ def load_restart_options():
         configs_dir = restart_configs_dir
     if restart_read_buffer_size is not None:
         read_buffer_size = restart_read_buffer_size
+    if restart_fast is not None:
+        fast = restart_fast
 
 
 def enable_truseq_mode():
diff --git a/src/spades_pipeline/spades_logic.py b/src/spades_pipeline/spades_logic.py
index 8b47c0d..7998321 100644
--- a/src/spades_pipeline/spades_logic.py
+++ b/src/spades_pipeline/spades_logic.py
@@ -43,6 +43,7 @@ def prepare_config_spades(filename, cfg, log, additional_contigs_fname, K, stage
 #    subst_dict["topology_simplif_enabled"] = bool_to_str(last_one)
     subst_dict["max_threads"] = cfg.max_threads
     subst_dict["max_memory"] = cfg.max_memory
+    subst_dict["save_gp"] = bool_to_str(cfg.save_gp)
     if (not last_one):
         subst_dict["correct_mismatches"] = bool_to_str(False)
     if "resolving_mode" in cfg.__dict__:
@@ -58,6 +59,9 @@ def prepare_config_spades(filename, cfg, log, additional_contigs_fname, K, stage
             subst_dict["coverage_threshold"] = 0.0
         else:
             subst_dict["coverage_threshold"] = cfg.cov_cutoff
+    if cfg.lcer_cutoff is not None:
+        subst_dict["lcer_enabled"] = bool_to_str(True)
+        subst_dict["lcer_coverage_threshold"] = cfg.lcer_cutoff
 
     #TODO: make something about spades.py and config param substitution 
     if "bwa_paired" in cfg.__dict__:
@@ -68,6 +72,15 @@ def prepare_config_spades(filename, cfg, log, additional_contigs_fname, K, stage
     process_cfg.substitute_params(filename, subst_dict, log)
 
 
+def prepare_config_rnaspades(filename, log):
+    if not options_storage.rna:
+        return
+    subst_dict = dict()
+    subst_dict["ss_enabled"] = bool_to_str(options_storage.strand_specific is not None)
+    subst_dict["antisense"] = bool_to_str(options_storage.strand_specific)
+    process_cfg.substitute_params(filename, subst_dict, log)
+
+
 def get_read_length(output_dir, K, ext_python_modules_home, log):
     est_params_filename = os.path.join(output_dir, "K%d" % K, "final.lib_data")
     max_read_length = 0
@@ -119,6 +132,7 @@ def reveal_original_k_mers(RL):
     original_k_mers = [k for k in original_k_mers if k < RL]
     return original_k_mers
 
+
 def add_configs(command, configs_dir):
     #Order matters here!
     mode_config_mapping = [("single_cell", "mda_mode"), 
@@ -127,13 +141,22 @@ def add_configs(command, configs_dir):
                            ("rna", "rna_mode"),
                            ("large_genome", "large_genome_mode"),
                            ("plasmid", "plasmid_mode"),
-                           ("careful", "careful_mode"),
+                           #("careful", "careful_mode"),
                            ("diploid_mode", "diploid_mode")]
     for (mode, config) in mode_config_mapping:
         if options_storage.__dict__[mode]:
             if mode == "rna" or mode == "meta":
                 command.append(os.path.join(configs_dir, "mda_mode.info"))
             command.append(os.path.join(configs_dir, config + ".info"))
+    if options_storage.__dict__["careful"]:
+        if options_storage.__dict__["single_cell"]:
+            command.append(os.path.join(configs_dir, "careful_mda_mode.info"))
+        else:
+            command.append(os.path.join(configs_dir, "careful_mode.info"))
+
+    # special case: extra config
+    if options_storage.rna and options_storage.fast:
+        command.append(os.path.join(configs_dir, "rna_fast_mode.info"))
     
 
 def run_iteration(configs_dir, execution_home, cfg, log, K, prev_K, last_one):
@@ -178,6 +201,7 @@ def run_iteration(configs_dir, execution_home, cfg, log, K, prev_K, last_one):
         #FIXME why here???
         process_cfg.substitute_params(os.path.join(dst_configs, "pe_params.info"), {"scaffolding_mode": cfg.scaffolding_mode}, log)
 
+    prepare_config_rnaspades(os.path.join(dst_configs, "rna_mode.info"), log)
     cfg_fn = os.path.join(dst_configs, "config.info")
     prepare_config_spades(cfg_fn, cfg, log, additional_contigs_fname, K, stage, saves_dir, last_one, execution_home)
 
@@ -347,11 +371,18 @@ def run_spades(configs_dir, execution_home, cfg, dataset_data, ext_python_module
                 shutil.copyfile(os.path.join(latest, "before_rr.fasta"), result_before_rr_contigs)
         if options_storage.rna:
             if os.path.isfile(os.path.join(latest, "transcripts.fasta")):
-                    if not os.path.isfile(cfg.result_transcripts) or not options_storage.continue_mode:
-                        shutil.copyfile(os.path.join(latest, "transcripts.fasta"), cfg.result_transcripts)
+                if not os.path.isfile(cfg.result_transcripts) or not options_storage.continue_mode:
+                    shutil.copyfile(os.path.join(latest, "transcripts.fasta"), cfg.result_transcripts)
             if os.path.isfile(os.path.join(latest, "transcripts.paths")):
                 if not os.path.isfile(cfg.result_transcripts_paths) or not options_storage.continue_mode:
                     shutil.copyfile(os.path.join(latest, "transcripts.paths"), cfg.result_transcripts_paths)
+            for filtering_type in options_storage.filtering_types:
+                prefix = filtering_type + "_filtered_"
+                result_filtered_transcripts = os.path.join(cfg.output_dir, prefix + options_storage.transcripts_name)
+                latest_filtered_transcripts = os.path.join(latest, prefix + "final_paths.fasta")
+                if os.path.isfile(latest_filtered_transcripts):
+                    if not os.path.isfile(result_filtered_transcripts) or not options_storage.continue_mode:
+                        shutil.copyfile(latest_filtered_transcripts, result_filtered_transcripts)
         else:
             if os.path.isfile(os.path.join(latest, "final_contigs.fasta")):
                 if not os.path.isfile(cfg.result_contigs) or not options_storage.continue_mode:
@@ -367,9 +398,9 @@ def run_spades(configs_dir, execution_home, cfg, dataset_data, ext_python_module
                 if os.path.isfile(os.path.join(latest, "scaffolds.paths")):
                     if not os.path.isfile(cfg.result_scaffolds_paths) or not options_storage.continue_mode:
                         shutil.copyfile(os.path.join(latest, "scaffolds.paths"), cfg.result_scaffolds_paths)
-            if os.path.isfile(os.path.join(latest, "assembly_graph.gfa")):
+            if os.path.isfile(os.path.join(latest, "assembly_graph_with_scaffolds.gfa")):
                 if not os.path.isfile(cfg.result_graph_gfa) or not options_storage.continue_mode:
-                    shutil.copyfile(os.path.join(latest, "assembly_graph.gfa"), cfg.result_graph_gfa)
+                    shutil.copyfile(os.path.join(latest, "assembly_graph_with_scaffolds.gfa"), cfg.result_graph_gfa)
             if os.path.isfile(os.path.join(latest, "assembly_graph.fastg")):
                 if not os.path.isfile(cfg.result_graph) or not options_storage.continue_mode:
                     shutil.copyfile(os.path.join(latest, "assembly_graph.fastg"), cfg.result_graph)
diff --git a/src/spades_pipeline/support.py b/src/spades_pipeline/support.py
index 7fc8d15..06f175f 100644
--- a/src/spades_pipeline/support.py
+++ b/src/spades_pipeline/support.py
@@ -18,7 +18,10 @@ import tempfile
 import shutil
 import options_storage
 import itertools
+from common import SeqIO
+import math
 from os.path import abspath, expanduser, join
+from distutils.version import LooseVersion
 
 # constants to print and detect warnings and errors in logs
 SPADES_PY_ERROR_MESSAGE = "== Error == "
@@ -40,10 +43,12 @@ def error(err_str, log=None, dipspades=False, prefix=SPADES_PY_ERROR_MESSAGE):
         log.info("\n\n" + prefix + " " + err_str)
         log_warnings(log, with_error=True)
         log.info("\nIn case you have troubles running " + binary_name + ", you can write to spades.support at cab.spbu.ru")
+        log.info("or report an issue on our GitHub repository github.com/ablab/spades")
         log.info("Please provide us with params.txt and " + binary_name.lower() + ".log files from the output directory.")
     else:
         sys.stderr.write("\n\n" + prefix + " " + err_str + "\n\n")
         sys.stderr.write("\nIn case you have troubles running " + binary_name + ", you can write to spades.support at cab.spbu.ru\n")
+        sys.stderr.write("or report an issue on our GitHub repository github.com/ablab/spades\n")
         sys.stderr.write("Please provide us with params.txt and " + binary_name.lower() + ".log files from the output directory.\n")
         sys.stderr.flush()
     if current_tmp_dir and os.path.isdir(current_tmp_dir):
@@ -60,14 +65,35 @@ def warning(warn_str, log=None, prefix="== Warning == "):
 
 
 def check_python_version():
-    if sys.version[0:3] not in options_storage.SUPPORTED_PYTHON_VERSIONS:
-        error("python version " + sys.version[0:3] + " is not supported!\n" + \
-              "Supported versions are " + ", ".join(options_storage.SUPPORTED_PYTHON_VERSIONS))
+    def __next_version(version):
+        components = version.split('.')
+        for i in reversed(range(len(components))):
+            if components[i].isdigit():
+                components[i] = str(int(components[i]) + 1)
+                break
+        return '.'.join(components)
+
+    current_version = sys.version.split()[0]
+    supported_versions_msg = []
+    for supported_versions in options_storage.SUPPORTED_PYTHON_VERSIONS:
+        major = supported_versions[0]
+        if '-' in supported_versions:  # range
+            min_inc, max_inc = supported_versions.split('-')
+        elif supported_versions.endswith('+'):  # half open range
+            min_inc, max_inc = supported_versions[:-1], major
+        else:  # exact version
+            min_inc = max_inc = supported_versions
+        max_exc = __next_version(max_inc)
+        supported_versions_msg.append("Python%s: %s" % (major, supported_versions.replace('+', " and higher")))
+        if LooseVersion(min_inc) <= LooseVersion(current_version) < LooseVersion(max_exc):
+            return True
+    error("Python version " + current_version + " is not supported!\n" +
+          "Supported versions are " + ", ".join(supported_versions_msg))
 
 
 def get_spades_binaries_info_message():
     return "You can obtain SPAdes binaries in one of two ways:" +\
-           "\n1. Download them from http://bioinf.spbau.ru/content/spades-download" +\
+           "\n1. Download them from http://cab.spbu.ru/software/spades/" +\
            "\n2. Build source code with ./spades_compile.sh script"
 
 
@@ -577,6 +603,36 @@ def relative2abs_paths(dataset_data, dirname):
     return abs_paths_dataset_data
 
 
+def get_reads_length(dataset_data, log, num_checked=10 ** 4, diff_len_allowable=25):
+    max_reads_lenghts = [get_max_reads_length(reads_file, log, num_checked) for reads_file in get_reads_files(dataset_data)]
+
+    avg_len = sum(max_reads_lenghts) / len(max_reads_lenghts)
+    for max_len in max_reads_lenghts:
+        if math.fabs(max_len - avg_len) > diff_len_allowable:
+            warning('Read lengths differ more than allowable. Length: ' + str(max_len) + '. Avg. length: ' + str(avg_len) + '.', log)
+    reads_length = min(max_reads_lenghts)
+    log.info('Reads length: ' + str(reads_length))
+    return reads_length
+
+
+def get_reads_files(dataset_data):
+    for reads_library in dataset_data:
+        for key, value in reads_library.items():
+            if key.endswith('reads'):
+                for reads_file in value:
+                    yield reads_file
+
+
+def get_max_reads_length(reads_file, log, num_checked):
+    file_type = SeqIO.get_read_file_type(reads_file)
+    if not file_type:
+        error('Incorrect type of reads file: ' + reads_file, log)
+
+    max_reads_length = max([len(rec) for rec in itertools.islice(SeqIO.parse(SeqIO.Open(reads_file, "r"), file_type), num_checked)])
+    log.info('Max reads length: ' + str(max_reads_length))
+    return max_reads_length
+
+
 def check_dataset_reads(dataset_data, only_assembler, log):
     all_files = []
     for id, reads_library in enumerate(dataset_data):
diff --git a/test_dataset/reference_1K.fa.gz b/test_dataset/reference_1K.fa.gz
index 05cd8ef..9062ac3 100644
Binary files a/test_dataset/reference_1K.fa.gz and b/test_dataset/reference_1K.fa.gz differ

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/spades.git



More information about the debian-med-commit mailing list